diff --git a/src/hawkmoth/doccursor.py b/src/hawkmoth/doccursor.py index 088b3404..09999a71 100644 --- a/src/hawkmoth/doccursor.py +++ b/src/hawkmoth/doccursor.py @@ -13,527 +13,523 @@ SourceRange, ) -def _get_meta(cursor): - return { - 'line': cursor.comment.extent.start.line if cursor.comment else '', - 'cursor.kind': cursor.kind, - 'cursor.displayname': cursor.displayname, - 'cursor.spelling': cursor.spelling, - } - -# Workaround for clang cursor.get_tokens() being unreliable for cursors whose -# extent contains macro expansions. The result may be empty or contain bogus -# tokens, depending on the case. -# -# The problem seems to be related to cursor.extent. Recreating the extent and -# getting the tokens from the translation unit works fine. The __repr__ for both -# the recreated and original extents is the same, but comparison indicates they -# do differ under the hood. -def _cursor_get_tokens(cursor): - tu = cursor.translation_unit - - start = cursor.extent.start - start = SourceLocation.from_position(tu, start.file, start.line, start.column) - - end = cursor.extent.end - end = SourceLocation.from_position(tu, end.file, end.line, end.column) - - extent = SourceRange.from_locations(start, end) - - yield from tu.get_tokens(extent=extent) - -# Return None for simple macros, a potentially empty list of arguments for -# function-like macros -def _get_macro_args(cursor): - if cursor.kind != CursorKind.MACRO_DEFINITION: - return None - - tokens = _cursor_get_tokens(cursor) - - # Use the first two tokens to make sure this starts with 'IDENTIFIER(' - one = next(tokens) - two = next(tokens, None) - if two is None or one.extent.end != two.extent.start or two.spelling != '(': - return None - - # Naïve parsing of macro arguments - # FIXME: This doesn't handle GCC named vararg extension FOO(vararg...) - args = [] - for token in tokens: - if token.spelling == ')': - return args - elif token.spelling == ',': - continue - elif token.kind == TokenKind.IDENTIFIER: - args.extend([('', token.spelling)]) - elif token.spelling == '...': - args.extend([('', token.spelling)]) - else: - break - - return None - -def _get_storage_class(cursor): - """Get the storage class of a cursor. - Only storage classes that are relevant to the documentation are returned. - - Returns: - Storage class as a string. ``None`` otherwise. - """ - storage_class_map = { - StorageClass.EXTERN: 'extern', - StorageClass.STATIC: 'static', - } - - return storage_class_map.get(cursor.storage_class) +class DocCursor: + """Documentation centric wrapper for Clang's own ``Cursor``. -def _get_function_quals(cursor): - """Get all the qualifiers of a function object. + This class abstracts a documentation worthy cursor so the user can query + relevant bits for documentation purpose, but otherwise hide all the + complications behind Clang's AST traversal and extraction of said bits of + information. - Returns: - List of (prefix) function qualifiers. + Technically, this class can hold any Clang cursor within itself, but it + won't expose any relevant information for those. """ - tokens = [t.spelling for t in _cursor_get_tokens(cursor)] - quals = [] - - if 'static' in tokens: - quals.append('static') - if 'inline' in tokens: - quals.append('inline') - return quals - -def _get_method_quals(cursor): - """Get all the qualifiers of a method. + def __init__(self, domain=None, cursor=None, comments=None): + self._comments = comments if comments else {} + self._cc = cursor - Returns: - List of prefix method qualifiers and list of suffix method qualifiers. - """ - tokens = [t.spelling for t in _cursor_get_tokens(cursor)] - pre_quals = [] - pos_quals = [] - - if cursor.is_static_method(): - pre_quals.append('static') - if cursor.is_virtual_method(): - pre_quals.append('virtual') - if 'constexpr' in tokens: - pre_quals.append('constexpr') - - if cursor.is_const_method(): - pos_quals.append('const') - if cursor.is_pure_virtual_method(): - pos_quals.append('= 0') - if cursor.is_default_method(): - pos_quals.append('= default') - if 'delete' in tokens: - pos_quals.append('= delete') - if 'override' in tokens: - pos_quals.append('override') - - except_spec = cursor.exception_specification_kind - if except_spec == ExceptionSpecificationKind.BASIC_NOEXCEPT: - pos_quals.append('noexcept') - - return pre_quals, pos_quals - -def _get_access_specifier(cursor, domain='cpp'): - """Get the access specifier of a cursor, if any. - - Returns: - One of 'private', 'protected', 'public' or `None`. - """ - # No access specifiers in C. - if domain == 'c': - return None + self.domain = domain + self.hash = self._cc.hash + self.kind = self._cc.kind - # No access specifiers in redundant contexts. - if cursor.semantic_parent and cursor.semantic_parent.kind == CursorKind.UNION_DECL: - return None + if self.hash in self._comments: + self.comment = self._comments[self.hash] + else: + self.comment = None - name_map = { - AccessSpecifier.PRIVATE: 'private', - AccessSpecifier.PROTECTED: 'protected', - AccessSpecifier.PUBLIC: 'public', - } + self.displayname = self._cc.displayname + if self.kind == CursorKind.ENUM_CONSTANT_DECL: + if '=' in [t.spelling for t in self.get_tokens()]: + self.enum_value = self._cc.enum_value + else: + self.enum_value = None + self.is_scoped_enum = self._cc.is_scoped_enum + self.spelling = self._cc.spelling + self.type = self._cc.type - return name_map.get(cursor.access_specifier, None) + def __hash__(self): + return self.hash -def _get_template_line(cursor): - """Get the template arguments of a cursor. + def get_meta(self): + return { + 'line': self.comment.extent.start.line if self.comment else '', + 'cursor.kind': self.kind, + 'cursor.displayname': self.displayname, + 'cursor.spelling': self.spelling, + } - This recurses for templated template arguments. + def get_children(self): + """Get children cursors.""" + domain = self.domain - Returns: - String with the form 'template<...> ' if the cursor is a template or - `None` otherwise. When the cursor represents a templated template - argument, the returned string is actually of the form 'template<...> - name', but this should only occur under recursion. - """ - # We only add the name when we recurse, in which case we need to track the - # name of the templated template argument. Otherwise the name is not part of - # the template arguments. - name = '' - - if cursor.kind not in [CursorKind.CLASS_TEMPLATE, - CursorKind.FUNCTION_TEMPLATE, - CursorKind.TEMPLATE_TEMPLATE_PARAMETER]: - return None + # Identify `extern "C"` blocks and change domain accordingly. For some + # reason, the Python bindings don't return the cursor kind LINKAGE_SPEC + # as one would expect, so we need to do it the hard way. + if domain == 'cpp' and self.kind == CursorKind.UNEXPOSED_DECL: + tokens = self.get_tokens() + ntoken = next(tokens, None) + if ntoken and ntoken.spelling == 'extern': + ntoken = next(tokens, None) + if ntoken and ntoken.spelling == '"C"': + domain = 'c' - # The type of type parameters can be 'typename' and 'class'. These are - # equivalent, but we want it to look like the source code for consistency. - # We can do it by looking at the tokens directly. This is slightly - # complicated due to variadic template type parameters. - def typetype(cursor): - tokens = list(_cursor_get_tokens(cursor)) - if tokens[-2].spelling == '...': - return f'{tokens[-3].spelling}...' - else: - return f'{tokens[-2].spelling}' - - # We need to add the keyword 'typename' or 'class' if we have recursed and - # therefore we are inside the template argument list. - if cursor.kind == CursorKind.TEMPLATE_TEMPLATE_PARAMETER: - name = f' {typetype(cursor)} {cursor.spelling}' - - template_args = [] - for child in cursor.get_children(): - if child.kind == CursorKind.TEMPLATE_TYPE_PARAMETER: - template_args.append(f'{typetype(child)} {child.spelling}') - elif child.kind == CursorKind.TEMPLATE_NON_TYPE_PARAMETER: - arg_name = f' {child.spelling}' if child.spelling != '' else '...' - template_args.append(f'{child.type.spelling}{arg_name}') - elif child.kind == CursorKind.TEMPLATE_TEMPLATE_PARAMETER: - arg = _get_template_line(child) - if arg: - template_args.append(arg) - - return f'template<{", ".join(template_args)}>{name}' - -def _specifiers_fixup(cursor, basetype): - """Fix the type for C++ specifiers. - - Note the ``basetype`` is not necessarily ``cursor.type``. When dealing with - pointers or arrays, we need to get to the base type as in - :py:func:`_var_type_fixup`. - - Returns: - List of C++ specifiers for the cursor. - """ - tokens = [t.spelling for t in _cursor_get_tokens(cursor)] - type_elem = [] + for c in self._cc.get_children(): + yield DocCursor(domain=domain, cursor=c, comments=self._comments) - if 'mutable' in tokens: - type_elem.append('mutable') + def get_tokens(self): + """Get cursor tokens. - # If 'constexpr', strip the redundant 'const' that Clang adds to the - # type spelling by default. - if 'constexpr' in tokens: - type_elem.append('constexpr') - type_elem.append(basetype.spelling[len('const '):]) - else: - type_elem.append(basetype.spelling) + Wrapper for Clang's `cursor.get_tokens()` that addresses issues for + cursors whose extent contains macro expansions. The result may be empty + or contain bogus tokens, depending on the case. - return type_elem + The problem seems to be related to `cursor.extent`. Recreating the + extent and getting the tokens from the translation unit works fine. The + `__repr__` for both the recreated and original extents is the same, but + comparison indicates they do differ under the hood. + """ + tu = self._cc.translation_unit -def _get_scopedenum_type(cursor): - """Get the explicit underlying type of a scoped enumerator. + start = self._cc.extent.start + start = SourceLocation.from_position(tu, start.file, start.line, start.column) - Returns: - Underlying type of a scoped enumerator that has been explicitly defined. - ``None`` otherwise. - """ - if cursor.kind == CursorKind.ENUM_DECL and cursor.is_scoped_enum(): - if list(_cursor_get_tokens(cursor))[3].spelling == ':': - return f': {cursor.enum_type.spelling}' - return None - -def _normalize_type(type_string): - return 'bool' if type_string == '_Bool' else type_string - -def _symbolic_dims(cursor): - dim = None - for spelling in [t.spelling for t in _cursor_get_tokens(cursor)]: - if spelling == '[': - # dim should be None here - dim = [] - elif spelling == ']': - # dim should not be None here - yield ' '.join(dim) - dim = None - elif dim is not None: - dim.append(spelling) - -def _dims_fixup(cursor, dims): - if not dims: - return '' - - symbolic_dims = list(_symbolic_dims(cursor)) - if len(symbolic_dims) == len(dims): - dims = symbolic_dims - - return ''.join([f'[{d}]' for d in dims]) - -def _var_type_fixup(cursor, domain): - """Fix non trivial variable and argument types. - - If this is an array, the dimensions should be applied to the name, not - the type. - If this is a function pointer, or an array of function pointers, the - name should be within the parenthesis as in ``(*name)`` or ``(*name[N])``. - """ - cursor_type = cursor.type - - stars_and_quals = '' - dims = [] - while True: - if cursor_type.kind == TypeKind.POINTER: - quals = [] - if cursor_type.is_const_qualified(): - quals.append('const') - if cursor_type.is_volatile_qualified(): - quals.append('volatile') - if cursor_type.is_restrict_qualified(): - quals.append('restrict') - - spacer = ' ' if quals and stars_and_quals else '' - stars_and_quals = '*' + ' '.join(quals) + spacer + stars_and_quals - - cursor_type = cursor_type.get_pointee() - elif cursor_type.kind == TypeKind.CONSTANTARRAY: - dims.append(cursor_type.element_count) - cursor_type = cursor_type.get_array_element_type() - elif cursor_type.kind == TypeKind.INCOMPLETEARRAY: - dims.append('') - cursor_type = cursor_type.get_array_element_type() - else: - break + end = self._cc.extent.end + end = SourceLocation.from_position(tu, end.file, end.line, end.column) - dims = _dims_fixup(cursor, dims) + extent = SourceRange.from_locations(start, end) - type_elem = [] + yield from tu.get_tokens(extent=extent) - access_spec = _get_access_specifier(cursor, domain) - if access_spec: - type_elem.append(access_spec) + def var_type_fixup(self): + """Fix non trivial variable and argument types. - if cursor_type.kind == TypeKind.FUNCTIONPROTO: - pad = lambda s: s if s.endswith('*') or s.endswith('&') else s + ' ' + If this is an array, the dimensions should be applied to the name, not + the type. If this is a function pointer, or an array of function + pointers, the name should be within the parenthesis as in ``(*name)`` + or ``(*name[N])``. + """ + return self._var_type_fixup(self) + def get_args(self): + """Get function / method arguments.""" args = [] - for c in cursor.get_children(): - if c.kind == CursorKind.PARM_DECL: - arg_ttype, arg_name = _var_type_fixup(c, domain) - args.append(f'{pad(arg_ttype)}{arg_name}' if arg_name else arg_ttype) - if cursor_type.is_function_variadic(): - args.append('...') - if len(args) == 0: - args.append('void') - - ret_type = _normalize_type(cursor_type.get_result().spelling) - - name = f'''{pad(ret_type)}({pad(stars_and_quals)}{cursor.spelling}{dims})({', '.join(args)})''' # noqa: E501 - else: - - storage_class = _get_storage_class(cursor) - if storage_class: - type_elem.append(storage_class) - type_elem.extend(_specifiers_fixup(cursor, cursor_type)) + # Only fully prototyped functions will have argument lists to process. + if self.type.kind == TypeKind.FUNCTIONPROTO: + for c in self.get_children(): + if c.kind == CursorKind.PARM_DECL: + arg_ttype, arg_name = self._var_type_fixup(c) + args.extend([(arg_ttype, arg_name)]) - if stars_and_quals: - type_elem.append(stars_and_quals) + if self.type.is_function_variadic(): + args.extend([('', '...')]) + if len(args) == 0: + args.extend([('', 'void')]) - name = cursor.spelling + dims + return args - # Convert _Bool to bool - type_elem = [_normalize_type(t) for t in type_elem] + def function_fixup(self): + """Parse additional details of a function declaration.""" + args = self.get_args() - ttype = ' '.join(type_elem) - return ttype, name + full_type = self._get_function_quals() -def _type_definition_fixup(cursor): - """Fix non trivial type definitions.""" - type_elem = [] + template_line = self._get_template_line() + if template_line: + full_type.append(template_line) - # Short cut for anonymous symbols. - if cursor.is_anonymous(): - return None + full_type.append(self._normalize_type(self._cc.result_type.spelling)) - # libclang 16 and later have cursor.spelling == cursor.type.spelling for - # typedefs of anonymous entities, while libclang 15 and earlier have an - # empty string. Match the behaviour across libclang versions. - if cursor.spelling == '': - return cursor.type.spelling + ttype = ' '.join(full_type) - type_elem.extend(_specifiers_fixup(cursor, cursor.type)) + return ttype, args - colon_suffix = '' - if cursor.kind in [CursorKind.STRUCT_DECL, - CursorKind.CLASS_DECL, - CursorKind.CLASS_TEMPLATE]: - inheritance = _get_inheritance(cursor) - if inheritance: - colon_suffix = inheritance - elif cursor.kind == CursorKind.ENUM_DECL: - scopedenum_type = _get_scopedenum_type(cursor) - if scopedenum_type: - colon_suffix = scopedenum_type + def method_fixup(self): + """Parse additional details of a method declaration.""" + args = self.get_args() - template = _get_template_line(cursor) - template = template + ' ' if template else '' + full_type = [] - return f'{template}{cursor.spelling}{colon_suffix}' + access_spec = self._get_access_specifier() + if access_spec: + full_type.append(access_spec) -def _get_args(cursor, domain): - """Get function / method arguments.""" - args = [] + pre_quals, pos_quals = self._get_method_quals() - # Only fully prototyped functions will have argument lists to process. - if cursor.type.kind == TypeKind.FUNCTIONPROTO: - for c in cursor.get_children(): - if c.kind == CursorKind.PARM_DECL: - arg_ttype, arg_name = _var_type_fixup(c, domain) - args.extend([(arg_ttype, arg_name)]) + full_type.extend(pre_quals) - if cursor.type.is_function_variadic(): - args.extend([('', '...')]) - if len(args) == 0: - args.extend([('', 'void')]) + template_line = self._get_template_line() + if template_line: + full_type.append(template_line) - return args + if self.kind not in [CursorKind.CONSTRUCTOR, CursorKind.DESTRUCTOR]: + full_type.append(self._cc.result_type.spelling) -def _function_fixup(cursor, domain): - """Parse additional details of a function declaration.""" - args = _get_args(cursor, domain) + ttype = ' '.join(full_type) + quals = ' '.join(pos_quals) - full_type = _get_function_quals(cursor) + return ttype, args, quals - template_line = _get_template_line(cursor) - if template_line: - full_type.append(template_line) + def type_definition_fixup(self): + """Fix non trivial type definitions.""" + type_elem = [] - full_type.append(_normalize_type(cursor.result_type.spelling)) + # Short cut for anonymous symbols. + if self._cc.is_anonymous(): + return None - ttype = ' '.join(full_type) + # libclang 16 and later have cursor.spelling == cursor.type.spelling + # for typedefs of anonymous entities, while libclang 15 and earlier + # have an empty string. Match the behaviour across libclang versions. + if self.spelling == '': + return self.type.spelling - return ttype, args + type_elem.extend(self._specifiers_fixup(self.type)) -def _method_fixup(cursor): - """Parse additional details of a method declaration.""" - args = _get_args(cursor, 'cpp') + colon_suffix = '' + if self.kind in [CursorKind.STRUCT_DECL, + CursorKind.CLASS_DECL, + CursorKind.CLASS_TEMPLATE]: + inheritance = self._get_inheritance() + if inheritance: + colon_suffix = inheritance + elif self.kind == CursorKind.ENUM_DECL: + scopedenum_type = self._get_scopedenum_type() + if scopedenum_type: + colon_suffix = scopedenum_type - full_type = [] + template = self._get_template_line() + template = template + ' ' if template else '' - access_spec = _get_access_specifier(cursor) - if access_spec: - full_type.append(access_spec) + return f'{template}{self.spelling}{colon_suffix}' - pre_quals, pos_quals = _get_method_quals(cursor) + def get_macro_args(self): + """Get macro arguments. - full_type.extend(pre_quals) + Returns: + None for simple macros, a potentially empty list of arguments for + function-like macros + """ + if self.kind != CursorKind.MACRO_DEFINITION: + return None - template_line = _get_template_line(cursor) - if template_line: - full_type.append(template_line) + tokens = self.get_tokens() - if cursor.kind not in [CursorKind.CONSTRUCTOR, CursorKind.DESTRUCTOR]: - full_type.append(cursor.result_type.spelling) + # Use the first two tokens to make sure this starts with 'IDENTIFIER(' + one = next(tokens) + two = next(tokens, None) + if two is None or one.extent.end != two.extent.start or two.spelling != '(': + return None - ttype = ' '.join(full_type) - quals = ' '.join(pos_quals) + # Naïve parsing of macro arguments + # FIXME: This doesn't handle GCC named vararg extension FOO(vararg...) + args = [] + for token in tokens: + if token.spelling == ')': + return args + elif token.spelling == ',': + continue + elif token.kind == TokenKind.IDENTIFIER: + args.extend([('', token.spelling)]) + elif token.spelling == '...': + args.extend([('', token.spelling)]) + else: + break - return ttype, args, quals + return None -def _get_inheritance(cursor): - """Get the full inheritance list of a cursor in C++ syntax. + def _symbolic_dims(self): + dim = None + for spelling in [t.spelling for t in self.get_tokens()]: + if spelling == '[': + # dim should be None here + dim = [] + elif spelling == ']': + # dim should not be None here + yield ' '.join(dim) + dim = None + elif dim is not None: + dim.append(spelling) + + def _dims_fixup(self, dims): + if not dims: + return '' + + symbolic_dims = list(self._symbolic_dims()) + if len(symbolic_dims) == len(dims): + dims = symbolic_dims + + return ''.join([f'[{d}]' for d in dims]) + + def _get_storage_class(self): + """Get the storage class of a cursor. + + Only storage classes that are relevant to the documentation are + returned. + + Returns: + Storage class as a string. ``None`` otherwise. + """ + storage_class_map = { + StorageClass.EXTERN: 'extern', + StorageClass.STATIC: 'static', + } + + return storage_class_map.get(self._cc.storage_class) + + def _get_function_quals(self): + """Get all the qualifiers of a function object. + + Returns: + List of (prefix) function qualifiers. + """ + tokens = [t.spelling for t in self.get_tokens()] + quals = [] + + if 'static' in tokens: + quals.append('static') + if 'inline' in tokens: + quals.append('inline') + + return quals + + def _get_method_quals(self): + """Get all the qualifiers of a method. + + Returns: + List of prefix method qualifiers and list of suffix method + qualifiers. + """ + tokens = [t.spelling for t in self.get_tokens()] + pre_quals = [] + pos_quals = [] + + if self._cc.is_static_method(): + pre_quals.append('static') + if self._cc.is_virtual_method(): + pre_quals.append('virtual') + if 'constexpr' in tokens: + pre_quals.append('constexpr') + + if self._cc.is_const_method(): + pos_quals.append('const') + if self._cc.is_pure_virtual_method(): + pos_quals.append('= 0') + if self._cc.is_default_method(): + pos_quals.append('= default') + if 'delete' in tokens: + pos_quals.append('= delete') + if 'override' in tokens: + pos_quals.append('override') + + except_spec = self._cc.exception_specification_kind + if except_spec == ExceptionSpecificationKind.BASIC_NOEXCEPT: + pos_quals.append('noexcept') + + return pre_quals, pos_quals + + def _specifiers_fixup(self, basetype): + """Fix the type for C++ specifiers. + + Note the ``basetype`` is not necessarily ``cursor.type``. When dealing + with pointers or arrays, we need to get to the base type as in + :py:func:`_var_type_fixup`. + + Returns: + List of C++ specifiers for the cursor. + """ + tokens = [t.spelling for t in self.get_tokens()] + type_elem = [] + + if 'mutable' in tokens: + type_elem.append('mutable') + + # If 'constexpr', strip the redundant 'const' that Clang adds to the + # type spelling by default. + if 'constexpr' in tokens: + type_elem.append('constexpr') + type_elem.append(basetype.spelling[len('const '):]) + else: + type_elem.append(basetype.spelling) + + return type_elem + + def _get_access_specifier(self): + """Get the access specifier of a cursor, if any. + + Returns: + One of 'private', 'protected', 'public' or `None`. + """ + # No access specifiers in C. + if self.domain == 'c': + return None + + # No access specifiers in redundant contexts. + if self._cc.semantic_parent: + if self._cc.semantic_parent.kind == CursorKind.UNION_DECL: + return None + + name_map = { + AccessSpecifier.PRIVATE: 'private', + AccessSpecifier.PROTECTED: 'protected', + AccessSpecifier.PUBLIC: 'public', + } + + return name_map.get(self._cc.access_specifier, None) + + def _get_template_line(self): + """Get the template arguments of a cursor. + + This recurses for templated template arguments. + + Returns: + String with the form 'template<...> ' if the cursor is a template + or `None` otherwise. When the cursor represents a templated + template argument, the returned string is actually of the form + 'template<...> name', but this should only occur under recursion. + """ + # We only add the name when we recurse, in which case we need to track + # the name of the templated template argument. Otherwise the name is + # not part of the template arguments. + name = '' + + if self.kind not in [CursorKind.CLASS_TEMPLATE, + CursorKind.FUNCTION_TEMPLATE, + CursorKind.TEMPLATE_TEMPLATE_PARAMETER]: + return None + + # The type of type parameters can be 'typename' and 'class'. These are + # equivalent, but we want it to look like the source code for + # consistency. We can do it by looking at the tokens directly. This is + # slightly complicated due to variadic template type parameters. + def typetype(cursor): + tokens = list(cursor.get_tokens()) + if tokens[-2].spelling == '...': + return f'{tokens[-3].spelling}...' + else: + return f'{tokens[-2].spelling}' + + # We need to add the keyword 'typename' or 'class' if we have recursed + # and therefore we are inside the template argument list. + if self.kind == CursorKind.TEMPLATE_TEMPLATE_PARAMETER: + name = f' {typetype(self)} {self.spelling}' + + template_args = [] + for child in self.get_children(): + if child.kind == CursorKind.TEMPLATE_TYPE_PARAMETER: + template_args.append(f'{typetype(child)} {child.spelling}') + elif child.kind == CursorKind.TEMPLATE_NON_TYPE_PARAMETER: + arg_name = f' {child.spelling}' if child.spelling != '' else '...' + template_args.append(f'{child.type.spelling}{arg_name}') + elif child.kind == CursorKind.TEMPLATE_TEMPLATE_PARAMETER: + arg = child._get_template_line() + if arg: + template_args.append(arg) + + return f'template<{", ".join(template_args)}>{name}' + + def _get_inheritance(self): + """Get the full inheritance list of a cursor in C++ syntax. + + Returns: + String with the form ': A, B, ...' when a cursor has + `CXX_BASE_SPECIFIER` children or `None` otherwise. + """ + inherited = [] + for child in self.get_children(): + if child.kind == CursorKind.CXX_BASE_SPECIFIER: + pad = lambda s: s + ' ' if s else '' + access_spec = child._get_access_specifier() + inherited.append(f'{pad(access_spec)}{child.type.spelling}') + + return ': ' + ', '.join(inherited) if len(inherited) > 0 else None + + def _get_scopedenum_type(self): + """Get the explicit underlying type of a scoped enumerator. + + Returns: + Underlying type of a scoped enumerator that has been explicitly + defined. ``None`` otherwise. + """ + if self.kind == CursorKind.ENUM_DECL and self._cc.is_scoped_enum(): + if list(self.get_tokens())[3].spelling == ':': + return f': {self._cc.enum_type.spelling}' + return None - Returns: - String with the form ': A, B, ...' when a cursor has - `CXX_BASE_SPECIFIER` children or `None` otherwise. - """ - inherited = [] - for child in cursor.get_children(): - if child.kind == CursorKind.CXX_BASE_SPECIFIER: - pad = lambda s: s + ' ' if s else '' - access_spec = _get_access_specifier(child) - inherited.append(f'{pad(access_spec)}{child.type.spelling}') + @staticmethod + def _normalize_type(type_string): + return 'bool' if type_string == '_Bool' else type_string + + @staticmethod + def _var_type_fixup(cursor): + cursor_type = cursor.type + + stars_and_quals = '' + dims = [] + while True: + if cursor_type.kind == TypeKind.POINTER: + quals = [] + if cursor_type.is_const_qualified(): + quals.append('const') + if cursor_type.is_volatile_qualified(): + quals.append('volatile') + if cursor_type.is_restrict_qualified(): + quals.append('restrict') + + spacer = ' ' if quals and stars_and_quals else '' + stars_and_quals = '*' + ' '.join(quals) + spacer + stars_and_quals + + cursor_type = cursor_type.get_pointee() + elif cursor_type.kind == TypeKind.CONSTANTARRAY: + dims.append(cursor_type.element_count) + cursor_type = cursor_type.get_array_element_type() + elif cursor_type.kind == TypeKind.INCOMPLETEARRAY: + dims.append('') + cursor_type = cursor_type.get_array_element_type() + else: + break - return ': ' + ', '.join(inherited) if len(inherited) > 0 else None + dims = cursor._dims_fixup(dims) + type_elem = [] -class DocCursor: - """Documentation centric wrapper for Clang's own ``Cursor``. + access_spec = cursor._get_access_specifier() + if access_spec: + type_elem.append(access_spec) - This class abstracts a documentation worthy cursor so the user can query - relevant bits for documentation purpose, but otherwise hide all the - complications behind Clang's AST traversal and extraction of said bits of - information. + if cursor_type.kind == TypeKind.FUNCTIONPROTO: + pad = lambda s: s if s.endswith('*') or s.endswith('&') else s + ' ' - Technically, this class can hold any Clang cursor within itself, but it - won't expose any relevant information for those. - """ + args = [] + for c in cursor.get_children(): + if c.kind == CursorKind.PARM_DECL: + arg_ttype, arg_name = cursor._var_type_fixup(c) + args.append(f'{pad(arg_ttype)}{arg_name}' if arg_name else arg_ttype) + if cursor_type.is_function_variadic(): + args.append('...') + if len(args) == 0: + args.append('void') - def __init__(self, domain=None, cursor=None, comments=None): - self._comments = comments if comments else {} - self._cc = cursor + ret_type = cursor._normalize_type(cursor_type.get_result().spelling) - self.domain = domain - self.hash = self._cc.hash - self.kind = self._cc.kind - - if self.hash in self._comments: - self.comment = self._comments[self.hash] + name = f'''{pad(ret_type)}({pad(stars_and_quals)}{cursor.spelling}{dims})({', '.join(args)})''' # noqa: E501 else: - self.comment = None - # TODO: - # We mimic everything we need from Clang's cursor for a drop in - # replacement. Later these will likely be more intelligent versions - # that incorporate logic from the helper parser functions. - self.access_specifier = self._cc.access_specifier - self.displayname = self._cc.displayname - if self.kind == CursorKind.ENUM_DECL: - self.enum_type = self._cc.enum_type - if self.kind == CursorKind.ENUM_CONSTANT_DECL: - if '=' in [t.spelling for t in _cursor_get_tokens(self._cc)]: - self.enum_value = self._cc.enum_value - else: - self.enum_value = None - self.exception_specification_kind = self._cc.exception_specification_kind - self.extent = self._cc.extent - self.is_anonymous = self._cc.is_anonymous - self.is_const_method = self._cc.is_const_method - self.is_default_method = self._cc.is_default_method - self.is_pure_virtual_method = self._cc.is_pure_virtual_method - self.is_scoped_enum = self._cc.is_scoped_enum - self.is_static_method = self._cc.is_static_method - self.is_virtual_method = self._cc.is_virtual_method - self.result_type = self._cc.result_type - self.semantic_parent = self._cc.semantic_parent - self.spelling = self._cc.spelling - self.storage_class = self._cc.storage_class - self.translation_unit = self._cc.translation_unit - self.type = self._cc.type + storage_class = cursor._get_storage_class() + if storage_class: + type_elem.append(storage_class) - def __hash__(self): - return self.hash + type_elem.extend(cursor._specifiers_fixup(cursor_type)) - def get_children(self): - """Get children cursors.""" - domain = self.domain + if stars_and_quals: + type_elem.append(stars_and_quals) - # Identify `extern "C"` blocks and change domain accordingly. For some - # reason, the Python bindings don't return the cursor kind LINKAGE_SPEC - # as one would expect, so we need to do it the hard way. - if domain == 'cpp' and self.kind == CursorKind.UNEXPOSED_DECL: - tokens = _cursor_get_tokens(self) - ntoken = next(tokens, None) - if ntoken and ntoken.spelling == 'extern': - ntoken = next(tokens, None) - if ntoken and ntoken.spelling == '"C"': - domain = 'c' + name = cursor.spelling + dims - for c in self._cc.get_children(): - yield DocCursor(domain=domain, cursor=c, comments=self._comments) + # Convert _Bool to bool + type_elem = [cursor._normalize_type(t) for t in type_elem] + + ttype = ' '.join(type_elem) + return ttype, name diff --git a/src/hawkmoth/parser.py b/src/hawkmoth/parser.py index f3e12162..ff3794c6 100644 --- a/src/hawkmoth/parser.py +++ b/src/hawkmoth/parser.py @@ -46,13 +46,6 @@ CursorKind, TokenKind, DocCursor, - _get_meta, - _cursor_get_tokens, - _function_fixup, - _get_macro_args, - _method_fixup, - _type_definition_fixup, - _var_type_fixup, ) class ErrorLevel(enum.IntEnum): @@ -205,11 +198,11 @@ def _recursive_parse(errors, cursor, nest): name = cursor.spelling ttype = cursor.type.spelling text = comment.spelling - meta = _get_meta(cursor) + meta = cursor.get_meta() if cursor.kind == CursorKind.MACRO_DEFINITION: # FIXME: check args against comment - args = _get_macro_args(cursor) + args = cursor.get_macro_args() if args is None: ds = docstring.MacroDocstring(domain=domain, text=text, @@ -223,7 +216,7 @@ def _recursive_parse(errors, cursor, nest): elif cursor.kind in [CursorKind.VAR_DECL, CursorKind.FIELD_DECL]: # Note: Preserve original name - ttype, decl_name = _var_type_fixup(cursor, domain) + ttype, decl_name = cursor.var_type_fixup() if cursor.kind == CursorKind.VAR_DECL: ds = docstring.VarDocstring(domain=domain, text=text, nest=nest, @@ -250,7 +243,7 @@ def _recursive_parse(errors, cursor, nest): CursorKind.CLASS_DECL, CursorKind.CLASS_TEMPLATE]: - decl_name = _type_definition_fixup(cursor) + decl_name = cursor.type_definition_fixup() if cursor.kind == CursorKind.STRUCT_DECL: ds = docstring.StructDocstring(domain=domain, text=text, @@ -288,7 +281,7 @@ def _recursive_parse(errors, cursor, nest): return [ds] elif cursor.kind == CursorKind.FUNCTION_DECL: - ttype, args = _function_fixup(cursor, domain) + ttype, args = cursor.function_fixup() ds = docstring.FunctionDocstring(domain=domain, text=text, nest=nest, name=name, ttype=ttype, args=args, @@ -299,7 +292,7 @@ def _recursive_parse(errors, cursor, nest): CursorKind.DESTRUCTOR, CursorKind.CXX_METHOD, CursorKind.FUNCTION_TEMPLATE]: - ttype, args, quals = _method_fixup(cursor) + ttype, args, quals = cursor.method_fixup() ds = docstring.FunctionDocstring(domain=domain, text=text, nest=nest, name=name, ttype=ttype, args=args, @@ -336,7 +329,7 @@ def _parse_undocumented_block(errors, cursor, nest): # For some reason, the Python bindings don't return the cursor kind # LINKAGE_SPEC as one would expect, so we need to do it the hard way. if cursor.kind == CursorKind.UNEXPOSED_DECL: - tokens = _cursor_get_tokens(cursor) + tokens = cursor.get_tokens() ntoken = next(tokens, None) if ntoken and ntoken.spelling == 'extern': ntoken = next(tokens, None)