Skip to content

Commit

Permalink
refactor: Use a custom autoref HTML tag
Browse files Browse the repository at this point in the history
  • Loading branch information
pawamoy committed May 24, 2024
1 parent ffcaa01 commit 5073006
Show file tree
Hide file tree
Showing 3 changed files with 145 additions and 12 deletions.
3 changes: 2 additions & 1 deletion src/mkdocs_autorefs/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ class AutorefsPlugin(BasePlugin):

scan_toc: bool = True
current_page: str | None = None
legacy_refs: bool = True

def __init__(self) -> None:
"""Initialize the object."""
Expand Down Expand Up @@ -211,7 +212,7 @@ def on_post_page(self, output: str, page: Page, **kwargs: Any) -> str: # noqa:
log.debug(f"Fixing references in page {page.file.src_path}")

url_mapper = functools.partial(self.get_item_url, from_url=page.url, fallback=self.get_fallback_anchor)
fixed_output, unmapped = fix_refs(output, url_mapper)
fixed_output, unmapped = fix_refs(output, url_mapper, _legacy_refs=self.legacy_refs)

if unmapped and log.isEnabledFor(logging.WARNING):
for ref in unmapped:
Expand Down
97 changes: 91 additions & 6 deletions src/mkdocs_autorefs/references.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import re
import warnings
from html import escape, unescape
from html.parser import HTMLParser
from typing import TYPE_CHECKING, Any, Callable, ClassVar, Match
from urllib.parse import urlsplit
from xml.etree.ElementTree import Element
Expand Down Expand Up @@ -44,7 +45,12 @@ def __getattr__(name: str) -> Any:
rf"(?: class=(?P<class>{_ATTR_VALUE}))?(?P<attrs> [^<>]+)?>(?P<title>.*?)</span>",
flags=re.DOTALL,
)
"""A regular expression to match mkdocs-autorefs' special reference markers
"""Deprecated. Use [`AUTOREF_RE`][mkdocs_autorefs.references.AUTOREF_RE] instead."""

# Matches `<autoref ATTRS>TITLE</autoref>`:
# - `attrs`: the raw attribute text, matched lazily, so it ends at the first `>`;
# - `title`: the tag contents, matched lazily up to the nearest `</autoref>`.
# `re.DOTALL` lets `.` match newlines, so both groups may span several lines.
AUTOREF_RE = re.compile(r"<autoref (?P<attrs>.*?)>(?P<title>.*?)</autoref>", flags=re.DOTALL)
"""The autoref HTML tag regular expression.
A regular expression to match mkdocs-autorefs' special reference markers
in the [`on_post_page` hook][mkdocs_autorefs.plugin.AutorefsPlugin.on_post_page].
"""

Expand Down Expand Up @@ -135,8 +141,8 @@ def _make_tag(self, identifier: str, text: str) -> Element:
Returns:
A new element.
"""
el = Element("span")
el.set("data-autorefs-identifier", identifier)
el = Element("autoref")
el.set("identifier", identifier)
el.text = text
return el

Expand Down Expand Up @@ -167,7 +173,7 @@ def relative_url(url_a: str, url_b: str) -> str:
return f"{relative}#{anchor}"


def fix_ref(url_mapper: Callable[[str], str], unmapped: list[str]) -> Callable:
def _legacy_fix_ref(url_mapper: Callable[[str], str], unmapped: list[str]) -> Callable:
"""Return a `repl` function for [`re.sub`](https://docs.python.org/3/library/re.html#re.sub).
In our context, we match Markdown references and replace them with HTML links.
Expand Down Expand Up @@ -216,7 +222,84 @@ def inner(match: Match) -> str:
return inner


def fix_refs(html: str, url_mapper: Callable[[str], str]) -> tuple[str, list[str]]:
class _AutorefsAttrs(dict):
    """Mapping of HTML attribute names to values for an autoref tag.

    Attributes without a value (boolean attributes such as `optional`) map to `None`.
    """

    # Attributes interpreted by the reference-fixing code itself; they are excluded from `remaining`.
    _handled_attrs: ClassVar[set[str]] = {"identifier", "optional", "hover", "class"}

    @property
    def remaining(self) -> str:
        """Serialize the attributes that are not handled specially.

        Values are HTML-escaped: `html.parser.HTMLParser` hands attribute values over
        with character references already decoded, so writing them back verbatim
        would produce broken markup for values containing `"`, `&` or `<`.

        Returns:
            Space-separated `name` (valueless) or `name="value"` items, in insertion order.
            An empty string when there is nothing left to serialize.
        """
        return " ".join(
            name if value is None else f'{name}="{escape(value)}"'
            for name, value in self.items()
            if name not in self._handled_attrs
        )


class _HTMLAttrsParser(HTMLParser):
    """HTML parser that collects the attributes of the start tags it is fed."""

    def __init__(self) -> None:
        super().__init__()
        # Attributes gathered during the current `parse` call.
        self.attrs: dict[str, str | None] = {}

    def parse(self, html: str) -> _AutorefsAttrs:
        """Parse an HTML snippet and return the attributes of its start tag(s).

        Arguments:
            html: The HTML text to parse, typically a single opening tag.

        Returns:
            A new mapping of attribute names to values (`None` for valueless attributes).
        """
        # A parser instance can be reused for many snippets. `feed` appends to an
        # internal buffer and keeps any input it could not process yet (for example
        # a tag with an unterminated quote), so reset the parser first to prevent
        # leftovers from a previous call from being prepended to this one.
        self.reset()
        self.attrs.clear()
        self.feed(html)
        return _AutorefsAttrs(self.attrs)

    def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None:  # noqa: ARG002
        """Record the attributes of each start tag; the tag name itself is ignored."""
        self.attrs.update(attrs)


# Shared module-level parser instance: the replacement callback built by `fix_ref`
# calls its `parse` method once per matched `<autoref>` tag.
_html_attrs_parser = _HTMLAttrsParser()


def fix_ref(url_mapper: Callable[[str], str], unmapped: list[str]) -> Callable:
    """Return a `repl` function for [`re.sub`](https://docs.python.org/3/library/re.html#re.sub).

    In our context, we match `<autoref>` HTML tags and replace them with HTML links.

    When the matched reference's identifier is not mapped to an URL (`url_mapper` raises `KeyError`)
    and the reference is not optional, we append the identifier to the outer `unmapped` list.
    It generally means the user is trying to cross-reference an object that was not collected
    and rendered, making it impossible to link to it. The caller can use this list to issue warnings.

    Arguments:
        url_mapper: A callable that gets an object's site URL by its identifier,
            such as [mkdocs_autorefs.plugin.AutorefsPlugin.get_item_url][].
        unmapped: A list to store unmapped identifiers.

    Returns:
        The actual function accepting a [`Match` object](https://docs.python.org/3/library/re.html#match-objects)
        and returning the replacement strings.
    """

    def inner(match: Match) -> str:
        """Build the replacement for one match exposing `attrs` and `title` groups."""
        title = match["title"]
        # Wrap the raw attribute text in a dummy tag so the HTML parser can split it.
        attrs = _html_attrs_parser.parse(f"<a {match['attrs']}>")
        identifier: str = attrs["identifier"]
        optional = "optional" in attrs
        hover = "hover" in attrs
        # The HTML parser decodes character references in attribute values, so the
        # identifier must be escaped again before being written into a `title="..."`
        # attribute; otherwise a `"` or `<` in it would break the generated markup.
        title_attr = escape(identifier)

        try:
            # NOTE(review): `identifier` is already entity-decoded by the parser, so this
            # `unescape` is a second pass; kept as-is for backward compatibility.
            url = url_mapper(unescape(identifier))
        except KeyError:
            if optional:
                # Optional references are never reported; they degrade to plain content.
                if hover:
                    return f'<span title="{title_attr}">{title}</span>'
                return title
            unmapped.append(identifier)
            # Fall back to a Markdown-looking reference so the unresolved link is visible.
            if title == identifier:
                return f"[{identifier}][]"
            return f"[{title}][{identifier}]"

        parsed = urlsplit(url)
        # A URL with a scheme or a network location points outside of the site.
        external = parsed.scheme or parsed.netloc
        classes = (attrs.get("class") or "").strip().split()
        classes = ["autorefs", "autorefs-external" if external else "autorefs-internal", *classes]
        class_attr = " ".join(classes)
        # Prefix the extra attributes with a space only when there are some.
        if remaining := attrs.remaining:
            remaining = f" {remaining}"
        if optional and hover:
            return f'<a class="{class_attr}" title="{title_attr}" href="{escape(url)}"{remaining}>{title}</a>'
        return f'<a class="{class_attr}" href="{escape(url)}"{remaining}>{title}</a>'

    return inner


def fix_refs(html: str, url_mapper: Callable[[str], str], *, _legacy_refs: bool = True) -> tuple[str, list[str]]:
"""Fix all references in the given HTML text.
Arguments:
Expand All @@ -228,7 +311,9 @@ def fix_refs(html: str, url_mapper: Callable[[str], str]) -> tuple[str, list[str
The fixed HTML.
"""
unmapped: list[str] = []
html = AUTO_REF_RE.sub(fix_ref(url_mapper, unmapped), html)
html = AUTOREF_RE.sub(fix_ref(url_mapper, unmapped), html)
if _legacy_refs:
html = AUTO_REF_RE.sub(_legacy_fix_ref(url_mapper, unmapped), html)
return html, unmapped


Expand Down
57 changes: 52 additions & 5 deletions tests/test_references.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ def test_ignore_reference_with_special_char() -> None:
)


def test_custom_required_reference() -> None:
def test_legacy_custom_required_reference() -> None:
"""Check that external HTML-based references are expanded or reported missing."""
url_map = {"ok": "ok.html#ok"}
source = "<span data-autorefs-identifier=bar>foo</span> <span data-autorefs-identifier=ok>ok</span>"
Expand All @@ -221,7 +221,16 @@ def test_custom_required_reference() -> None:
assert unmapped == ["bar"]


def test_custom_optional_reference() -> None:
def test_custom_required_reference() -> None:
    """Required `<autoref>` tags become links when mapped, and are reported when not."""
    known_urls = {"ok": "ok.html#ok"}
    html = "<autoref identifier=bar>foo</autoref> <autoref identifier=ok>ok</autoref>"
    fixed, missing = fix_refs(html, known_urls.__getitem__)
    # The unknown identifier is left as a Markdown-looking reference and reported.
    assert fixed == '[foo][bar] <a class="autorefs autorefs-internal" href="ok.html#ok">ok</a>'
    assert missing == ["bar"]


def test_legacy_custom_optional_reference() -> None:
"""Check that optional HTML-based references are expanded and never reported missing."""
url_map = {"ok": "ok.html#ok"}
source = '<span data-autorefs-optional="bar">foo</span> <span data-autorefs-optional=ok>ok</span>'
Expand All @@ -230,7 +239,16 @@ def test_custom_optional_reference() -> None:
assert unmapped == []


def test_custom_optional_hover_reference() -> None:
def test_custom_optional_reference() -> None:
    """Optional `<autoref>` tags are expanded when mapped and never reported as missing."""
    known_urls = {"ok": "ok.html#ok"}
    html = '<autoref optional identifier="bar">foo</autoref> <autoref identifier=ok optional>ok</autoref>'
    fixed, missing = fix_refs(html, known_urls.__getitem__)
    # The unknown optional reference collapses to its bare title.
    assert fixed == 'foo <a class="autorefs autorefs-internal" href="ok.html#ok">ok</a>'
    assert missing == []


def test_legacy_custom_optional_hover_reference() -> None:
"""Check that optional-hover HTML-based references are expanded and never reported missing."""
url_map = {"ok": "ok.html#ok"}
source = '<span data-autorefs-optional-hover="bar">foo</span> <span data-autorefs-optional-hover=ok>ok</span>'
Expand All @@ -242,7 +260,19 @@ def test_custom_optional_hover_reference() -> None:
assert unmapped == []


def test_external_references() -> None:
def test_custom_optional_hover_reference() -> None:
    """Optional hover `<autoref>` tags get a `title` attribute and are never reported as missing."""
    known_urls = {"ok": "ok.html#ok"}
    html = '<autoref optional hover identifier="bar">foo</autoref> <autoref optional identifier=ok hover>ok</autoref>'
    fixed, missing = fix_refs(html, known_urls.__getitem__)
    # Unknown identifier: a titled span. Known identifier: a titled link.
    expected = '<span title="bar">foo</span> <a class="autorefs autorefs-internal" title="ok" href="ok.html#ok">ok</a>'
    assert fixed == expected
    assert missing == []


def test_legacy_external_references() -> None:
"""Check that external references are marked as such."""
url_map = {"example": "https://example.com"}
source = '<span data-autorefs-optional="example">example</span>'
Expand All @@ -251,6 +281,15 @@ def test_external_references() -> None:
assert unmapped == []


def test_external_references() -> None:
    """Links resolving to an absolute URL carry the `autorefs-external` class."""
    known_urls = {"example": "https://example.com"}
    html = '<autoref optional identifier="example">example</autoref>'
    fixed, missing = fix_refs(html, known_urls.__getitem__)
    assert fixed == '<a class="autorefs autorefs-external" href="https://example.com">example</a>'
    assert missing == []


def test_register_markdown_anchors() -> None:
"""Check that Markdown anchors are registered when enabled."""
plugin = AutorefsPlugin()
Expand Down Expand Up @@ -333,9 +372,17 @@ def test_register_markdown_anchors_with_admonition() -> None:
}


def test_keep_data_attributes() -> None:
def test_legacy_keep_data_attributes() -> None:
    """Extra classes and data attributes on legacy autorefs spans are carried over to the link."""
    known_urls = {"example": "https://e.com"}
    html = '<span data-autorefs-optional="example" class="hi ho" data-foo data-bar="0">e</span>'
    fixed, _ = fix_refs(html, known_urls.__getitem__)
    expected = '<a class="autorefs autorefs-external hi ho" href="https://e.com" data-foo data-bar="0">e</a>'
    assert fixed == expected


def test_keep_data_attributes() -> None:
    """Extra classes and data attributes on `<autoref>` tags are carried over to the link."""
    known_urls = {"example": "https://e.com"}
    html = '<autoref optional identifier="example" class="hi ho" data-foo data-bar="0">e</autoref>'
    fixed, _ = fix_refs(html, known_urls.__getitem__)
    expected = '<a class="autorefs autorefs-external hi ho" href="https://e.com" data-foo data-bar="0">e</a>'
    assert fixed == expected

0 comments on commit 5073006

Please sign in to comment.