✨ NEW: Add simple typographic replacements (#59)

executablebooks · Oct 21, 2020 · f290ba9 · f290ba9
1 parent c8f82bc
commit f290ba9
Show file tree

Hide file tree

Showing 8 changed files with 161 additions and 110 deletions.
diff --git a/docs/conf.py b/docs/conf.py
@@ -45,6 +45,8 @@
 # This pattern also affects html_static_path and html_extra_path.
 exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
 
+nitpick_ignore = [("py:class", "Match")]
+
 
 # -- Options for HTML output -------------------------------------------------
 

diff --git a/markdown_it/parser_core.py b/markdown_it/parser_core.py
@@ -8,15 +8,15 @@
 
 from .ruler import Ruler
 from .rules_core.state_core import StateCore
-from .rules_core import normalize, block, inline
+from .rules_core import normalize, block, inline, replace
 
 # TODO linkify, replacements, smartquotes
 _rules = [
     ["normalize", normalize],
     ["block", block],
     ["inline", inline],
     #   [ 'linkify',        require('./rules_core/linkify')        ],
-    #   [ 'replacements',   require('./rules_core/replacements')   ],
+    ["replacements", replace],
     #   [ 'smartquotes',    require('./rules_core/smartquotes')    ]
 ]
 

diff --git a/markdown_it/rules_core/__init__.py b/markdown_it/rules_core/__init__.py
@@ -2,3 +2,4 @@
 from .normalize import normalize  # noqa: F401
 from .block import block  # noqa: F401
 from .inline import inline  # noqa: F401
+from .replacements import replace  # noqa: F401
diff --git a/markdown_it/rules_core/replacements.js b/markdown_it/rules_core/replacements.js
diff --git a/markdown_it/rules_core/replacements.py b/markdown_it/rules_core/replacements.py
@@ -0,0 +1,123 @@
+"""Simple typographic replacements
+
+* ``(c)``, ``(C)`` → ©
+* ``(tm)``, ``(TM)`` → ™
+* ``(r)``, ``(R)`` → ®
+* ``(p)``, ``(P)`` → §
+* ``+-`` → ±
+* ``...`` → …
+* ``?....`` → ?..
+* ``!....`` → !..
+* ``????????`` → ???
+* ``!!!!!`` → !!!
+* ``,,,`` → ,
+* ``--`` → – (en dash)
+* ``---`` → — (em dash)
+"""
+import logging
+import re
+from typing import List, Match
+
+from .state_core import StateCore
+from ..token import Token
+
+LOGGER = logging.getLogger(__name__)
+
+# TODO:
+# - fractionals 1/2, 1/4, 3/4 -> ½, ¼, ¾
+# - multiplication 2 x 4 -> 2 × 4
+
+# Cheap pre-check used before running the individual "rare" substitutions:
+# matches ``+-``, ``..``, ``????``, ``!!!!``, ``,,`` or ``--``.
+RARE_RE = re.compile(r"\+-|\.\.|\?\?\?\?|!!!!|,,|--")
+
+# Workaround for phantomjs - need regex without /g flag,
+# or root check will fail every second time
+# SCOPED_ABBR_TEST_RE = r"\((c|tm|r|p)\)"
+
+# ``(c)``, ``(tm)``, ``(r)``, ``(p)`` in any letter case; group 1 is the
+# abbreviation without the parentheses.
+SCOPED_ABBR_RE = re.compile(r"\((c|tm|r|p)\)", flags=re.IGNORECASE)
+
+# A literal ``+-`` pair.
+PLUS_MINUS_RE = re.compile(r"\+-")
+
+# A run of two or more dots.
+ELLIPSIS_RE = re.compile(r"\.{2,}")
+
+# A ``?`` or ``!`` (group 1) immediately followed by the ellipsis character.
+ELLIPSIS_QUESTION_EXCLAMATION_RE = re.compile(r"([?!])…")
+
+# A run of four or more ``?`` / ``!`` characters. Because the group itself is
+# repeated, group 1 holds only the last character of the run.
+QUESTION_EXCLAMATION_RE = re.compile(r"([?!]){4,}")
+
+# A run of two or more commas.
+COMMA_RE = re.compile(r",{2,}")
+
+# Exactly three hyphens: preceded by line start or a non-hyphen (group 1) and
+# followed by a non-hyphen or line end.
+EM_DASH_RE = re.compile(r"(^|[^-])---(?=[^-]|$)", flags=re.MULTILINE)
+
+# Two hyphens standing alone: preceded by line start or whitespace (group 1)
+# and followed by whitespace or line end.
+EN_DASH_RE = re.compile(r"(^|\s)--(?=\s|$)", flags=re.MULTILINE)
+
+# Two hyphens wedged between characters that are neither hyphens nor
+# whitespace (line start / line end are also accepted on either side).
+EN_DASH_INDENT_RE = re.compile(r"(^|[^-\s])--(?=[^-\s]|$)", flags=re.MULTILINE)
+
+
+# Lower-cased abbreviation -> replacement symbol.
+SCOPED_ABBR = {"c": "©", "r": "®", "p": "§", "tm": "™"}
+
+
+def replaceFn(match: Match[str]):
+    return SCOPED_ABBR[match.group(1).lower()]
+
+
+def replace_scoped(inlineTokens: List[Token]):
+    inside_autolink = 0
+
+    for token in inlineTokens:
+        if token.type == "text" and not inside_autolink:
+            token.content = SCOPED_ABBR_RE.sub(replaceFn, token.content)
+
+        if token.type == "link_open" and token.info == "auto":
+            inside_autolink -= 1
+
+        if token.type == "link_close" and token.info == "auto":
+            inside_autolink += 1
+
+
+def replace_rare(inlineTokens: List[Token]):
+    inside_autolink = 0
+
+    for token in inlineTokens:
+        if token.type == "text" and not inside_autolink:
+            if RARE_RE.search(token.content):
+                # +- -> ±
+                token.content = PLUS_MINUS_RE.sub("±", token.content)
+
+                # .., ..., ....... -> …
+                token.content = ELLIPSIS_RE.sub("…", token.content)
+
+                # but ?..... & !..... -> ?.. & !..
+                token.content = ELLIPSIS_QUESTION_EXCLAMATION_RE.sub(
+                    "\\1..", token.content
+                )
+                token.content = QUESTION_EXCLAMATION_RE.sub("\\1\\1\\1", token.content)
+
+                # ,,  ,,,  ,,,, -> ,
+                token.content = COMMA_RE.sub(",", token.content)
+
+                # em-dash
+                token.content = EM_DASH_RE.sub("\\1\u2014", token.content)
+
+                # en-dash
+                token.content = EN_DASH_RE.sub("\\1\u2013", token.content)
+                token.content = EN_DASH_INDENT_RE.sub("\\1\u2013", token.content)
+
+        if token.type == "link_open" and token.info == "auto":
+            inside_autolink -= 1
+
+        if token.type == "link_close" and token.info == "auto":
+            inside_autolink += 1
+
+
+def replace(state: StateCore):
+    if not state.md.options.typographer:
+        return
+
+    for token in state.tokens:
+        if token.type != "inline":
+            continue
+
+        if SCOPED_ABBR_RE.search(token.content):
+            replace_scoped(token.children)
+
+        if RARE_RE.search(token.content):
+            replace_rare(token.children)
diff --git a/tests/test_api/test_main.py b/tests/test_api/test_main.py
@@ -7,7 +7,7 @@ def test_get_rules():
     md = MarkdownIt("zero")
     # print(md.get_all_rules())
     assert md.get_all_rules() == {
-        "core": ["normalize", "block", "inline"],
+        "core": ["normalize", "block", "inline", "replacements"],
         "block": [
             "table",
             "code",

diff --git a/tests/test_port/fixtures/typographer.md b/tests/test_port/fixtures/typographer.md
@@ -4,6 +4,19 @@
 <p>(bad)</p>
 .
 
+copyright (Lower)
+.
+(c)
+.
+<p>©</p>
+.
+
+copyright (Upper)
+.
+(C)
+.
+<p>©</p>
+.
 
 copyright
 .
@@ -61,6 +74,14 @@ dupes
 .
 
 
+dupes-ellipsis
+.
+!... ?... ,... !!!!!!.... ????.... ,,...
+.
+<p>!.. ?.. ,… !!!.. ???.. ,…</p>
+.
+
+
 dashes
 .
 ---markdownit --- super---

diff --git a/tests/test_port/test_fixtures.py b/tests/test_port/test_fixtures.py
@@ -8,6 +8,17 @@
 FIXTURE_PATH = Path(__file__).parent.joinpath("fixtures")
 
 
+@pytest.mark.parametrize(
+    "line,title,input,expected",
+    read_fixture_file(FIXTURE_PATH.joinpath("typographer.md")),
+)
+def test_typographer(line, title, input, expected):
+    md = MarkdownIt().enable("replacements")
+    md.options["typographer"] = True
+    text = md.render(input)
+    assert text.rstrip() == expected.rstrip()
+
+
 @pytest.mark.parametrize(
     "line,title,input,expected", read_fixture_file(FIXTURE_PATH.joinpath("tables.md"))
 )