Skip to content

Commit

Permalink
✨ NEW: Add simple typographic replacements (#59)
Browse files Browse the repository at this point in the history
  • Loading branch information
tsutsu3 authored Oct 21, 2020
1 parent c8f82bc commit f290ba9
Show file tree
Hide file tree
Showing 8 changed files with 161 additions and 110 deletions.
2 changes: 2 additions & 0 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]

nitpick_ignore = [("py:class", "Match")]


# -- Options for HTML output -------------------------------------------------

Expand Down
4 changes: 2 additions & 2 deletions markdown_it/parser_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,15 @@

from .ruler import Ruler
from .rules_core.state_core import StateCore
from .rules_core import normalize, block, inline
from .rules_core import normalize, block, inline, replace

# TODO linkify, smartquotes
_rules = [
["normalize", normalize],
["block", block],
["inline", inline],
# [ 'linkify', require('./rules_core/linkify') ],
# [ 'replacements', require('./rules_core/replacements') ],
["replacements", replace],
# [ 'smartquotes', require('./rules_core/smartquotes') ]
]

Expand Down
1 change: 1 addition & 0 deletions markdown_it/rules_core/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@
from .normalize import normalize # noqa: F401
from .block import block # noqa: F401
from .inline import inline # noqa: F401
from .replacements import replace # noqa: F401
107 changes: 0 additions & 107 deletions markdown_it/rules_core/replacements.js

This file was deleted.

123 changes: 123 additions & 0 deletions markdown_it/rules_core/replacements.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
"""Simple typographic replacements
* ``(c)``, ``(C)`` → ©
* ``(tm)``, ``(TM)`` → ™
* ``(r)``, ``(R)`` → ®
* ``(p)``, ``(P)`` → §
* ``+-`` → ±
* ``...`` → …
* ``?....`` → ?..
* ``!....`` → !..
* ``????????`` → ???
* ``!!!!!`` → !!!
* ``,,,`` → ,
* ``--`` → – (en dash, ``&ndash;``)
* ``---`` → — (em dash, ``&mdash;``)
"""
import logging
import re
from typing import List, Match

from .state_core import StateCore
from ..token import Token

LOGGER = logging.getLogger(__name__)

# TODO:
# - fractions 1/2, 1/4, 3/4 -> ½, ¼, ¾
# - multiplication 2 x 4 -> 2 × 4

# Cheap pre-filter: matches "+-", "..", "????", "!!!!", ",," or "--".
# It is searched first so that the individual substitutions below only run
# on text that contains at least one candidate sequence.
RARE_RE = re.compile(r"\+-|\.\.|\?\?\?\?|!!!!|,,|--")

# Workaround for phantomjs - need regex without /g flag,
# or root check will fail every second time
# SCOPED_ABBR_TEST_RE = r"\((c|tm|r|p)\)"
# NOTE(review): the workaround above looks inherited from the JavaScript
# implementation; compiled Python patterns keep no match position between
# calls, so it is presumably unnecessary here - confirm before removing.

# "(c)", "(tm)", "(r)" or "(p)" in any letter case; group 1 is the
# abbreviation between the parentheses.
SCOPED_ABBR_RE = re.compile(r"\((c|tm|r|p)\)", flags=re.IGNORECASE)

# Literal "+-".
PLUS_MINUS_RE = re.compile(r"\+-")

# Two or more consecutive dots.
ELLIPSIS_RE = re.compile(r"\.{2,}")

# A "?" or "!" (group 1) immediately followed by an ellipsis character.
# Applied after ELLIPSIS_RE to turn "?…" / "!…" back into "?.." / "!..".
ELLIPSIS_QUESTION_EXCLAMATION_RE = re.compile(r"([?!])…")

# A run of four or more "?" / "!" characters (they may be mixed).
# Group 1 is a repeated group, so it holds the last character of the run.
QUESTION_EXCLAMATION_RE = re.compile(r"([?!]){4,}")

# Two or more consecutive commas.
COMMA_RE = re.compile(r",{2,}")

# Exactly three hyphens with no further hyphen on either side.
# Group 1 is the preceding character (or empty at a line start) and must be
# re-emitted by the replacement; the following character is only looked at.
EM_DASH_RE = re.compile(r"(^|[^-])---(?=[^-]|$)", flags=re.MULTILINE)

# Two hyphens surrounded by whitespace or line boundaries; group 1 is the
# preceding whitespace (or empty at a line start).
EN_DASH_RE = re.compile(r"(^|\s)--(?=\s|$)", flags=re.MULTILINE)

# Two hyphens directly between characters that are neither hyphens nor
# whitespace (or at a line boundary); group 1 is the preceding character.
EN_DASH_INDENT_RE = re.compile(r"(^|[^-\s])--(?=[^-\s]|$)", flags=re.MULTILINE)


# Lower-cased abbreviation (group 1 of SCOPED_ABBR_RE) -> replacement symbol.
SCOPED_ABBR = {"c": "©", "r": "®", "p": "§", "tm": "™"}


def replaceFn(match: Match[str]):
    """Return the symbol for one ``SCOPED_ABBR_RE`` match.

    The captured abbreviation (group 1) is lower-cased before the lookup so
    that ``(C)`` and ``(c)`` resolve to the same ``SCOPED_ABBR`` entry.
    """
    abbreviation = match.group(1)
    return SCOPED_ABBR[abbreviation.lower()]


def replace_scoped(inlineTokens: List[Token]):
    """Substitute scoped abbreviations such as ``(c)`` in text tokens, in place.

    Text tokens that sit between a ``link_open`` and a ``link_close`` whose
    ``info`` is ``"auto"`` are left untouched.

    :param inlineTokens: child tokens of a single inline token; the
        ``content`` of qualifying text tokens is rewritten.
    """
    # Number of auto-links currently open; only ever compared against zero.
    autolink_depth = 0

    for child in inlineTokens:
        kind = child.type
        if kind == "text":
            if autolink_depth == 0:
                child.content = SCOPED_ABBR_RE.sub(replaceFn, child.content)
        elif kind == "link_open" and child.info == "auto":
            autolink_depth += 1
        elif kind == "link_close" and child.info == "auto":
            autolink_depth -= 1


def replace_rare(inlineTokens: List[Token]):
    """Apply the punctuation replacements to text tokens, in place.

    Handles ``+-``, runs of dots, runs of ``?`` / ``!``, runs of commas and
    double / triple hyphens. Text tokens that sit between a ``link_open`` and
    a ``link_close`` whose ``info`` is ``"auto"`` are left untouched, as are
    text tokens in which ``RARE_RE`` finds nothing.

    :param inlineTokens: child tokens of a single inline token; the
        ``content`` of qualifying text tokens is rewritten.
    """
    # Applied strictly in this order: the ellipsis fix-up relies on the
    # ellipsis substitution having run, and the em-dash pass must consume
    # "---" before the en-dash passes look for "--".
    substitutions = (
        # +- -> ±
        (PLUS_MINUS_RE, "±"),
        # .., ..., ....... -> …
        (ELLIPSIS_RE, "…"),
        # but ?..... & !..... -> ?.. & !..
        (ELLIPSIS_QUESTION_EXCLAMATION_RE, "\\1.."),
        # ???? / !!!!! -> three copies of the captured character
        (QUESTION_EXCLAMATION_RE, "\\1\\1\\1"),
        # ,, ,,, ,,,, -> ,
        (COMMA_RE, ","),
        # em-dash
        (EM_DASH_RE, "\\1\u2014"),
        # en-dash
        (EN_DASH_RE, "\\1\u2013"),
        (EN_DASH_INDENT_RE, "\\1\u2013"),
    )

    # Number of auto-links currently open; only ever compared against zero.
    autolink_depth = 0

    for child in inlineTokens:
        kind = child.type
        if kind == "text":
            if autolink_depth == 0 and RARE_RE.search(child.content):
                text = child.content
                for pattern, replacement in substitutions:
                    text = pattern.sub(replacement, text)
                child.content = text
        elif kind == "link_open" and child.info == "auto":
            autolink_depth += 1
        elif kind == "link_close" and child.info == "auto":
            autolink_depth -= 1


def replace(state: StateCore):
    """Core rule: run the typographic replacements over every inline token.

    Does nothing unless ``state.md.options.typographer`` is truthy. For each
    token of type ``"inline"``, the token's own ``content`` is used as a quick
    test to decide which replacement passes to run over its children.

    :param state: core parser state whose ``tokens`` are updated in place.
    """
    if not state.md.options.typographer:
        return

    inline_tokens = (tok for tok in state.tokens if tok.type == "inline")
    for tok in inline_tokens:
        # The passes below rewrite the children, not ``tok.content`` itself,
        # so one read of the source text serves both checks.
        source = tok.content

        if SCOPED_ABBR_RE.search(source):
            replace_scoped(tok.children)

        if RARE_RE.search(source):
            replace_rare(tok.children)
2 changes: 1 addition & 1 deletion tests/test_api/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ def test_get_rules():
md = MarkdownIt("zero")
# print(md.get_all_rules())
assert md.get_all_rules() == {
"core": ["normalize", "block", "inline"],
"core": ["normalize", "block", "inline", "replacements"],
"block": [
"table",
"code",
Expand Down
21 changes: 21 additions & 0 deletions tests/test_port/fixtures/typographer.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,19 @@
<p>(bad)</p>
.

copyright (Lower)
.
(c)
.
<p>©</p>
.

copyright (Upper)
.
(C)
.
<p>©</p>
.

copyright
.
Expand Down Expand Up @@ -61,6 +74,14 @@ dupes
.


dupes-ellipsis
.
!... ?... ,... !!!!!!.... ????.... ,,...
.
<p>!.. ?.. ,… !!!.. ???.. ,…</p>
.


dashes
.
---markdownit --- super---
Expand Down
11 changes: 11 additions & 0 deletions tests/test_port/test_fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,17 @@
FIXTURE_PATH = Path(__file__).parent.joinpath("fixtures")


@pytest.mark.parametrize(
    "line,title,input,expected",
    read_fixture_file(FIXTURE_PATH / "typographer.md"),
)
def test_typographer(line, title, input, expected):
    """Render one typographer fixture and compare it with the expected HTML.

    The parser is built with the ``replacements`` rule enabled and the
    ``typographer`` option switched on; trailing whitespace is ignored on
    both sides of the comparison.
    """
    parser = MarkdownIt().enable("replacements")
    parser.options["typographer"] = True
    rendered = parser.render(input).rstrip()
    assert rendered == expected.rstrip()


@pytest.mark.parametrize(
"line,title,input,expected", read_fixture_file(FIXTURE_PATH.joinpath("tables.md"))
)
Expand Down

0 comments on commit f290ba9

Please sign in to comment.