Skip to content

Commit

Permalink
Add a custom parser to insert the divs with lang attributes (#92)
Browse files Browse the repository at this point in the history
* added custom parser to insert the divs with lang attributes

* added unit tests

* bump version number

* remove the [[lang]] tags from the email preheader text

* add newlines around language tags if they do not exist

* fixed test and added fn to remove whitespace from lines containing language tags
  • Loading branch information
smcmurtry authored Jul 20, 2021
1 parent 00d4023 commit d34694d
Show file tree
Hide file tree
Showing 6 changed files with 251 additions and 15 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ nosetests.xml
coverage.xml
*,cover
.pytest_cache/
log/

# Translations
*.mo
Expand Down
121 changes: 115 additions & 6 deletions notifications_utils/formatters.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import string
import re
from typing import List
import urllib

import mistune
Expand All @@ -22,6 +23,13 @@
"\uFEFF" # zero width non-breaking space
)

EMAIL_P_OPEN_TAG = '<p style="Margin: 0 0 20px 0; font-size: 19px; line-height: 25px; color: #0B0C0C;">'
EMAIL_P_CLOSE_TAG = "</p>"

FR_OPEN = r"\[\[fr\]\]" # matches [[fr]]
FR_CLOSE = r"\[\[/fr\]\]" # matches [[/fr]]
EN_OPEN = r"\[\[en\]\]" # matches [[en]]
EN_CLOSE = r"\[\[/en\]\]" # matches [[/en]]

mistune._block_quote_leading_pattern = re.compile(r"^ *\^ ?", flags=re.M)
mistune.BlockGrammar.block_quote = re.compile(r"^( *\^[^\n]+(\n[^\n]+)*\n*)+")
Expand All @@ -48,7 +56,8 @@
govuk_not_a_link = re.compile(r"(?<!\.|\/)(GOV)\.(UK)(?!\/|\?)", re.IGNORECASE)

dvla_markup_tags = re.compile(
str("|".join("<{}>".format(tag) for tag in {"cr", "h1", "h2", "p", "normal", "op", "np", "bul", "tab"})), re.IGNORECASE
str("|".join("<{}>".format(tag) for tag in {"cr", "h1", "h2", "p", "normal", "op", "np", "bul", "tab"})),
re.IGNORECASE,
)

smartypants.tags_to_skip = smartypants.tags_to_skip + ["a"]
Expand Down Expand Up @@ -129,7 +138,13 @@ def url_encode_full_stops(value):


def unescaped_formatted_list(
items, conjunction="and", before_each="‘", after_each="’", separator=", ", prefix="", prefix_plural=""
items,
conjunction="and",
before_each="‘",
after_each="’",
separator=", ",
prefix="",
prefix_plural="",
):
if prefix:
prefix += " "
Expand All @@ -146,10 +161,24 @@ def unescaped_formatted_list(
return ("{prefix_plural}{first_items} {conjunction} {last_item}").format(**locals())


def formatted_list(items, conjunction="and", before_each="‘", after_each="’", separator=", ", prefix="", prefix_plural=""):
def formatted_list(
items,
conjunction="and",
before_each="‘",
after_each="’",
separator=", ",
prefix="",
prefix_plural="",
):
return Markup(
unescaped_formatted_list(
[escape_html(x) for x in items], conjunction, before_each, after_each, separator, prefix, prefix_plural
[escape_html(x) for x in items],
conjunction,
before_each,
after_each,
separator,
prefix,
prefix_plural,
)
)

Expand Down Expand Up @@ -200,6 +229,55 @@ def add_trailing_newline(value):
return "{}\n".format(value)


def is_valid_index(index: int, lines: List[str]):
    """Return True when ``index`` is a non-negative position that exists in ``lines``."""
    return 0 <= index < len(lines)


def insert_newline_after(lines: List[str], tag_index: int):
    """
    Insert an empty line directly after ``lines[tag_index]``, mutating ``lines`` in place.

    Nothing is inserted when the tag is the last line, when the following position does not
    exist in ``lines``, or when the following line is already empty.
    """
    following = tag_index + 1
    # A tag on the last line makes `following` equal to len(lines), so the bounds check below
    # also covers the "end of the file" case.
    if 0 <= following < len(lines) and lines[following] != "":
        lines.insert(following, "")


def insert_newline_before(lines: List[str], tag_index: int):
    """
    Insert an empty line directly before ``lines[tag_index]``, mutating ``lines`` in place.

    Nothing is inserted when the tag is the first line, when the preceding position does not
    exist in ``lines``, or when the preceding line is already empty.
    """
    preceding = tag_index - 1
    # A tag on the first line makes `preceding` equal to -1, so the bounds check below also
    # covers the "beginning of the file" case.
    if 0 <= preceding < len(lines) and lines[preceding] != "":
        lines.insert(tag_index, "")


def add_newlines_around_lang_tags(content: str) -> str:
    """
    Make sure every language tag line is separated from its neighbours by an empty line.

    A line counts as a language tag when, ignoring surrounding whitespace, it is exactly one of
    [[fr]], [[/fr]], [[en]] or [[/en]]. Such lines have their surrounding whitespace removed, and
    an empty line is inserted before and after them unless one is already there or the tag is
    the first / last line of the content. Tags that share a line with other text are left alone.

    Every occurrence of a tag is handled, not only the first one, so content with several
    sections in the same language is padded consistently.

    The result is re-joined with "\\n", so a trailing newline in ``content`` is not preserved.
    """
    lang_tags = ("[[fr]]", "[[/fr]]", "[[en]]", "[[/en]]")
    padded = []
    previous_was_tag = False
    for raw_line in content.splitlines():
        stripped = raw_line.strip()
        is_tag = stripped in lang_tags
        line = stripped if is_tag else raw_line

        # A gap is needed on either side of a tag, but only between two non-empty lines:
        # never at the very start, and never next to an empty line that is already there.
        touches_tag = is_tag or previous_was_tag
        if touches_tag and padded and padded[-1] != "" and line != "":
            padded.append("")

        padded.append(line)
        previous_was_tag = is_tag
    return "\n".join(padded)


def tweak_dvla_list_markup(value):
    """Collapse the doubled "<cr><cr><np>" and "<p><cr><p><cr>" markup sequences in ``value``."""
    without_double_cr = value.replace("<cr><cr><np>", "<cr><np>")
    return without_double_cr.replace("<p><cr><p><cr>", "<p><cr>")

Expand Down Expand Up @@ -363,7 +441,7 @@ def list_item(self, text):

def paragraph(self, text):
if text.strip():
return ('<p style="Margin: 0 0 20px 0; font-size: 19px; line-height: 25px; color: #0B0C0C;">{}</p>').format(text)
return f"{EMAIL_P_OPEN_TAG}{text}{EMAIL_P_CLOSE_TAG}"
return ""

def block_quote(self, text):
Expand All @@ -388,7 +466,9 @@ def autolink(self, link, is_email=False):
if is_email:
return link
return '<a style="{}" href="{}">{}</a>'.format(
LINK_STYLE, urllib.parse.quote(urllib.parse.unquote(link), safe=":/?#=&;"), link
LINK_STYLE,
urllib.parse.quote(urllib.parse.unquote(link), safe=":/?#=&;"),
link,
)

def double_emphasis(self, text):
Expand Down Expand Up @@ -515,3 +595,32 @@ def link(self, link, title, content):
hard_wrap=True,
use_xhtml=False,
)


def add_language_divs(_content: str) -> str:
    """
    Custom parser to add the language divs.

    This runs after the mistune parser, which has already put each [[lang]] tag inside its own
    email paragraph. Each pattern therefore matches the EMAIL_P_OPEN_TAG / EMAIL_P_CLOSE_TAG
    wrapper together with the tag itself, so both are removed and everything between an opening
    and a closing tag ends up inside ``<div lang="fr-ca">`` or ``<div lang="en-ca">``.

    Tags that are not wrapped in their own paragraph do not match and are left untouched.
    """
    # Escape the paragraph markup so it is always matched literally, even if the style string
    # later gains characters that are special in a regex.
    p_open = re.escape(EMAIL_P_OPEN_TAG)
    p_close = re.escape(EMAIL_P_CLOSE_TAG)

    # Lazy on purpose: a greedy match would run from the first opening tag to the LAST closing
    # tag, merging separate sections of the same language into one div and leaving the tag
    # paragraphs between them in the output.
    select_anything = r"([\s\S]*?)"

    content = _content
    for open_tag, close_tag, lang in (
        (FR_OPEN, FR_CLOSE, "fr-ca"),
        (EN_OPEN, EN_CLOSE, "en-ca"),
    ):
        # matches <p ...>[[lang]]</p>anything<p ...>[[/lang]]</p>
        pattern = f"{p_open}{open_tag}{p_close}{select_anything}{p_open}{close_tag}{p_close}"
        # \1 is the "anything" content captured above
        content = re.sub(pattern, rf'<div lang="{lang}">\1</div>', content)
    return content


def remove_language_divs(_content: str) -> str:
    """
    Delete the [[fr]], [[/fr]], [[en]] and [[/en]] tags from the content.

    This fn is for use in the email preheader, which is plain text rather than html, so the
    tags are simply removed instead of being turned into divs.
    """
    cleaned = _content
    # Applied one pattern at a time, in this order.
    for tag_pattern in (FR_OPEN, FR_CLOSE, EN_OPEN, EN_CLOSE):
        cleaned = re.sub(tag_pattern, "", cleaned)
    return cleaned
58 changes: 50 additions & 8 deletions notifications_utils/template.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@
unlink_govuk_escaped,
nl2br,
nl2li,
add_language_divs,
add_prefix,
add_newlines_around_lang_tags,
autolink_sms,
notify_email_markdown,
notify_email_preheader_markdown,
Expand All @@ -26,6 +28,7 @@
strip_dvla_markup,
strip_pipes,
remove_whitespace_before_punctuation,
remove_language_divs,
make_quotes_smart,
replace_hyphens_with_en_dashes,
replace_hyphens_with_non_breaking_hyphens,
Expand Down Expand Up @@ -57,7 +60,13 @@ class Template:

encoding = "utf-8"

def __init__(self, template, values=None, redact_missing_personalisation=False, jinja_path=None):
def __init__(
self,
template,
values=None,
redact_missing_personalisation=False,
jinja_path=None,
):
if not isinstance(template, dict):
raise TypeError("Template must be a dict")
if values is not None and not isinstance(values, dict):
Expand Down Expand Up @@ -140,7 +149,15 @@ def is_message_too_long(self):


class SMSMessageTemplate(Template):
def __init__(self, template, values=None, prefix=None, show_prefix=True, sender=None, jinja_path=None):
def __init__(
self,
template,
values=None,
prefix=None,
show_prefix=True,
sender=None,
jinja_path=None,
):
self.prefix = prefix
self.show_prefix = show_prefix
self.sender = sender
Expand Down Expand Up @@ -222,7 +239,10 @@ def __str__(self):
redact_missing_personalisation=self.redact_missing_personalisation,
)
)
.then(add_prefix, (escape_html(self.prefix) or None) if self.show_prefix else None)
.then(
add_prefix,
(escape_html(self.prefix) or None) if self.show_prefix else None,
)
.then(sms_encode if self.downgrade_non_sms_characters else str)
.then(remove_whitespace_before_punctuation)
.then(nl2br)
Expand All @@ -241,7 +261,12 @@ def __init__(
jinja_path=None,
):
self._subject = template["subject"]
super().__init__(template, values, redact_missing_personalisation=redact_missing_personalisation, jinja_path=jinja_path)
super().__init__(
template,
values,
redact_missing_personalisation=redact_missing_personalisation,
jinja_path=jinja_path,
)

def __str__(self):
return str(
Expand Down Expand Up @@ -354,6 +379,7 @@ def preheader(self):
.then(strip_unsupported_characters)
.then(add_trailing_newline)
.then(notify_email_preheader_markdown)
.then(remove_language_divs)
.then(do_nice_typography)
.split()
)[: self.PREHEADER_LENGTH_IN_CHARACTERS].strip()
Expand Down Expand Up @@ -396,7 +422,12 @@ def __init__(
logo_with_background_colour=None,
asset_domain=None,
):
super().__init__(template, values, redact_missing_personalisation=redact_missing_personalisation, jinja_path=jinja_path)
super().__init__(
template,
values,
redact_missing_personalisation=redact_missing_personalisation,
jinja_path=jinja_path,
)
self.from_name = from_name
self.from_address = from_address
self.reply_to = reply_to
Expand All @@ -415,7 +446,9 @@ def __str__(self):
self.jinja_template.render(
{
"body": get_html_email_body(
self.content, self.values, redact_missing_personalisation=self.redact_missing_personalisation
self.content,
self.values,
redact_missing_personalisation=self.redact_missing_personalisation,
),
"subject": self.subject,
"from_name": escape_html(self.from_name),
Expand All @@ -439,7 +472,10 @@ def subject(self):
return (
Take(
Field(
self._subject, self.values, html="escape", redact_missing_personalisation=self.redact_missing_personalisation
self._subject,
self.values,
html="escape",
redact_missing_personalisation=self.redact_missing_personalisation,
)
)
.then(do_nice_typography)
Expand Down Expand Up @@ -474,7 +510,11 @@ def __init__(
date=None,
):
self.contact_block = (contact_block or "").strip()
super().__init__(template, values, redact_missing_personalisation=redact_missing_personalisation)
super().__init__(
template,
values,
redact_missing_personalisation=redact_missing_personalisation,
)
self.admin_base_url = admin_base_url
self.logo_file_name = logo_file_name
self.date = date or datetime.utcnow()
Expand Down Expand Up @@ -696,8 +736,10 @@ def get_html_email_body(template_content, template_values, redact_missing_person
)
.then(unlink_govuk_escaped)
.then(strip_unsupported_characters)
.then(add_newlines_around_lang_tags)
.then(add_trailing_newline)
.then(notify_email_markdown)
.then(add_language_divs)
.then(do_nice_typography)
)

Expand Down
2 changes: 1 addition & 1 deletion notifications_utils/version.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
__version__ = "43.11.1"
__version__ = "44.0.0"
# GDS version '34.0.1'
41 changes: 41 additions & 0 deletions tests/test_formatters.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
from flask import Markup

from notifications_utils.formatters import (
add_language_divs,
remove_language_divs,
unlink_govuk_escaped,
notify_email_markdown,
notify_letter_preview_markdown,
Expand Down Expand Up @@ -947,3 +949,42 @@ def test_strip_unsupported_characters():

def test_normalise_whitespace():
assert normalise_whitespace("\u200C Your tax is\ndue\n\n") == "Your tax is due"


@pytest.mark.parametrize("lang", ("en", "fr"))
def test_add_language_divs_fr_replaces(lang: str):
    """Tag paragraphs around a heading and a paragraph are replaced by one lang div."""
    p_open = '<p style="Margin: 0 0 20px 0; font-size: 19px; line-height: 25px; color: #0B0C0C;">'
    h2_open = (
        '<h2 style="Margin: 0 0 20px 0; padding: 0; font-size: 27px; line-height: 35px; '
        'font-weight: bold; color: #0B0C0C;">'
    )
    # The markup between the tags must come through unchanged.
    inner_html = f"{h2_open}title</h2>{p_open}Comment vas-tu aujourd'hui?</p>"

    rendered = f"{p_open}[[{lang}]]</p>{inner_html}{p_open}[[/{lang}]]</p>"
    expected = f'<div lang="{lang}-ca">{inner_html}</div>'

    assert add_language_divs(rendered) == expected


@pytest.mark.parametrize("lang", ("en", "fr"))
def test_add_language_divs_fr_does_not_replace(lang: str):
    """Tags that are not wrapped in their own email paragraphs are left as they are."""
    inline_tags = f"[[{lang}]] asdf [[/{lang}]]"
    result = add_language_divs(inline_tags)
    assert result == inline_tags


@pytest.mark.parametrize(
    "content,expected",
    (
        ("abc 123", "abc 123"),
        ("[[fr]]\n\nabc\n\n[[/fr]]", "\n\nabc\n\n"),
        ("[[en]]\n\nabc\n\n[[/en]]", "\n\nabc\n\n"),
        ("[[en]]\n\nabc\n\n[[/en]]\n\n[[fr]]\n\n123\n\n[[/fr]]", "\n\nabc\n\n\n\n\n\n123\n\n"),
    ),
)
def test_remove_language_divs(content: str, expected: str):
    """The language tags are deleted and all other text, including newlines, is kept."""
    # Arguments are named content/expected so the test does not shadow the builtin `input`.
    assert remove_language_divs(content) == expected
Loading

0 comments on commit d34694d

Please sign in to comment.