diff --git a/.gitignore b/.gitignore index 3d28606b..5e6d298c 100644 --- a/.gitignore +++ b/.gitignore @@ -43,6 +43,7 @@ nosetests.xml coverage.xml *,cover .pytest_cache/ +log/ # Translations *.mo diff --git a/notifications_utils/formatters.py b/notifications_utils/formatters.py index 53731e01..892c18be 100644 --- a/notifications_utils/formatters.py +++ b/notifications_utils/formatters.py @@ -1,5 +1,6 @@ import string import re +from typing import List import urllib import mistune @@ -22,6 +23,13 @@ "\uFEFF" # zero width non-breaking space ) +EMAIL_P_OPEN_TAG = '

' +EMAIL_P_CLOSE_TAG = "

" + +FR_OPEN = r"\[\[fr\]\]" # matches [[fr]] +FR_CLOSE = r"\[\[/fr\]\]" # matches [[/fr]] +EN_OPEN = r"\[\[en\]\]" # matches [[en]] +EN_CLOSE = r"\[\[/en\]\]" # matches [[/en]] mistune._block_quote_leading_pattern = re.compile(r"^ *\^ ?", flags=re.M) mistune.BlockGrammar.block_quote = re.compile(r"^( *\^[^\n]+(\n[^\n]+)*\n*)+") @@ -48,7 +56,8 @@ govuk_not_a_link = re.compile(r"(?".format(tag) for tag in {"cr", "h1", "h2", "p", "normal", "op", "np", "bul", "tab"})), re.IGNORECASE + str("|".join("<{}>".format(tag) for tag in {"cr", "h1", "h2", "p", "normal", "op", "np", "bul", "tab"})), + re.IGNORECASE, ) smartypants.tags_to_skip = smartypants.tags_to_skip + ["a"] @@ -129,7 +138,13 @@ def url_encode_full_stops(value): def unescaped_formatted_list( - items, conjunction="and", before_each="‘", after_each="’", separator=", ", prefix="", prefix_plural="" + items, + conjunction="and", + before_each="‘", + after_each="’", + separator=", ", + prefix="", + prefix_plural="", ): if prefix: prefix += " " @@ -146,10 +161,24 @@ def unescaped_formatted_list( return ("{prefix_plural}{first_items} {conjunction} {last_item}").format(**locals()) -def formatted_list(items, conjunction="and", before_each="‘", after_each="’", separator=", ", prefix="", prefix_plural=""): +def formatted_list( + items, + conjunction="and", + before_each="‘", + after_each="’", + separator=", ", + prefix="", + prefix_plural="", +): return Markup( unescaped_formatted_list( - [escape_html(x) for x in items], conjunction, before_each, after_each, separator, prefix, prefix_plural + [escape_html(x) for x in items], + conjunction, + before_each, + after_each, + separator, + prefix, + prefix_plural, ) ) @@ -200,6 +229,55 @@ def add_trailing_newline(value): return "{}\n".format(value) +def is_valid_index(index: int, lines: List[str]): + return index >= 0 and index < len(lines) + + +def insert_newline_after(lines: List[str], tag_index: int): + # no need to insert newlines at the end of the file + if tag_index == len(lines) - 1: + return + if not is_valid_index(tag_index + 1, lines): + return + if lines[tag_index + 1] == "": + return + + lines.insert(tag_index + 1, "") # insert 1 newline + + +def insert_newline_before(lines: List[str], tag_index: int): + # no need to insert newlines at the beginning of the file + if tag_index == 0: + return + if not is_valid_index(tag_index - 1, lines): + return + if lines[tag_index - 1] == "": + return + + lines.insert(tag_index, "") # insert 1 newline + + +def add_newlines_around_lang_tags(content: str) -> str: + lines = content.splitlines() + all_tags = ["[[fr]]", "[[/fr]]", "[[en]]", "[[/en]]"] + for tag in all_tags: + # strip whitespace + for index, line in enumerate(lines): + if tag in line and line.strip() == tag: + lines[index] = line.strip() + + if tag not in lines: + continue + + tag_index = lines.index(tag) + + insert_newline_before(lines, tag_index) + new_tag_index = lines.index(tag) + insert_newline_after(lines, new_tag_index) + new_content = "\n".join(lines) + return new_content + + def tweak_dvla_list_markup(value): return value.replace("", "").replace("

", "

") @@ -363,7 +441,7 @@ def list_item(self, text): def paragraph(self, text): if text.strip(): - return ('

{}

').format(text) + return f"{EMAIL_P_OPEN_TAG}{text}{EMAIL_P_CLOSE_TAG}" return "" def block_quote(self, text): @@ -388,7 +466,9 @@ def autolink(self, link, is_email=False): if is_email: return link return '{}'.format( - LINK_STYLE, urllib.parse.quote(urllib.parse.unquote(link), safe=":/?#=&;"), link + LINK_STYLE, + urllib.parse.quote(urllib.parse.unquote(link), safe=":/?#=&;"), + link, ) def double_emphasis(self, text): @@ -515,3 +595,32 @@ def link(self, link, title, content): hard_wrap=True, use_xhtml=False, ) + + +def add_language_divs(_content: str) -> str: + """ + Custom parser to add the language divs. We need to search for and remove the EMAIL_P_OPEN_TAG + and EMAIL_P_CLOSE_TAG because the mistune parser has already run and put our [[lang]] tags inside + paragraphs. + """ + select_anything = r"([\s\S]*)" + fr_regex = re.compile( + f"{EMAIL_P_OPEN_TAG}{FR_OPEN}{EMAIL_P_CLOSE_TAG}{select_anything}{EMAIL_P_OPEN_TAG}{FR_CLOSE}{EMAIL_P_CLOSE_TAG}" + ) # matches

[[fr]]

anything

[[/fr]]

+ content = fr_regex.sub(r'
\1
', _content) # \1 returns the "anything" content above + + en_regex = re.compile( + f"{EMAIL_P_OPEN_TAG}{EN_OPEN}{EMAIL_P_CLOSE_TAG}{select_anything}{EMAIL_P_OPEN_TAG}{EN_CLOSE}{EMAIL_P_CLOSE_TAG}" + ) # matches

[[en]]

anything

[[/en]]

+ content = en_regex.sub(r'
\1
', content) # \1 returns the "anything" content above + return content + + +def remove_language_divs(_content: str) -> str: + """Remove the tags from content. This fn is for use in the email + preheader, since this is plain text not html""" + content = re.compile(FR_OPEN).sub("", _content) + content = re.compile(FR_CLOSE).sub("", content) + content = re.compile(EN_OPEN).sub("", content) + content = re.compile(EN_CLOSE).sub("", content) + return content diff --git a/notifications_utils/template.py b/notifications_utils/template.py index 2311a964..6efbfc0d 100644 --- a/notifications_utils/template.py +++ b/notifications_utils/template.py @@ -14,7 +14,9 @@ unlink_govuk_escaped, nl2br, nl2li, + add_language_divs, add_prefix, + add_newlines_around_lang_tags, autolink_sms, notify_email_markdown, notify_email_preheader_markdown, @@ -26,6 +28,7 @@ strip_dvla_markup, strip_pipes, remove_whitespace_before_punctuation, + remove_language_divs, make_quotes_smart, replace_hyphens_with_en_dashes, replace_hyphens_with_non_breaking_hyphens, @@ -57,7 +60,13 @@ class Template: encoding = "utf-8" - def __init__(self, template, values=None, redact_missing_personalisation=False, jinja_path=None): + def __init__( + self, + template, + values=None, + redact_missing_personalisation=False, + jinja_path=None, + ): if not isinstance(template, dict): raise TypeError("Template must be a dict") if values is not None and not isinstance(values, dict): @@ -140,7 +149,15 @@ def is_message_too_long(self): class SMSMessageTemplate(Template): - def __init__(self, template, values=None, prefix=None, show_prefix=True, sender=None, jinja_path=None): + def __init__( + self, + template, + values=None, + prefix=None, + show_prefix=True, + sender=None, + jinja_path=None, + ): self.prefix = prefix self.show_prefix = show_prefix self.sender = sender @@ -222,7 +239,10 @@ def __str__(self): redact_missing_personalisation=self.redact_missing_personalisation, ) ) - .then(add_prefix, (escape_html(self.prefix) or None) if self.show_prefix else None) + .then( + add_prefix, + (escape_html(self.prefix) or None) if self.show_prefix else None, + ) .then(sms_encode if self.downgrade_non_sms_characters else str) .then(remove_whitespace_before_punctuation) .then(nl2br) @@ -241,7 +261,12 @@ def __init__( jinja_path=None, ): self._subject = template["subject"] - super().__init__(template, values, redact_missing_personalisation=redact_missing_personalisation, jinja_path=jinja_path) + super().__init__( + template, + values, + redact_missing_personalisation=redact_missing_personalisation, + jinja_path=jinja_path, + ) def __str__(self): return str( @@ -354,6 +379,7 @@ def preheader(self): .then(strip_unsupported_characters) .then(add_trailing_newline) .then(notify_email_preheader_markdown) + .then(remove_language_divs) .then(do_nice_typography) .split() )[: self.PREHEADER_LENGTH_IN_CHARACTERS].strip() @@ -396,7 +422,12 @@ def __init__( logo_with_background_colour=None, asset_domain=None, ): - super().__init__(template, values, redact_missing_personalisation=redact_missing_personalisation, jinja_path=jinja_path) + super().__init__( + template, + values, + redact_missing_personalisation=redact_missing_personalisation, + jinja_path=jinja_path, + ) self.from_name = from_name self.from_address = from_address self.reply_to = reply_to @@ -415,7 +446,9 @@ def __str__(self): self.jinja_template.render( { "body": get_html_email_body( - self.content, self.values, redact_missing_personalisation=self.redact_missing_personalisation + self.content, + self.values, + redact_missing_personalisation=self.redact_missing_personalisation, ), "subject": self.subject, "from_name": escape_html(self.from_name), @@ -439,7 +472,10 @@ def subject(self): return ( Take( Field( - self._subject, self.values, html="escape", redact_missing_personalisation=self.redact_missing_personalisation + self._subject, + self.values, + html="escape", + redact_missing_personalisation=self.redact_missing_personalisation, ) ) .then(do_nice_typography) @@ -474,7 +510,11 @@ def __init__( date=None, ): self.contact_block = (contact_block or "").strip() - super().__init__(template, values, redact_missing_personalisation=redact_missing_personalisation) + super().__init__( + template, + values, + redact_missing_personalisation=redact_missing_personalisation, + ) self.admin_base_url = admin_base_url self.logo_file_name = logo_file_name self.date = date or datetime.utcnow() @@ -696,8 +736,10 @@ def get_html_email_body(template_content, template_values, redact_missing_person ) .then(unlink_govuk_escaped) .then(strip_unsupported_characters) + .then(add_newlines_around_lang_tags) .then(add_trailing_newline) .then(notify_email_markdown) + .then(add_language_divs) .then(do_nice_typography) ) diff --git a/notifications_utils/version.py b/notifications_utils/version.py index 3047e9db..250ada76 100644 --- a/notifications_utils/version.py +++ b/notifications_utils/version.py @@ -1,2 +1,2 @@ -__version__ = "43.11.1" +__version__ = "44.0.0" # GDS version '34.0.1' diff --git a/tests/test_formatters.py b/tests/test_formatters.py index 7f0124cd..6747816b 100644 --- a/tests/test_formatters.py +++ b/tests/test_formatters.py @@ -2,6 +2,8 @@ from flask import Markup from notifications_utils.formatters import ( + add_language_divs, + remove_language_divs, unlink_govuk_escaped, notify_email_markdown, notify_letter_preview_markdown, @@ -947,3 +949,42 @@ def test_strip_unsupported_characters(): def test_normalise_whitespace(): assert normalise_whitespace("\u200C Your tax is\ndue\n\n") == "Your tax is due" + + +@pytest.mark.parametrize("lang", ("en", "fr")) +def test_add_language_divs_fr_replaces(lang: str): + _content = ( + f'

[[{lang}]]

' + '

' + "title

" + '

' + "Comment vas-tu aujourd'hui?

" + f'

[[/{lang}]]

' + ) + content = ( + f'
' + '

' + "title

" + '

' + "Comment vas-tu aujourd'hui?

" + ) + assert add_language_divs(_content) == content + + +@pytest.mark.parametrize("lang", ("en", "fr")) +def test_add_language_divs_fr_does_not_replace(lang: str): + _content = f"[[{lang}]] asdf [[/{lang}]]" + assert add_language_divs(_content) == _content + + +@pytest.mark.parametrize( + "input,output", + ( + ("abc 123", "abc 123"), + ("[[fr]]\n\nabc\n\n[[/fr]]", "\n\nabc\n\n"), + ("[[en]]\n\nabc\n\n[[/en]]", "\n\nabc\n\n"), + ("[[en]]\n\nabc\n\n[[/en]]\n\n[[fr]]\n\n123\n\n[[/fr]]", "\n\nabc\n\n\n\n\n\n123\n\n"), + ), +) +def test_remove_language_divs(input: str, output: str): + assert remove_language_divs(input) == output diff --git a/tests/test_template.py b/tests/test_template.py new file mode 100644 index 00000000..565b7c1e --- /dev/null +++ b/tests/test_template.py @@ -0,0 +1,43 @@ +import pytest +from notifications_utils.template import get_html_email_body + + +def test_lang_tags_in_templates(): + content = "[[en]]\n# EN title\nEN body\n[[/en]]\n[[fr]]\n# FR title\n FR content\n[[/fr]]" + html = get_html_email_body(content, {}) + assert '
' in html + assert '
' in html + assert "h2" in html + + +@pytest.mark.parametrize( + "bad_content", + [ + "[[en]\nEN text\n[[/en]]", # missing bracket + "[[en]]EN text\n[[/en]]", # missing \n + "[[en]]\nEN text[[/en]]", # missing \n + "[[EN]]\nEN text\n[[/EN]]", # tags not lowercase + "[[en]]\nEN text\n", # tag missing + "EN text\n[[/en]]", # tag missing + "((en))\nEN text\n((/en))", # wrong brackets + "[[en]]EN text[[/en]]", # tags not on their own line + ], +) +def test_lang_tags_in_templates_bad_content(bad_content: str): + html = get_html_email_body(bad_content, {}) + assert '
' not in html + + +@pytest.mark.parametrize( + "good_content", + [ + "[[fr]]\nFR text\n[[/fr]]", + "[[fr]]\n\nFR text\n\n[[/fr]]", # extra newline + "[[fr]]\n\n\nFR text\n\n\n[[/fr]]", # two extra newlines + "[[fr]] \nFR text\n[[/fr]] ", # extra spaces + " [[fr]] \nFR text\n [[/fr]] \t ", # more extra spaces and tabs + ], +) +def test_lang_tags_in_templates_good_content(good_content: str): + html = get_html_email_body(good_content, {}) + assert '
' in html