Skip to content

Commit

Permalink
Fix/repeated lang tags (#129)
Browse files Browse the repository at this point in the history
* Make the regex for language tag replacement more flexible

Allow the code to work whether or not mistune has added its paragraph tags or not;
Allow nested language tags to work (limitiation: you can't nest the same lang code inside itself i.e. FR within FR - but this doesn't make sense anyway)

* Remove obsolete newline code

* Update lang tag tests; some cases are now permitted that weren't before

* Remove check for "bad" cases that now work.

* Removed code that adds newlines around lang tags since they are no longer needed.

* Fix incorrect comment

* Bump version

* Bump version again (another PR took this version #!)
  • Loading branch information
andrewleith authored Sep 12, 2022
1 parent c3b96ec commit 15dc291
Show file tree
Hide file tree
Showing 5 changed files with 87 additions and 47 deletions.
10 changes: 5 additions & 5 deletions notifications_utils/formatters.py
Original file line number Diff line number Diff line change
Expand Up @@ -610,16 +610,16 @@ def add_language_divs(_content: str) -> str:
because the mistune parser has already run and put our [[lang]] tags inside
paragraphs.
"""
select_anything = r"([\s\S]*)"
select_anything = r"([\s\S]*?)"
fr_regex = re.compile(
f"{EMAIL_P_OPEN_TAG}{FR_OPEN}{EMAIL_P_CLOSE_TAG}{select_anything}{EMAIL_P_OPEN_TAG}{FR_CLOSE}{EMAIL_P_CLOSE_TAG}"
f"({EMAIL_P_OPEN_TAG})?{FR_OPEN}({EMAIL_P_CLOSE_TAG})?{select_anything}({EMAIL_P_OPEN_TAG})?{FR_CLOSE}({EMAIL_P_CLOSE_TAG})?" # noqa
) # matches <p ...>[[fr]]</p>anything<p ...>[[/fr]]</p>
content = fr_regex.sub(r'<div lang="fr-ca">\1</div>', _content) # \1 returns the "anything" content above
content = fr_regex.sub(r'<div lang="fr-ca">\3</div>', _content) # \3 returns the "anything" content above (3rd group)

en_regex = re.compile(
f"{EMAIL_P_OPEN_TAG}{EN_OPEN}{EMAIL_P_CLOSE_TAG}{select_anything}{EMAIL_P_OPEN_TAG}{EN_CLOSE}{EMAIL_P_CLOSE_TAG}"
f"({EMAIL_P_OPEN_TAG})?{EN_OPEN}({EMAIL_P_CLOSE_TAG})?{select_anything}({EMAIL_P_OPEN_TAG})?{EN_CLOSE}({EMAIL_P_CLOSE_TAG})?" # noqa
) # matches <p ...>[[en]]</p>anything<p ...>[[/en]]</p>
content = en_regex.sub(r'<div lang="en-ca">\1</div>', content) # \1 returns the "anything" content above
content = en_regex.sub(r'<div lang="en-ca">\3</div>', content) # \3 returns the "anything" content above (3rd group)
return content


Expand Down
2 changes: 0 additions & 2 deletions notifications_utils/template.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
add_ircc_gc_seal,
add_language_divs,
add_prefix,
add_newlines_around_lang_tags,
autolink_sms,
notify_email_markdown,
notify_email_preheader_markdown,
Expand Down Expand Up @@ -753,7 +752,6 @@ def get_html_email_body(template_content, template_values, redact_missing_person
)
.then(unlink_govuk_escaped)
.then(strip_unsupported_characters)
.then(add_newlines_around_lang_tags)
.then(add_trailing_newline)
.then(notify_email_markdown)
.then(add_language_divs)
Expand Down
2 changes: 1 addition & 1 deletion notifications_utils/version.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
__version__ = "48.1.1"
__version__ = "48.2.0"
# GDS version '34.0.1'
117 changes: 81 additions & 36 deletions tests/test_formatters.py
Original file line number Diff line number Diff line change
Expand Up @@ -956,43 +956,88 @@ def test_normalise_whitespace():
assert normalise_whitespace("\u200C Your tax is\ndue\n\n") == "Your tax is due"


@pytest.mark.parametrize("lang", ("en", "fr"))
def test_add_language_divs_fr_replaces(lang: str):
_content = (
f'<p style="Margin: 0 0 20px 0; font-size: 19px; line-height: 25px; color: #0B0C0C;">[[{lang}]]</p>'
'<h2 style="Margin: 0 0 20px 0; padding: 0; font-size: 27px; line-height: 35px; font-weight: bold; color: #0B0C0C;">'
"title</h2>"
'<p style="Margin: 0 0 20px 0; font-size: 19px; line-height: 25px; color: #0B0C0C;">'
"Comment vas-tu aujourd'hui?</p>"
f'<p style="Margin: 0 0 20px 0; font-size: 19px; line-height: 25px; color: #0B0C0C;">[[/{lang}]]</p>'
)
content = (
f'<div lang="{lang}-ca">'
'<h2 style="Margin: 0 0 20px 0; padding: 0; font-size: 27px; line-height: 35px; font-weight: bold; color: #0B0C0C;">'
"title</h2>"
'<p style="Margin: 0 0 20px 0; font-size: 19px; line-height: 25px; color: #0B0C0C;">'
"Comment vas-tu aujourd'hui?</p></div>"
)
assert add_language_divs(_content) == content


@pytest.mark.parametrize("lang", ("en", "fr"))
def test_add_language_divs_fr_does_not_replace(lang: str):
_content = f"[[{lang}]] asdf [[/{lang}]]"
assert add_language_divs(_content) == _content

class TestAddLanguageDivs:
@pytest.mark.parametrize("lang", ("en", "fr"))
def test_add_language_divs_fr_replaces(self, lang: str):
_content = (
f'<p style="Margin: 0 0 20px 0; font-size: 19px; line-height: 25px; color: #0B0C0C;">[[{lang}]]</p>'
'<h2 style="Margin: 0 0 20px 0; padding: 0; font-size: 27px; line-height: 35px; font-weight: bold; color: #0B0C0C;">'
"title</h2>"
'<p style="Margin: 0 0 20px 0; font-size: 19px; line-height: 25px; color: #0B0C0C;">'
"Comment vas-tu aujourd'hui?</p>"
f'<p style="Margin: 0 0 20px 0; font-size: 19px; line-height: 25px; color: #0B0C0C;">[[/{lang}]]</p>'
)
content = (
f'<div lang="{lang}-ca">'
'<h2 style="Margin: 0 0 20px 0; padding: 0; font-size: 27px; line-height: 35px; font-weight: bold; color: #0B0C0C;">'
"title</h2>"
'<p style="Margin: 0 0 20px 0; font-size: 19px; line-height: 25px; color: #0B0C0C;">'
"Comment vas-tu aujourd'hui?</p></div>"
)
assert add_language_divs(_content) == content

@pytest.mark.parametrize(
"input,output",
(
("abc 123", "abc 123"),
("[[fr]]\n\nabc\n\n[[/fr]]", "\n\nabc\n\n"),
("[[en]]\n\nabc\n\n[[/en]]", "\n\nabc\n\n"),
("[[en]]\n\nabc\n\n[[/en]]\n\n[[fr]]\n\n123\n\n[[/fr]]", "\n\nabc\n\n\n\n\n\n123\n\n"),
),
)
def test_remove_language_divs(input: str, output: str):
assert remove_language_divs(input) == output
@pytest.mark.parametrize(
"input,output",
(
("abc 123", "abc 123"),
("[[fr]]\n\nabc\n\n[[/fr]]", "\n\nabc\n\n"),
("[[en]]\n\nabc\n\n[[/en]]", "\n\nabc\n\n"),
("[[en]]\n\nabc\n\n[[/en]]\n\n[[fr]]\n\n123\n\n[[/fr]]", "\n\nabc\n\n\n\n\n\n123\n\n"),
),
)
def test_remove_language_divs(self, input: str, output: str):
assert remove_language_divs(input) == output

def test_multiple_language_div_after_mistune(self):
testString = '<p style="Margin: 0 0 20px 0; font-size: 19px; line-height: 25px; color: #0B0C0C;">[[fr]]</p><p style="Margin: 0 0 20px 0; font-size: 19px; line-height: 25px; color: #0B0C0C;">Le français suis l\'anglais</p><p style="Margin: 0 0 20px 0; font-size: 19px; line-height: 25px; color: #0B0C0C;">[[/fr]]</p><p style="Margin: 0 0 20px 0; font-size: 19px; line-height: 25px; color: #0B0C0C;">[[en]]</p><p style="Margin: 0 0 20px 0; font-size: 19px; line-height: 25px; color: #0B0C0C;">hi</p><p style="Margin: 0 0 20px 0; font-size: 19px; line-height: 25px; color: #0B0C0C;">[[/en]]</p><p style="Margin: 0 0 20px 0; font-size: 19px; line-height: 25px; color: #0B0C0C;">[[fr]]<br />Bonjour<br />[[/fr]]</p>' # noqa
testResult = '<div lang="fr-ca"><p style="Margin: 0 0 20px 0; font-size: 19px; line-height: 25px; color: #0B0C0C;">Le français suis l\'anglais</p></div><div lang="en-ca"><p style="Margin: 0 0 20px 0; font-size: 19px; line-height: 25px; color: #0B0C0C;">hi</p></div><div lang="fr-ca"><br />Bonjour<br /></div>' # noqa

assert add_language_divs(testString) == testResult

def test_multiple_language_div_without_mistune(self):
testString = f"""
[[fr]]
Le français suis l'anglais
[[/fr]]
[[en]]
hi
[[/en]]
[[fr]]
bonjour
[[/fr]]
""" # noqa

testResult = '\n <div lang="fr-ca">\n Le français suis l\'anglais\n </div>\n\n <div lang="en-ca">\n hi\n </div>\n\n <div lang="fr-ca">\n bonjour\n </div>\n ' # noqa
assert add_language_divs(testString) == testResult

def test_nested_language_divs_after_mistune(self):
testString = '<p style="Margin: 0 0 20px 0; font-size: 19px; line-height: 25px; color: #0B0C0C;">[[fr]]</p><p style="Margin: 0 0 20px 0; font-size: 19px; line-height: 25px; color: #0B0C0C;">Le français suis l\'anglais</p><p style="Margin: 0 0 20px 0; font-size: 19px; line-height: 25px; color: #0B0C0C;">[[en]]</p><p style="Margin: 0 0 20px 0; font-size: 19px; line-height: 25px; color: #0B0C0C;">hi</p><p style="Margin: 0 0 20px 0; font-size: 19px; line-height: 25px; color: #0B0C0C;">[[/en]]</p><p style="Margin: 0 0 20px 0; font-size: 19px; line-height: 25px; color: #0B0C0C;">[[/fr]]</p><p style="Margin: 0 0 20px 0; font-size: 19px; line-height: 25px; color: #0B0C0C;">[[en]]<br />hi<br />[[fr]]<br />Bonjour<br />[[/fr]]<br />[[/en]]</p>' # noqa
testResult = '<div lang="fr-ca"><p style="Margin: 0 0 20px 0; font-size: 19px; line-height: 25px; color: #0B0C0C;">Le français suis l\'anglais</p><div lang="en-ca"><p style="Margin: 0 0 20px 0; font-size: 19px; line-height: 25px; color: #0B0C0C;">hi</p></div></div><div lang="en-ca"><br />hi<br /><div lang="fr-ca"><br />Bonjour<br /></div><br /></div>' # noqa

assert add_language_divs(testString) == testResult

def test_nested_language_divs_without_mistune(self):
testString = f"""
[[fr]]
Le français suis l'anglais
[[en]]
English!
[[/en]]
[[/fr]]
[[en]]
English
[[fr]]
French!
[[/fr]]
[[/en]]
""" # noqa
testResult = '\n <div lang="fr-ca">\n Le français suis l\'anglais\n \n <div lang="en-ca">\n English!\n </div>\n \n </div>\n\n <div lang="en-ca">\n English\n <div lang="fr-ca">\n French!\n </div>\n </div>\n ' # noqa
assert add_language_divs(testString) == testResult


@pytest.mark.parametrize(
Expand Down
3 changes: 0 additions & 3 deletions tests/test_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,10 @@ def test_lang_tags_in_templates():
"bad_content",
[
"[[en]\nEN text\n[[/en]]", # missing bracket
"[[en]]EN text\n[[/en]]", # missing \n
"[[en]]\nEN text[[/en]]", # missing \n
"[[EN]]\nEN text\n[[/EN]]", # tags not lowercase
"[[en]]\nEN text\n", # tag missing
"EN text\n[[/en]]", # tag missing
"((en))\nEN text\n((/en))", # wrong brackets
"[[en]]EN text[[/en]]", # tags not on their own line
],
)
def test_lang_tags_in_templates_bad_content(bad_content: str):
Expand Down

0 comments on commit 15dc291

Please sign in to comment.