diff --git a/README.rst b/README.rst
index 1b4039a..77a8e80 100644
--- a/README.rst
+++ b/README.rst
@@ -85,6 +85,13 @@ Details of the config.yaml file are in `edx-platform/conf/locale/config.yaml
Changes
=======
+v0.4.7
+-------
+
+* Test that tag validation catches HTML added to translations.
+
+* When tag validation finds differences, the reported tags are now sorted so the output is deterministic.
+
v0.4.6
-------
diff --git a/i18n/__init__.py b/i18n/__init__.py
index e9dcc46..4299bb6 100644
--- a/i18n/__init__.py
+++ b/i18n/__init__.py
@@ -6,7 +6,7 @@
from . import config
-__version__ = '0.4.6'
+__version__ = '0.4.7'
class Runner:
diff --git a/i18n/validate.py b/i18n/validate.py
index f92faa6..b9f8876 100644
--- a/i18n/validate.py
+++ b/i18n/validate.py
@@ -161,8 +161,8 @@ def check_messages(filename, report_empty=False):
# Check if tags don't match
if id_tags != tx_tags:
- id_has = u", ".join(u'"{}"'.format(t) for t in id_tags - tx_tags)
- tx_has = u", ".join(u'"{}"'.format(t) for t in tx_tags - id_tags)
+ id_has = u", ".join(sorted(u'"{}"'.format(t) for t in id_tags - tx_tags))
+ tx_has = u", ".join(sorted(u'"{}"'.format(t) for t in tx_tags - id_tags))
if id_has and tx_has:
diff = u"{} vs {}".format(id_has, tx_has)
elif id_has:
diff --git a/setup.py b/setup.py
index c689e77..1876683 100755
--- a/setup.py
+++ b/setup.py
@@ -4,7 +4,7 @@
setup(
name='edx-i18n-tools',
- version='0.4.6',
+ version='0.4.7',
description='edX Internationalization Tools',
author='edX',
author_email='oscm@edx.org',
diff --git a/tests/data/validation_problems.po b/tests/data/validation_problems.po
index e42152c..ebc36b1 100644
--- a/tests/data/validation_problems.po
+++ b/tests/data/validation_problems.po
@@ -22,6 +22,10 @@ msgstr "One tag: {one} and not two"
msgid "One tag: {one}"
msgstr "Two tags: {one} and {two}"
+# HTML added to translations could create cross-site scripting (XSS) security issues
+msgid "No tags"
+msgstr "Added some HTML"
+
# TODO: This doesn't raise a validation error: the characters comes through as
# a surrogate pair, and so isn't a problem. So are astral characters a
# problem?
@@ -72,5 +76,6 @@ msgstr ""
msgid "Look — a dog!"
msgstr "Look -- a dog!"
+# could come-and-go with translations
msgid "The CIA said so"
msgstr "The secret agency said so"
diff --git a/tests/test_changed.py b/tests/test_changed.py
index 5f42fd8..ae2126f 100644
--- a/tests/test_changed.py
+++ b/tests/test_changed.py
@@ -26,6 +26,7 @@ def test_detect_changes(self):
file_name = fake_locale_dir / 'LC_MESSAGES' / 'mako.po'
copy = fake_locale_dir / 'LC_MESSAGES' / 'mako_copy.po'
+ # Note: this fails if you have not-yet-committed changes to test fixture .po files
self.assertFalse(self.changed.detect_changes())
try:
diff --git a/tests/test_validate.py b/tests/test_validate.py
index 8cc2458..d90c6bf 100644
--- a/tests/test_validate.py
+++ b/tests/test_validate.py
@@ -36,6 +36,12 @@
u'Two tags: {one} and {two}',
u'"{two}" added',
),
+ (
+ 'Different tags in source and translation',
+ 'No tags',
+ "Added some HTML",
+ '"", "" added'
+ ),
(
'Non-BMP char',
u'Astral character (pile of poo), bad for JavaScript: \U0001f4a9',