diff --git a/README.rst b/README.rst index 1b4039a..77a8e80 100644 --- a/README.rst +++ b/README.rst @@ -85,6 +85,13 @@ Details of the config.yaml file are in `edx-platform/conf/locale/config.yaml Changes ======= +v0.4.7 +------- + +* Test that tag validation catches HTML added to translations. + +* When tag validation finds differences, output should be deterministic. + v0.4.6 ------- diff --git a/i18n/__init__.py b/i18n/__init__.py index e9dcc46..4299bb6 100644 --- a/i18n/__init__.py +++ b/i18n/__init__.py @@ -6,7 +6,7 @@ from . import config -__version__ = '0.4.6' +__version__ = '0.4.7' class Runner: diff --git a/i18n/validate.py b/i18n/validate.py index f92faa6..b9f8876 100644 --- a/i18n/validate.py +++ b/i18n/validate.py @@ -161,8 +161,8 @@ def check_messages(filename, report_empty=False): # Check if tags don't match if id_tags != tx_tags: - id_has = u", ".join(u'"{}"'.format(t) for t in id_tags - tx_tags) - tx_has = u", ".join(u'"{}"'.format(t) for t in tx_tags - id_tags) + id_has = u", ".join(sorted(u'"{}"'.format(t) for t in id_tags - tx_tags)) + tx_has = u", ".join(sorted(u'"{}"'.format(t) for t in tx_tags - id_tags)) if id_has and tx_has: diff = u"{} vs {}".format(id_has, tx_has) elif id_has: diff --git a/setup.py b/setup.py index c689e77..1876683 100755 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ setup( name='edx-i18n-tools', - version='0.4.6', + version='0.4.7', description='edX Internationalization Tools', author='edX', author_email='oscm@edx.org', diff --git a/tests/data/validation_problems.po b/tests/data/validation_problems.po index e42152c..ebc36b1 100644 --- a/tests/data/validation_problems.po +++ b/tests/data/validation_problems.po @@ -22,6 +22,10 @@ msgstr "One tag: {one} and not two" msgid "One tag: {one}" msgstr "Two tags: {one} and {two}" +# HTML added to translations could create cross-site security issues +msgid "No tags" +msgstr "<b>Added</b> some HTML" + # TODO: This doesn't raise a validation error: the characters comes through as # a 
surrogate pair, and so isn't a problem. So are astral characters a # problem? @@ -72,5 +76,6 @@ msgstr "" msgid "Look — a dog!" msgstr "Look -- a dog!" +# could come-and-go with translations msgid "The CIA said so" msgstr "The secret agency said so" diff --git a/tests/test_changed.py b/tests/test_changed.py index 5f42fd8..ae2126f 100644 --- a/tests/test_changed.py +++ b/tests/test_changed.py @@ -26,6 +26,7 @@ def test_detect_changes(self): file_name = fake_locale_dir / 'LC_MESSAGES' / 'mako.po' copy = fake_locale_dir / 'LC_MESSAGES' / 'mako_copy.po' + # Note: this fails if you have not-yet-committed changes to test fixture .po files self.assertFalse(self.changed.detect_changes()) try: diff --git a/tests/test_validate.py b/tests/test_validate.py index 8cc2458..d90c6bf 100644 --- a/tests/test_validate.py +++ b/tests/test_validate.py @@ -36,6 +36,12 @@ u'Two tags: {one} and {two}', u'"{two}" added', ), + ( + 'Different tags in source and translation', + 'No tags', + "<b>Added</b> some HTML", + '"</b>", "<b>" added' + ), ( 'Non-BMP char', u'Astral character (pile of poo), bad for JavaScript: \U0001f4a9',