Skip to content

Commit

Permalink
Simplify creation of translated f-strings in multilingual mode
Browse files Browse the repository at this point in the history
  • Loading branch information
janezd committed Jan 1, 2025
1 parent 8790e6e commit 48160d2
Show file tree
Hide file tree
Showing 6 changed files with 40 additions and 169 deletions.
2 changes: 2 additions & 0 deletions docs/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ The available options are
`smart-quotes` (default: true)
: If set to `false`, strings in translated sources will have the same quotes as in the original source. Otherwise, if the translation of a single-quoted string includes a single quote, Trubar will output a double-quoted string, and vice versa. If the translated message contains both types of quotes, they must be escaped with a backslash.

This setting has no effect in a multilingual setup.

`auto-prefix` (default: true)
: If set, Trubar will turn strings into f-strings if the translation contains braces and adding an f-prefix makes it a syntactically valid string, *unless* the original string already included braces, in which case this may have been a pattern for `str.format`.

Expand Down
117 changes: 27 additions & 90 deletions trubar/actions.py
Original file line number Diff line number Diff line change
Expand Up @@ -349,102 +349,38 @@ def push_context(self, node: NamespaceNode) -> None:

@classmethod
def _f_string_languages(cls,
node: SomeString,
original: str,
messages: List[str]) -> set[int]:
"""
For the given messages, return a set of indices of languages that
requires an f-prefix. This includes the original language, 0.
requires an f-prefix, excluding the original language.
This is determined by checking that the string includes braces and,
if so, that it compiles to something that includes an f-string after
trying all quote types.
Empty set if
- the original string is not an f-string and auto_prefix is disabled
- the original string already has braces but no f-prefix.
"""
add_f = set()
prefix = node.prefix
if "f" not in prefix:
if not config.auto_prefix or re_braced.search(messages[0]):
return add_f
prefix += "f"
else:
add_f.add(0)

for i, translation in enumerate(messages[1:], start=1):
if not re_braced.search(translation):
continue
for quote in all_quotes:
if re_braced.search(translation):
try:
new_node = cst.parse_expression(
f'{prefix}{quote}{translation}{quote}')
assert isinstance(new_node, cst.FormattedString)
except cst.ParserSyntaxError:
continue
new_node = cst.parse_expression("f" + repr(translation))
except cst.ParserSyntaxError as exc:
languages = list(config.languages.values())
language = languages[i].international_name
raise TranslationError(
f"Probable syntax error in translation to {language}.\n"
f"Original: {original}\n"
f"Translation:\n {translation}\n"
"This error occurred while trying to compile the translation "
"string as an f-string. The original Python message:"
) from exc

assert isinstance(new_node, cst.FormattedString)
if any(isinstance(part, cst.FormattedStringExpression)
for part in new_node.parts):
add_f.add(i)
break
return add_f

@classmethod
def _get_quote(cls,
node: SomeString,
orig_str: str,
translation: str,
language_index: int,
prefix: str) -> str:
"""
Return a suitable quote for the given translation.
The method tries all quote types (starting with the original) and
returns the first one that compiles. If none compiles, raises a
TranslationError.
The method is used for f-strings.
"""
quotes = (node.quote, ) + (all_quotes if config.smart_quotes else ())
for quote in quotes:
try:
compiled = ast.parse(
f"{prefix}{quote}{translation}{quote}",
mode="eval")
except SyntaxError:
pass
else:
compiled = compiled.body
if isinstance(compiled, ast.JoinedStr) \
or isinstance(compiled, ast.Constant) \
and isinstance(compiled.value, str):
return quote

# No suitable quotes, raise an exception
hints = ""
if "f" in node.prefix:
hints += f"\n- String {orig_str} is an f-string"
else:
hints += (
"\n- Original string is not an f-string, but the translation \n"
"seems to be an f-string and auto-prefix option is set.")
if config.smart_quotes:
hints += \
"\n- I tried all quote types, even triple-quotes"
else:
hints += \
"\n- Try enabling smart quotes to allow changing the quote type"
if len(quote) != 3 and "\n" in translation:
hints += \
"\n- Check for any unescaped \\n's"

languages = list(config.languages.values())
language = languages[language_index].international_name
raise TranslationError(
f"Probable syntax error in translation to {language}.\n"
f"Original: {orig_str}\n"
f"Translation to {language}:\n {translation}\n"
"Some hints:" + hints)

def translate(
self,
node: SomeString,
Expand All @@ -468,17 +404,20 @@ def translate(
for translation in messages]

idx = len(self.message_tables[0])
need_f = self._f_string_languages(node, messages)
if "f" in node.prefix \
or config.auto_prefix and not re_braced.search(original):
need_f = self._f_string_languages(orig_str, messages)
if "f" in node.prefix:
need_f.add(0)
else:
need_f = set()
if need_f:
fprefix = node.prefix
if "f" not in fprefix:
fprefix = "f" + fprefix
for lang_idx, (message, table) in \
enumerate(zip(messages, self.message_tables)):
prefix = fprefix if lang_idx in need_f else node.prefix
quote = self._get_quote(
node, orig_str, message, lang_idx, prefix)
table.append(f"{prefix}{quote}{message}{quote}")
quoted_str = repr(message)
if lang_idx in need_f:
quoted_str = "f" + quoted_str
table.append(quoted_str)
trans = f'_tr.e(_tr.c({idx}, {orig_str}))'
else:
for message, table in zip(messages, self.message_tables):
Expand All @@ -488,7 +427,6 @@ def translate(
trans = f"_tr.m[{idx}, {orig_str}]"
return cst.parse_expression(trans)


def collect(source: str,
existing: Optional[MsgDict] = None,
pattern: str = "",
Expand Down Expand Up @@ -660,7 +598,6 @@ def report(s, level):
with open(fname, "wt", encoding=config.encoding) as f:
json.dump(messages, f)


def _any_translations(translations: MsgDict):
return any(isinstance(value, str)
or isinstance(value, dict) and _any_translations(value)
Expand Down
Original file line number Diff line number Diff line change
@@ -1 +1 @@
["English", "English", "\"default\"", "\"some/directory\"", "f\"File {x}\"", "f'Not file {x + \".bak\"}'", "f\"\"\"{\"nonsense\"}\"\"\"", "'Import it, if you must.'", "Oranges"]
["English", "English", "'default'", "'some/directory'", "f'File {x}'", "f'Not file {x + \".bak\"}'", "f'{\"nonsense\"}'", "'Import it, if you must.'", "Oranges"]
Original file line number Diff line number Diff line change
@@ -1 +1 @@
["Foo", "Foolanguage", "\"befault\"", "f\"an {f} foo string\"", "f\"File {x}\"", "f'Ne datoteka {x + \".bak\"}'", "f\"\"\"{\"sense\"}\"\"\"", "f'''{x} +'\" {y}'''", "Flemons"]
["Foo", "Foolanguage", "'befault'", "f'an {f} foo string'", "f'File {x}'", "f'Ne datoteka {x + \".bak\"}'", "f'{\"sense\"}'", "f'{x} +\\'\" {y}'", "Flemons"]
Original file line number Diff line number Diff line change
@@ -1 +1 @@
["Sloven\u0161\u010dina", "Slovenian", "f\"\"\"An {f} st'r\"i'''ng\"\"\"", "\"some/directory\"", "f\"Datoteka {x}\"", "f'Ne datoteka {x + \".bak\"}'", "f\"\"\"{\"nesmisel\"}\"\"\"", "'Import it, if you must.'", "Pomaran\u010de"]
["Sloven\u0161\u010dina", "Slovenian", "f'An {f} st\\'r\"i\\'\\'\\'ng'", "'some/directory'", "f'Datoteka {x}'", "f'Ne datoteka {x + \".bak\"}'", "f'{\"nesmisel\"}'", "'Import it, if you must.'", "Pomaran\u010de"]
84 changes: 8 additions & 76 deletions trubar/tests/test_actions.py
Original file line number Diff line number Diff line change
Expand Up @@ -376,9 +376,9 @@ class C:
""")
self.assertEqual(
message_tables,
[['msg1', 'msg2', 'msg3', 'foo', 'bar', 'f"baz{42}"', 'crux'],
['msg4', 'msg5', 'msg6', 'sea food', 'bar', 'f"baz{42}"', ''],
['msg7', 'msg8', 'msg8', 'foo', 'no-bar', 'f"bar(1)"', 'crux']]
[['msg1', 'msg2', 'msg3', 'foo', 'bar', "f'baz{42}'", 'crux'],
['msg4', 'msg5', 'msg6', 'sea food', 'bar', "f'baz{42}'", ''],
['msg7', 'msg8', 'msg8', 'foo', 'no-bar', "'bar(1)'", 'crux']]
)

def test_f_string_languages(self):
Expand All @@ -388,77 +388,21 @@ def test_f_string_languages(self):
node.prefix = "f"
node.quote = "'"
# Original is an f-string, and so is one of translations
self.assertEqual(m(node, ["a {s}tring", "one", "two{x}"]), {0, 2})
self.assertEqual(m(node, ["a {s}tring", "one", "two{x}"]), {2})

# Only original needs it
self.assertEqual(m(node, ["a {s}tring", "one", "two"]), {0})
self.assertEqual(m(node, ["a {s}tring", "one", "two"]), set())

node.prefix = ""
m = StringTranslatorMultilingual._f_string_languages
# No language needs it
self.assertEqual(m(node, ["a string", "one", "two"]), set())

# Original is not an f-string, but has {},
# hence translations are supposed to have them without being f-strings
self.assertEqual(m(node, ["a string{x}", "one{y}", "two{x}"]), set())

# Original is not an f-string, but one of translations is
for quote in ['"', "'", "'''", '"""']:
self.assertEqual(m(node, ["a string", "one", f"t{quote}wo{{x}}"]),
{2})

# Original is not an f-string, and auto-prefix is off
with patch("trubar.config.config.auto_prefix", False):
self.assertEqual(
m(node, ["a string", "on'e", "tw'o{x}"]),
set())

def test_get_quote(self):
node = Mock()
m = StringTranslatorMultilingual._get_quote

node.prefix = ""
node.quote = '"'
self.assertEqual(
m(node, "'a string'", "a string", 2, ""), '"')

node.quote = "'''"
self.assertEqual(
m(node, "'a string'", "a string", 2, ""), "'''")

node.quote = "'"
self.assertEqual(
m(node, "'a string'", "a string", 2, ""), "'")

node.quote = "'"
self.assertEqual(
m(node, "'a string'", "tw'o{x}", 2, ""), '"')

node.quote = "'"
self.assertIn(
m(node, "'a str'ing'", "a str'i\"ng", 2, ""),
("'''", '"""'))

node.quote = "'"
self.assertEqual(
m(node, "'a str'''ing'", "s\"tr'''i'n\"g", 2, ""), '"""')

node.quote = "'"
self.assertRaises(
TranslationError,
m, node, "'a str'''ing'", "a \"\"\"s\"t\"r'''in'g", 2, "")

with patch("trubar.config.config.smart_quotes", False):
node.quote = "'"
self.assertRaises(
TranslationError,
m, node, "'a str'ing'", "a str'ing", 2, "")

node.quote = "'"
self.assertRaises(
TranslationError,
m, node, "'a str'''ing'", "a str'''ing", 2, "")

def test_auto_prefix(self):
# No f-strings, no problems
translation, tables = self._translate(
Expand All @@ -476,7 +420,7 @@ def test_auto_prefix(self):
translation, tables = self._translate(
"print(f'fo{o}')", [{"fo{o}": "dont"}, {}])
self.assertEqual(translation, "print(_tr.e(_tr.c(0, f'fo{o}')))")
self.assertEqual(tables, [["f'fo{o}'"], ["f'dont'"], ["f'fo{o}'"]])
self.assertEqual(tables, [["f'fo{o}'"], ["'dont'"], ["f'fo{o}'"]])

# Original is not an f-string, one of translations is, one is not
translation, tables = self._translate(
Expand All @@ -497,27 +441,15 @@ def test_smart_quotes_and_f(self):
self.assertEqual(translation, "print(_tr.e(_tr.c(0, f'foo')))")
self.assertEqual(
tables,
[["f'foo'"], ["f\"don't\""], ["f'x\"y'"]])
[["f'foo'"], ['"don\'t"'], ['\'x"y\'']])

# One language has an f-string, and translations have different quotes
self._translate(
"print('foo')", [{"foo": "d{o}n't"}, {"foo": 'x"y'}])
self.assertEqual(translation, "print(_tr.e(_tr.c(0, f'foo')))")
self.assertEqual(
tables,
[["f'foo'"], ["f\"don't\""], ["f'x\"y'"]])

with patch("trubar.config.config.smart_quotes", False):
# Mismatching quotes
self.assertRaises(
TranslationError,
self._translate, "print(f'foo')", [{"foo": "do{n}'t"}, {}])

# Original has an f-string, but quotes are OK
translation, tables = self._translate(
'print(f"foo")', [{"foo": "don't"}, {"foo": "x'y"}])
self.assertEqual(translation, 'print(_tr.e(_tr.c(0, f"foo")))')
self.assertEqual(tables, [['f"foo"'], ['f"don\'t"'], ['f"x\'y"']])
[["f'foo'"], ["\"don't\""], ["'x\"y'"]])

def test_syntax_error(self):
tree = cst.parse_module("print('foo')")
Expand Down

0 comments on commit 48160d2

Please sign in to comment.