diff --git a/lib/i18n/tasks/translators/deepl_translator.rb b/lib/i18n/tasks/translators/deepl_translator.rb
index 5399b9ef..550b2e0a 100644
--- a/lib/i18n/tasks/translators/deepl_translator.rb
+++ b/lib/i18n/tasks/translators/deepl_translator.rb
@@ -3,7 +3,7 @@
require 'i18n/tasks/translators/base_translator'
module I18n::Tasks::Translators
- class DeeplTranslator < BaseTranslator
+ class DeeplTranslator < BaseTranslator # rubocop:disable Metrics/ClassLength
# max allowed texts per request
BATCH_SIZE = 50
# those languages must be specified with their sub-kind e.g en-us
@@ -60,14 +60,14 @@ def options_for_plain
# @param [String] value
# @return [String] 'hello, %{name}' => 'hello, %{name}'
- def replace_interpolations(value)
+ def original_replace_interpolations(value)
value.gsub(INTERPOLATION_KEY_RE, '\0')
end
# @param [String] untranslated
# @param [String] translated
# @return [String] 'hello, %{name}' => 'hello, %{name}'
- def restore_interpolations(untranslated, translated)
+ def original_restore_interpolations(untranslated, translated)
return translated if untranslated !~ INTERPOLATION_KEY_RE
translated.gsub(%r{?i18n>}, '')
@@ -75,6 +75,87 @@ def restore_interpolations(untranslated, translated)
raise_interpolation_error(untranslated, translated, e)
end
+ # deepl does a better job with interpolations when it doesn't have to deal
+ # with <br> tags, so we replace all of them with meaningless asterisk chains
+ BR_REGEXP = %r{(
\s*)+}i.freeze
+ BR_SINGLE_MARKER = ' *** '
+ BR_DOUBLE_MARKER = ' ***** '
+
+ # letting deepl 'read' the interpolations gives better translations (and
+ # solves the problem of interpolations getting pushed all the way to the
+ # front of the sentence), however, deepl will also try to translate
+ # the interpolations and that gets messy.
+ # we use nonsense three-letter acronyms so deepl will 'read' them and leave
+ # them alone (the letter X also works very well, except in sentences with
+ # several consecutive interpolations because that reads X X X and deepl
+ # doesn't handle that well)
+ # deepl also needs to know if an interpolation will be a word or a number,
+ # for romance languages it matters. a little Spanish lesson to illustrate:
+ # "%{foo} betalingen" translates either to "facturas %{foo}"
+ # (openstaande betalingen -> facturas pendientes) or to "%{foo} facturas"
+ # (5 betalingen -> 5 facturas)
+ # for interpolation keys that are usually numeric, we pick a number
+ # instead of the three-letter acronym (more consistency in how we name
+ # interpolation keys would help)
+ LETTER_SUBS = %w[RYX QFN VLB XOG DWP ZMQ JZQ WVS LRX HPM].freeze
+ NUM_SUBS = %w[17 19 23 29 31 37 41 43 47 53].freeze
+
+ def sub_for_handle(handle, index)
+ case handle.gsub(/[^a-z]/, '')
+ when 'count', 'minutes', 'hours'
+ NUM_SUBS[index % NUM_SUBS.size]
+ else
+ LETTER_SUBS[index % LETTER_SUBS.size]
+ end
+ end
+
+ # BEX version of replace_interpolation
+ # Swaps each interpolation key for the stand-in chosen by sub_for_handle and
+ # each match of BR_REGEXP for one of the asterisk markers.
+ # @param [String] value
+ # @return [String]
+ def replace_interpolations(value)
+ index = 0
+ value.gsub(INTERPOLATION_KEY_RE) do |handle|
+ sub = sub_for_handle(handle, index)
+ index += 1
+ # NOTE(review): restore_interpolations reads a handle and a sub back out of
+ # the translated text, but only the sub is emitted here as shown; any
+ # wrapper that carries the handle appears to be missing - confirm.
+ "#{sub}"
+ end.gsub(BR_REGEXP) do |br|
+ # NOTE(review): only a 'b' count of exactly 2 selects the double marker, so
+ # a longer run would get the single one - confirm that is intended.
+ if br.downcase.count('b') == 2
+ # never more than two <br> in a row, it gets messy
+ BR_DOUBLE_MARKER
+ else
+ BR_SINGLE_MARKER
+ end
+ end
+ end
+
+ # reversing our substitutions should be straight-forward, but it's not
+ # because deepl gets creative. cases are explained inline.
+ # @param [String] untranslated
+ # @param [String] translated
+ # @return [String] translated text with the handles put back and the BR
+ #   markers replaced
+ def restore_interpolations(untranslated, translated)
+ # NOTE(review): the block below reads four capture groups, but the pattern
+ # as shown has only two - part of it appears to be missing, confirm.
+ translated.gsub(%r{(.?)([^<]*)}) do
+ # char: the optional single character captured first; handle, sub and
+ # body: the remaining captures, in order
+ char = ::Regexp.last_match(1)
+ handle = ::Regexp.last_match(2)
+ sub = ::Regexp.last_match(3)
+ body = ::Regexp.last_match(4)
+ if body == sub
+ # deepl kept the 'sub' text inside the tag and nothing else, clean.
+ "#{char}#{handle}"
+ elsif body.index(sub)
+ # deepl took some letters from outside the tag and placed them
+ # inside the e.g. task "RYX"
+ # NOTE(review): char is matched but not re-emitted in this branch -
+ # confirm it is not lost from the output.
+ before, after = body.split(sub, 2)
+ "#{before}#{handle}#{after}"
+ elsif "#{char}#{body}".downcase == sub.downcase
+ # deepl took the first letter from inside the tag and placed it
+ # immediately before the tag e.g. Ryx
+ handle
+ else
+ # instead of trying to look normal the fallback prints something
+ # obviously wrong hoping to get some attention and a manual fix
+ "!!!!!#{sub.inspect} (#{char.inspect} #{body.inspect})!!!!!"
+ end
+ end.gsub(BR_DOUBLE_MARKER, '
').gsub(BR_SINGLE_MARKER, '
')
+ rescue StandardError => e
+ # any failure above is handed to raise_interpolation_error together with
+ # both strings
+ raise_interpolation_error(untranslated, translated, e)
+ end
+
def no_results_error_message
I18n.t('i18n_tasks.deepl_translate.errors.no_results')
end
diff --git a/spec/deepl_translate_spec.rb b/spec/deepl_translate_spec.rb
index 79494a68..aeb1f5b1 100644
--- a/spec/deepl_translate_spec.rb
+++ b/spec/deepl_translate_spec.rb
@@ -9,10 +9,10 @@
text_test = [
'key',
- "Hello, %{user} O'Neill! How are you? {{ Check out this Liquid tag, it should not be translated }} \
- {% That applies to this Liquid tag as well %}",
- "¡Hola, %{user} O'Neill! ¿Qué tal estás? {{ Check out this Liquid tag, it should not be translated }} \
- {% That applies to this Liquid tag as well %}"
+ "Hello, %{user} O'Neill! How are you? {{ Check out this Liquid tag, it should not be translated }} " \
+ '{% That applies to this Liquid tag as well %}',
+ "¡Hola, %{user} O'Neill! ¿Qué tal? {{ Check out this Liquid tag, it should not be translated }} " \
+ '{% That applies to this Liquid tag as well %}'
]
html_test_plrl = [
@@ -92,4 +92,93 @@
end
end
end
+
+ # These examples go through the :deepl backend and only run when
+ # DEEPL_AUTH_KEY is set; don't expect deepl's answers to be exactly the
+ # same on every run
+ describe 'translating Dutch into other languages' do
+ let(:base_task) { I18n::Tasks::BaseTask.new }
+
+ before do
+ skip 'temporarily disabled on JRuby due to https://github.com/jruby/jruby/issues/4802' if RUBY_ENGINE == 'jruby'
+ skip 'DEEPL_AUTH_KEY env var not set' unless ENV['DEEPL_AUTH_KEY']
+ end
+
+ it 'tells time' do
+ german, english, spanish =
+ translate_dutch(hours_and_minutes: '%{hours} uur en %{minutes} minuten')
+ expect(german).to eq '%{hours} Stunden und %{minutes} Minuten'
+ expect(english).to eq '%{hours} hours and %{minutes} minutes'
+ expect(spanish).to eq '%{hours} horas y %{minutes} minutos'
+ end
+
+ it 'counts' do
+ german, english, spanish =
+ translate_dutch(other: '%{count} taken')
+ expect(german).to eq '%{count} Aufgaben'
+ expect(english).to eq '%{count} tasks'
+ expect(spanish).to eq '%{count} tareas'
+ end
+
+ it 'assigns' do
+ german, english, spanish =
+ translate_dutch(assigned: 'Taak "%{todo}" toegewezen aan %{user}')
+ expect(german).to eq 'To-dos "%{todo}" zugewiesen an %{user}'
+ expect(english).to eq 'Task "%{todo}" assigned to %{user}'
+ expect(spanish).to eq 'Tarea "%{todo}" asignada a %{user}'
+ end
+
+ it 'sings' do
+ german, english, spanish =
+ translate_dutch(verse: 'Ik zou zo graag een %{animal} kopen. Ik zag %{count} beren %{food} smeren')
+ expect(german).to eq 'Ich würde so gerne einen %{animal} kaufen. Ich sah %{count} Bären, die %{food} schmierten'
+ # greasing is a funny way to say smeren, but we let it slide
+ expect(english).to eq 'I would so love to buy a %{animal}. I saw %{count} bears greasing %{food}'
+ expect(spanish).to eq 'Me encantaría comprar un %{animal}. Vi %{count} osos engrasando %{food}'
+ end
+
+ it 'sends emails' do
+ german, english, spanish =
+ translate_dutch(
+ email_body_html: '{{ booking.greeting }},
Bijgevoegd ziet u een factuur van {{ park.name }} met ' \
+ 'factuurnummer {{ invoice.invoice_nr }}.
Volgens onze administratie had het ' \
+ 'verschuldigde bedrag van {{ locals.payment_collector_total }} op ' \
+ '{{ locals.payment_collector_deadline }} moeten zijn betaald. Helaas hebben we nog ' \
+ 'geen betaling ontvangen.
'
+ )
+ expect(german).to eq '{{ booking.greeting }},
Anbei finden Sie eine Rechnung von {{ park.name }} ' \
+ 'mit der Rechnungsnummer {{ invoice.invoice_nr }}.
Laut unserer Verwaltung hätte der ' \
+ 'von {{ locals.payment_collector_total }} geschuldete Betrag ' \
+ 'am {{ locals.payment_collector_deadline }} bezahlt werden müssen. Leider haben wir die ' \
+ 'Zahlungen noch nicht erhalten.
'
+ expect(english).to eq '{{ booking.greeting }},
Attached please find an invoice from {{ park.name }} ' \
+ 'with invoice number {{ invoice.invoice_nr }}.
According to our records, the amount ' \
+ 'due from {{ locals.payment_collector_total }} on ' \
+ '{{ locals.payment_collector_deadline }} should have been paid. Unfortunately, we have ' \
+ 'not yet received payment.
'
+ expect(spanish).to eq '{{ booking.greeting }},
Adjuntamos una factura de {{ park.name }} con el ' \
+ 'número de factura {{ invoice.invoice_nr }}.
Según nuestros registros, el importe ' \
+ 'adeudado por {{ locals.payment_collector_total }} debería haber sido abonado en ' \
+ '{{ locals.payment_collector_deadline }}. Lamentablemente, aún no hemos recibido ' \
+ 'el pago.
'
+ end
+
+ it 'asks itself why are you even translating this' do
+ german, english, spanish =
+ translate_dutch(action: '%{subject} %{verb} %{object}')
+ expect(german).to eq '%{subject} %{verb} %{object}'
+ expect(english).to eq '%{subject} %{verb} %{object}'
+ expect(spanish).to eq '%{subject} %{verb} %{object}'
+ end
+
+ def translate_dutch(dutch_pair)
+ key = dutch_pair.keys.first
+ phrase = dutch_pair[key]
+ locales = %w[de en-us es]
+ branches = locales.each_with_object({}) do |locale, hash|
+ hash[locale] = { 'testing' => { key => phrase } }
+ end
+ tree = build_tree(branches)
+ translations = base_task.translate_forest(tree, from: 'nl', backend: :deepl)
+ locales.map { |locale| translations[locale]['testing'][key].value.strip }
+ end
+ end
end