diff --git a/lib/i18n/tasks/translators/deepl_translator.rb b/lib/i18n/tasks/translators/deepl_translator.rb index 5399b9ef..550b2e0a 100644 --- a/lib/i18n/tasks/translators/deepl_translator.rb +++ b/lib/i18n/tasks/translators/deepl_translator.rb @@ -3,7 +3,7 @@ require 'i18n/tasks/translators/base_translator' module I18n::Tasks::Translators - class DeeplTranslator < BaseTranslator + class DeeplTranslator < BaseTranslator # rubocop:disable Metrics/ClassLength # max allowed texts per request BATCH_SIZE = 50 # those languages must be specified with their sub-kind e.g en-us @@ -60,14 +60,14 @@ def options_for_plain # @param [String] value # @return [String] 'hello, %{name}' => 'hello, %{name}' - def replace_interpolations(value) + def original_replace_interpolations(value) value.gsub(INTERPOLATION_KEY_RE, '\0') end # @param [String] untranslated # @param [String] translated # @return [String] 'hello, %{name}' => 'hello, %{name}' - def restore_interpolations(untranslated, translated) + def original_restore_interpolations(untranslated, translated) return translated if untranslated !~ INTERPOLATION_KEY_RE translated.gsub(%r{}, '') @@ -75,6 +75,87 @@ def restore_interpolations(untranslated, translated) raise_interpolation_error(untranslated, translated, e) end + # deepl does a better job with interpolations when it doesn't have to deal + # with
tags, so we replace all of them with meaningless asterisk chains + BR_REGEXP = %r{(\s*)+}i.freeze + BR_SINGLE_MARKER = ' *** ' + BR_DOUBLE_MARKER = ' ***** ' + + # letting deepl 'read' the interpolations gives better translations (and + # solves the problem of interpolations getting pushed all the way to the + # front of the sentence), however, deepl will also try to translate + # the interpolations and that gets messy. + # we use nonsense three-letter acronyms so deepl will 'read' them and leave + # them alone (the letter X also works very well, except in sentences with + # several consecutive interpolations because that reads X X X and deepl + # doesn't handle that well) + # deepl also needs to know if an interpolation will be a word or a number, + # for romance languages it matters. a little Spanish lesson to illustrate: + # "%{foo} betalingen" translates either to "facturas %{foo}" + # (openstaande betalingen -> facturas pendientes) or to "%{foo} facturas" + # (5 betalingen -> 5 facturas) + # for interpolation keys that are usually numeric, we pick a number + # instead of the three-letter acronym (more consistency in how we name + # interpolation keys would help) + LETTER_SUBS = %w[RYX QFN VLB XOG DWP ZMQ JZQ WVS LRX HPM].freeze + NUM_SUBS = %w[17 19 23 29 31 37 41 43 47 53].freeze + + def sub_for_handle(handle, index) + case handle.gsub(/[^a-z]/, '') + when 'count', 'minutes', 'hours' + NUM_SUBS[index % NUM_SUBS.size] + else + LETTER_SUBS[index % LETTER_SUBS.size] + end + end + + # BEX version of replace_interpolation + def replace_interpolations(value) + index = 0 + value.gsub(INTERPOLATION_KEY_RE) do |handle| + sub = sub_for_handle(handle, index) + index += 1 + "#{sub}" + end.gsub(BR_REGEXP) do |br| + if br.downcase.count('b') == 2 + # never more than two
in a row, it gets messy + BR_DOUBLE_MARKER + else + BR_SINGLE_MARKER + end + end + end + + # reversing our substitutions should be straight-forward, but it's not + # because deepl gets creative. cases are explained inline. + def restore_interpolations(untranslated, translated) + translated.gsub(%r{(.?)([^<]*)}) do + char = ::Regexp.last_match(1) + handle = ::Regexp.last_match(2) + sub = ::Regexp.last_match(3) + body = ::Regexp.last_match(4) + if body == sub + # deepl kept the 'sub' text inside the tag and nothing else, clean. + "#{char}#{handle}" + elsif body.index(sub) + # deepl took some letters from outside the tag and placed them + # inside the e.g. task "RYX" + before, after = body.split(sub, 2) + "#{before}#{handle}#{after}" + elsif "#{char}#{body}".downcase == sub.downcase + # deepl took the first letter from inside the tag and placed it + # immediately before the tag e.g. Ryx + handle + else + # instead of trying to look normal the fallback prints something + # obviously wrong hoping to get some attention and a manual fix + "!!!!!#{sub.inspect} (#{char.inspect} #{body.inspect})!!!!!" + end + end.gsub(BR_DOUBLE_MARKER, '

').gsub(BR_SINGLE_MARKER, '
') + rescue StandardError => e + raise_interpolation_error(untranslated, translated, e) + end + def no_results_error_message I18n.t('i18n_tasks.deepl_translate.errors.no_results') end diff --git a/spec/deepl_translate_spec.rb b/spec/deepl_translate_spec.rb index 79494a68..aeb1f5b1 100644 --- a/spec/deepl_translate_spec.rb +++ b/spec/deepl_translate_spec.rb @@ -9,10 +9,10 @@ text_test = [ 'key', - "Hello, %{user} O'Neill! How are you? {{ Check out this Liquid tag, it should not be translated }} \ - {% That applies to this Liquid tag as well %}", - "¡Hola, %{user} O'Neill! ¿Qué tal estás? {{ Check out this Liquid tag, it should not be translated }} \ - {% That applies to this Liquid tag as well %}" + "Hello, %{user} O'Neill! How are you? {{ Check out this Liquid tag, it should not be translated }} " \ + '{% That applies to this Liquid tag as well %}', + "¡Hola, %{user} O'Neill! ¿Qué tal? {{ Check out this Liquid tag, it should not be translated }} " \ + '{% That applies to this Liquid tag as well %}' ] html_test_plrl = [ @@ -92,4 +92,93 @@ end end end + + # Don't expect deepl's answers to be exactly the same each run + describe 'translating Dutch into other languages' do + let(:base_task) { I18n::Tasks::BaseTask.new } + + before do + skip 'temporarily disabled on JRuby due to https://github.com/jruby/jruby/issues/4802' if RUBY_ENGINE == 'jruby' + skip 'DEEPL_AUTH_KEY env var not set' unless ENV['DEEPL_AUTH_KEY'] + end + + it 'tells time' do + german, english, spanish = + translate_dutch(hours_and_minutes: '%{hours} uur en %{minutes} minuten') + expect(german).to eq '%{hours} Stunden und %{minutes} Minuten' + expect(english).to eq '%{hours} hours and %{minutes} minutes' + expect(spanish).to eq '%{hours} horas y %{minutes} minutos' + end + + it 'counts' do + german, english, spanish = + translate_dutch(other: '%{count} taken') + expect(german).to eq '%{count} Aufgaben' + expect(english).to eq '%{count} tasks' + expect(spanish).to eq '%{count} tareas' + end + + it 'assigns' do + german, english, spanish = + translate_dutch(assigned: 'Taak "%{todo}" toegewezen aan %{user}') + expect(german).to eq 'To-dos "%{todo}" zugewiesen an %{user}' + expect(english).to eq 'Task "%{todo}" assigned to %{user}' + expect(spanish).to eq 'Tarea "%{todo}" asignada a %{user}' + end + + it 'sings' do + german, english, spanish = + translate_dutch(verse: 'Ik zou zo graag een %{animal} kopen. Ik zag %{count} beren %{food} smeren') + expect(german).to eq 'Ich würde so gerne einen %{animal} kaufen. Ich sah %{count} Bären, die %{food} schmierten' + # greasing is a funny way to say smeren, but we let it slide + expect(english).to eq 'I would so love to buy a %{animal}. I saw %{count} bears greasing %{food}' + expect(spanish).to eq 'Me encantaría comprar un %{animal}. Vi %{count} osos engrasando %{food}' + end + + it 'sends emails' do + german, english, spanish = + translate_dutch( + email_body_html: '{{ booking.greeting }},

Bijgevoegd ziet u een factuur van {{ park.name }} met ' \ + 'factuurnummer {{ invoice.invoice_nr }}.
Volgens onze administratie had het ' \ + 'verschuldigde bedrag van {{ locals.payment_collector_total }} op ' \ + '{{ locals.payment_collector_deadline }} moeten zijn betaald. Helaas hebben we nog ' \ + 'geen betaling ontvangen.
' + ) + expect(german).to eq '{{ booking.greeting }},

Anbei finden Sie eine Rechnung von {{ park.name }} ' \ + 'mit der Rechnungsnummer {{ invoice.invoice_nr }}.
Laut unserer Verwaltung hätte der ' \ + 'von {{ locals.payment_collector_total }} geschuldete Betrag ' \ + 'am {{ locals.payment_collector_deadline }} bezahlt werden müssen. Leider haben wir die ' \ + 'Zahlungen noch nicht erhalten.
' + expect(english).to eq '{{ booking.greeting }},

Attached please find an invoice from {{ park.name }} ' \ + 'with invoice number {{ invoice.invoice_nr }}.
According to our records, the amount ' \ + 'due from {{ locals.payment_collector_total }} on ' \ + '{{ locals.payment_collector_deadline }} should have been paid. Unfortunately, we have ' \ + 'not yet received payment.
' + expect(spanish).to eq '{{ booking.greeting }},

Adjuntamos una factura de {{ park.name }} con el ' \ + 'número de factura {{ invoice.invoice_nr }}.
Según nuestros registros, el importe ' \ + 'adeudado por {{ locals.payment_collector_total }} debería haber sido abonado en ' \ + '{{ locals.payment_collector_deadline }}. Lamentablemente, aún no hemos recibido ' \ + 'el pago.
' + end + + it 'asks itself why are you even translating this' do + german, english, spanish = + translate_dutch(action: '%{subject} %{verb} %{object}') + expect(german).to eq '%{subject} %{verb} %{object}' + expect(english).to eq '%{subject} %{verb} %{object}' + expect(spanish).to eq '%{subject} %{verb} %{object}' + end + + def translate_dutch(dutch_pair) + key = dutch_pair.keys.first + phrase = dutch_pair[key] + locales = %w[de en-us es] + branches = locales.each_with_object({}) do |locale, hash| + hash[locale] = { 'testing' => { key => phrase } } + end + tree = build_tree(branches) + translations = base_task.translate_forest(tree, from: 'nl', backend: :deepl) + locales.map { |locale| translations[locale]['testing'][key].value.strip } + end + end end