Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

21941 replaces the <i18n> interpolation tag in i18n-tasks for our cus… #3

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 84 additions & 3 deletions lib/i18n/tasks/translators/deepl_translator.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
require 'i18n/tasks/translators/base_translator'

module I18n::Tasks::Translators
class DeeplTranslator < BaseTranslator
class DeeplTranslator < BaseTranslator # rubocop:disable Metrics/ClassLength
# max allowed texts per request
BATCH_SIZE = 50
# those languages must be specified with their sub-kind e.g en-us
Expand Down Expand Up @@ -60,21 +60,102 @@ def options_for_plain

# @param [String] value
# @return [String] 'hello, %{name}' => 'hello, <i18n>%{name}</i18n>'
def replace_interpolations(value)
def original_replace_interpolations(value)
tom-brouwer-bex marked this conversation as resolved.
Show resolved Hide resolved
value.gsub(INTERPOLATION_KEY_RE, '<i18n>\0</i18n>')
end

# @param [String] untranslated
# @param [String] translated
# @return [String] 'hello, <i18n>%{name}</i18n>' => 'hello, %{name}'
def restore_interpolations(untranslated, translated)
def original_restore_interpolations(untranslated, translated)
return translated if untranslated !~ INTERPOLATION_KEY_RE

translated.gsub(%r{</?i18n>}, '')
rescue StandardError => e
raise_interpolation_error(untranslated, translated, e)
end

# deepl does a better job with interpolations when it doesn't have to deal
# with <br> tags, so we replace all of them with meaningless asterisk chains
BR_REGEXP = %r{(<br\s*/?>\s*)+}i.freeze
BR_SINGLE_MARKER = ' *** '
BR_DOUBLE_MARKER = ' ***** '

# letting deepl 'read' the interpolations gives better translations (and
# solves the problem of interpolations getting pushed all the way to the
# front of the sentence), however, deepl will also try to translate
# the interpolations and that gets messy.
# we use nonsense three-letter acronyms so deepl will 'read' them and leave
# them alone (the letter X also works very well, except in sentences with
# several consecutive interpolations because that reads X X X and deepl
# doesn't handle that well)
# deepl also needs to know if an interpolation will be a word or a number,
# for romance languages it matters. a little Spanish lesson to illustrate:
tom-brouwer-bex marked this conversation as resolved.
Show resolved Hide resolved
# "%{foo} betalingen" translates either to "facturas %{foo}"
# (openstaande betalingen -> facturas pendientes) or to "%{foo} facturas"
# (5 betalingen -> 5 facturas)
# for interpolation keys that are usually numeric, we pick a number
# instead of the three-letter acronym (more consistency in how we name
# interpolation keys would help)
LETTER_SUBS = %w[RYX QFN VLB XOG DWP ZMQ JZQ WVS LRX HPM].freeze
NUM_SUBS = %w[17 19 23 29 31 37 41 43 47 53].freeze

def sub_for_handle(handle, index)
case handle.gsub(/[^a-z]/, '')
when 'count', 'minutes', 'hours'
NUM_SUBS[index % NUM_SUBS.size]
else
LETTER_SUBS[index % LETTER_SUBS.size]
end
end

# BEX version of replace_interpolation
def replace_interpolations(value)
index = 0
value.gsub(INTERPOLATION_KEY_RE) do |handle|
sub = sub_for_handle(handle, index)
index += 1
"<var handle=\"#{handle}\" sub=\"#{sub}\">#{sub}</var>"
end.gsub(BR_REGEXP) do |br|
if br.downcase.count('b') == 2
# never more than two <br> in a row, it gets messy
BR_DOUBLE_MARKER
else
BR_SINGLE_MARKER
end
end
end

# reversing our substitutions should be straight-forward, but it's not
# because deepl gets creative. cases are explained inline.
def restore_interpolations(untranslated, translated)
translated.gsub(%r{(.?)<var handle="([^"]*)" sub="([^"]*)">([^<]*)</var>}) do
char = ::Regexp.last_match(1)
tom-brouwer-bex marked this conversation as resolved.
Show resolved Hide resolved
handle = ::Regexp.last_match(2)
sub = ::Regexp.last_match(3)
body = ::Regexp.last_match(4)
if body == sub
tom-brouwer-bex marked this conversation as resolved.
Show resolved Hide resolved
# deepl kept the 'sub' text inside the <var> tag and nothing else, clean.
"#{char}#{handle}"
elsif body.index(sub)
# deepl took some letters from outside the <var> tag and placed them
# inside the <var> e.g. task <var>"RYX</var>"
before, after = body.split(sub, 2)
"#{before}#{handle}#{after}"
elsif "#{char}#{body}".downcase == sub.downcase
# deepl took the first letter from inside the <var> tag and placed it
# immediately before the <var> tag e.g. R<var>yx</var>
handle
else
# instead of trying to look normal the fallback prints something
# obviously wrong hoping to get some attention and a manual fix
"!!!!!#{sub.inspect} (#{char.inspect} #{body.inspect})!!!!!"
end
end.gsub(BR_DOUBLE_MARKER, '<br /><br />').gsub(BR_SINGLE_MARKER, '<br />')
rescue StandardError => e
raise_interpolation_error(untranslated, translated, e)
end

def no_results_error_message
I18n.t('i18n_tasks.deepl_translate.errors.no_results')
end
Expand Down
97 changes: 93 additions & 4 deletions spec/deepl_translate_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@

text_test = [
'key',
"Hello, %{user} O'Neill! How are you? {{ Check out this Liquid tag, it should not be translated }} \
{% That applies to this Liquid tag as well %}",
"¡Hola, %{user} O'Neill! ¿Qué tal estás? {{ Check out this Liquid tag, it should not be translated }} \
{% That applies to this Liquid tag as well %}"
"Hello, %{user} O'Neill! How are you? {{ Check out this Liquid tag, it should not be translated }} " \
'{% That applies to this Liquid tag as well %}',
"¡Hola, %{user} O'Neill! ¿Qué tal? {{ Check out this Liquid tag, it should not be translated }} " \
'{% That applies to this Liquid tag as well %}'
]

html_test_plrl = [
Expand Down Expand Up @@ -92,4 +92,93 @@
end
end
end

# Don't expect deepl's answers to be exactly the same each run
describe 'translating Dutch into other languages' do
let(:base_task) { I18n::Tasks::BaseTask.new }

before do
skip 'temporarily disabled on JRuby due to https://github.com/jruby/jruby/issues/4802' if RUBY_ENGINE == 'jruby'
skip 'DEEPL_AUTH_KEY env var not set' unless ENV['DEEPL_AUTH_KEY']
end

it 'tells time' do
german, english, spanish =
translate_dutch(hours_and_minutes: '%{hours} uur en %{minutes} minuten')
expect(german).to eq '%{hours} Stunden und %{minutes} Minuten'
expect(english).to eq '%{hours} hours and %{minutes} minutes'
expect(spanish).to eq '%{hours} horas y %{minutes} minutos'
end

it 'counts' do
german, english, spanish =
translate_dutch(other: '%{count} taken')
expect(german).to eq '%{count} Aufgaben'
expect(english).to eq '%{count} tasks'
expect(spanish).to eq '%{count} tareas'
end

it 'assigns' do
german, english, spanish =
translate_dutch(assigned: 'Taak "%{todo}" toegewezen aan %{user}')
expect(german).to eq 'To-dos "%{todo}" zugewiesen an %{user}'
expect(english).to eq 'Task "%{todo}" assigned to %{user}'
expect(spanish).to eq 'Tarea "%{todo}" asignada a %{user}'
end

it 'sings' do
german, english, spanish =
translate_dutch(verse: 'Ik zou zo graag een %{animal} kopen. Ik zag %{count} beren %{food} smeren')
expect(german).to eq 'Ich würde so gerne einen %{animal} kaufen. Ich sah %{count} Bären, die %{food} schmierten'
# greasing is a funny way to say smeren, but we let it slide
expect(english).to eq 'I would so love to buy a %{animal}. I saw %{count} bears greasing %{food}'
expect(spanish).to eq 'Me encantaría comprar un %{animal}. Vi %{count} osos engrasando %{food}'
end

it 'sends emails' do
german, english, spanish =
translate_dutch(
email_body_html: '{{ booking.greeting }},<br><br>Bijgevoegd ziet u een factuur van {{ park.name }} met ' \
'factuurnummer {{ invoice.invoice_nr }}.<br />Volgens onze administratie had het ' \
'verschuldigde bedrag van {{ locals.payment_collector_total }} op ' \
'{{ locals.payment_collector_deadline }} moeten zijn betaald. Helaas hebben we nog ' \
'geen betaling ontvangen.<br>'
)
expect(german).to eq '{{ booking.greeting }},<br /><br />Anbei finden Sie eine Rechnung von {{ park.name }} ' \
'mit der Rechnungsnummer {{ invoice.invoice_nr }}.<br />Laut unserer Verwaltung hätte der ' \
'von {{ locals.payment_collector_total }} geschuldete Betrag ' \
'am {{ locals.payment_collector_deadline }} bezahlt werden müssen. Leider haben wir die ' \
'Zahlungen noch nicht erhalten.<br />'
expect(english).to eq '{{ booking.greeting }},<br /><br />Attached please find an invoice from {{ park.name }} ' \
'with invoice number {{ invoice.invoice_nr }}.<br />According to our records, the amount ' \
'due from {{ locals.payment_collector_total }} on ' \
'{{ locals.payment_collector_deadline }} should have been paid. Unfortunately, we have ' \
'not yet received payment.<br />'
expect(spanish).to eq '{{ booking.greeting }},<br /><br />Adjuntamos una factura de {{ park.name }} con el ' \
'número de factura {{ invoice.invoice_nr }}.<br />Según nuestros registros, el importe ' \
'adeudado por {{ locals.payment_collector_total }} debería haber sido abonado en ' \
'{{ locals.payment_collector_deadline }}. Lamentablemente, aún no hemos recibido ' \
'el pago.<br />'
end

it 'asks itself why are you even translating this' do
german, english, spanish =
translate_dutch(action: '%{subject} %{verb} %{object}')
expect(german).to eq '%{subject} %{verb} %{object}'
expect(english).to eq '%{subject} %{verb} %{object}'
expect(spanish).to eq '%{subject} %{verb} %{object}'
end

def translate_dutch(dutch_pair)
key = dutch_pair.keys.first
phrase = dutch_pair[key]
locales = %w[de en-us es]
branches = locales.each_with_object({}) do |locale, hash|
hash[locale] = { 'testing' => { key => phrase } }
end
tree = build_tree(branches)
translations = base_task.translate_forest(tree, from: 'nl', backend: :deepl)
locales.map { |locale| translations[locale]['testing'][key].value.strip }
end
end
end