From 675b17700a26cbf05c4aeffb12ba17cd4f228d5b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ABlle=20Salmon?= Date: Fri, 5 Apr 2024 10:53:20 +0200 Subject: [PATCH] feat: footnote spacing&colon protection --- R/translate.R | 32 +++++++++++++++---- inst/example-equations-footnote.md | 3 ++ tests/testthat/_snaps/translate.md | 11 +++++++ .../api/translate-746e3b-POST.json | 8 +++++ tests/testthat/test-translate.R | 18 ++++++++++- 5 files changed, 64 insertions(+), 8 deletions(-) create mode 100644 inst/example-equations-footnote.md create mode 100644 tests/testthat/fixtures/example-equations-footnote/api/translate-746e3b-POST.json diff --git a/R/translate.R b/R/translate.R index 6f8e449..20ec117 100644 --- a/R/translate.R +++ b/R/translate.R @@ -386,11 +386,13 @@ unprotect_math <- function(math) { } protect_squaries <- function(node) { text <- xml2::xml_text(node) - text <- gsub("\\[", "", text) - text <- gsub("\\]", "", text) - text <- sprintf("%s", text) - text <- gsub("<\\/text>", "", text) - text <- sprintf("%s", text) + text <- gsub('\\[', '', text) + text <- gsub('\\]', '', text) + text <- sprintf('%s', text) + text <- gsub('<\\/text>', '', text) + text <- sprintf('%s', text) + # hack to preserve colon that DeepL API kills + text <- gsub(":", ":", text) at_things <- regmatches(text, gregexpr("@[[:alnum:]]*", text))[[1]] footnote_things <- regmatches(text, gregexpr("\\^[[:alnum:]]*", text))[[1]] @@ -407,7 +409,13 @@ protect_squaries <- function(node) { unprotect_squary <- function(node) { xml2::xml_name(node) <- "text" - xml2::xml_text(node) <- sprintf("[%s]", trimws(xml2::xml_text(node))) + node_text <- trimws(xml2::xml_text(node)) + if (!grepl(":$", node_text)) { + xml2::xml_text(node) <- sprintf("[%s]", node_text) + } else { + node_text <- sub(":$", "", node_text) + xml2::xml_text(node) <- sprintf("[%s]:", node_text) + } } unprotect_notranslate <- function(node) { @@ -423,11 +431,21 @@ unprotect_non_code_block <- function(non_code_block) { } untangle_text <- function(node) { - text <- trimws(xml2::xml_text(node)) + text <- xml2::xml_text(node) + text <- gsub("\\s+", " ", text) # like str_squish w/o str_trim + # trying to only leave space where needed + no_left_sibling <- (length(xml2::xml_find_first(node, "preceding-sibling::*")) == 0) + which <- if (no_left_sibling) { + "both" + } else { + "right" + } + text <- trimws(text, which = which) xml2::xml_remove(xml2::xml_children(node)) xml2::xml_replace( node, xml2::xml_name(node), + `xml:space`="preserve", asis = 'true', gsub("\\\n", "", text) ) diff --git a/inst/example-equations-footnote.md b/inst/example-equations-footnote.md new file mode 100644 index 0000000..1fe953a --- /dev/null +++ b/inst/example-equations-footnote.md @@ -0,0 +1,3 @@ +Voyez $a$ une équation avec une note ensuite [^1]. + +[^1]: une note de bas de page. diff --git a/tests/testthat/_snaps/translate.md b/tests/testthat/_snaps/translate.md index efd456f..8379f04 100644 --- a/tests/testthat/_snaps/translate.md +++ b/tests/testthat/_snaps/translate.md @@ -46,3 +46,14 @@ Output [1] "$i_t = j_t$" +# deepl_translate() handles equations+footnote well + + Code + foot_math_lines + Output + [1] "See $a$ an equation with a note afterwards [^1] ." + [2] "" + [3] "[^1]: a footnote." + [4] "" + [5] "" + diff --git a/tests/testthat/fixtures/example-equations-footnote/api/translate-746e3b-POST.json b/tests/testthat/fixtures/example-equations-footnote/api/translate-746e3b-POST.json new file mode 100644 index 0000000..4f8bf0a --- /dev/null +++ b/tests/testthat/fixtures/example-equations-footnote/api/translate-746e3b-POST.json @@ -0,0 +1,8 @@ +{ + "translations": [ + { + "detected_source_language": "FR", + "text": "\n\n\n \n See <\/text>\n $a$<\/math>\n \n an equation with a note afterwards <\/text>\n \n ^1<\/notranslate>\n <\/squary>\n .<\/text>\n <\/text>\n <\/paragraph>\n \n \n ^1<\/notranslate>:<\/squary>\n a footnote.<\/text>\n <\/text>\n <\/paragraph>\n<\/document>\n" + } + ] +} diff --git a/tests/testthat/test-translate.R b/tests/testthat/test-translate.R index 9e6fd11..23631e4 100644 --- a/tests/testthat/test-translate.R +++ b/tests/testthat/test-translate.R @@ -205,7 +205,6 @@ test_that("deepl_translate() handles square brackets stuff well", { }) }) - test_that("deepl_translate() handles equations well", { to_translate <- system.file("example-equations.md", package = "babeldown") out_path <- withr::local_tempfile() @@ -224,3 +223,20 @@ test_that("deepl_translate() handles equations well", { expect_snapshot(sub(".*que ", "", math_lines[7])) expect_snapshot(math_lines[9]) }) + +test_that("deepl_translate() handles equations+footnote well", { + to_translate <- system.file("example-equations-footnote.md", package = "babeldown") + out_path <- withr::local_tempfile() + with_mock_dir("example-equations-footnote", { + deepl_translate( + path = to_translate, + out_path = out_path, + source_lang = "FR", + target_lang = "EN-US", + yaml_fields = NULL + ) + }) + foot_math_lines <- brio::read_lines(out_path) + expect_snapshot(foot_math_lines) +}) +