Skip to content

Commit

Permalink
feat: footnote spacing & colon protection
Browse files Browse the repository at this point in the history
  • Loading branch information
maelle committed Apr 5, 2024
1 parent 26f8025 commit 675b177
Show file tree
Hide file tree
Showing 5 changed files with 64 additions and 8 deletions.
32 changes: 25 additions & 7 deletions R/translate.R
Original file line number Diff line number Diff line change
Expand Up @@ -386,11 +386,13 @@ unprotect_math <- function(math) {
}
protect_squaries <- function(node) {
text <- xml2::xml_text(node)
text <- gsub("\\[", "</text><squary>", text)
text <- gsub("\\]", "</squary><text>", text)
text <- sprintf("<text>%s</text>", text)
text <- gsub("<text><\\/text>", "", text)
text <- sprintf("<text>%s</text>", text)
text <- gsub('\\[', '</text><squary>', text)
text <- gsub('\\]', '</squary><text>', text)
text <- sprintf('<text xml:space="preserve">%s</text>', text)
text <- gsub('<text xml:space="preserve"><\\/text>', '', text)
text <- sprintf('<text xml:space="preserve">%s</text>', text)
# hack to preserve colon that DeepL API kills
text <- gsub("</squary><text>:", ":</squary><text>", text)

at_things <- regmatches(text, gregexpr("@[[:alnum:]]*", text))[[1]]
footnote_things <- regmatches(text, gregexpr("\\^[[:alnum:]]*", text))[[1]]
Expand All @@ -407,7 +409,13 @@ protect_squaries <- function(node) {

# Turn a protected <squary> node back into a regular <text> node whose
# content is wrapped in square brackets again.
#
# protect_squaries() moves a colon that directly follows the closing bracket
# (as in a footnote definition, "[^1]: ...") inside the <squary> element so
# that it survives translation. Here that trailing colon is moved back
# outside the brackets: "^1:" becomes "[^1]:", while "^1" becomes "[^1]".
#
# @param node An xml2 node named "squary". It is renamed and its text is
#   rewritten through xml2's replacement functions (presumably updating the
#   underlying document by reference -- confirm against callers).
# @return Called for its side effect on `node`.
unprotect_squary <- function(node) {
  xml2::xml_name(node) <- "text"

  # Read the text exactly once, before writing anything back: wrapping first
  # and then re-reading would add a second pair of brackets and hide the
  # trailing colon behind the closing bracket.
  node_text <- trimws(xml2::xml_text(node))

  has_trailing_colon <- grepl(":$", node_text)
  inner_text <- sub(":$", "", node_text)
  suffix <- if (has_trailing_colon) ":" else ""

  xml2::xml_text(node) <- sprintf("[%s]%s", inner_text, suffix)
}

unprotect_notranslate <- function(node) {
Expand All @@ -423,11 +431,21 @@ unprotect_non_code_block <- function(non_code_block) {
}

untangle_text <- function(node) {
text <- trimws(xml2::xml_text(node))
text <- xml2::xml_text(node)
text <- gsub("\\s+", " ", text) # like str_squish w/o str_trim
# trying to only leave space where needed
no_left_sibling <- (length(xml2::xml_find_first(node, "preceding-sibling::*")) == 0)
which <- if (no_left_sibling) {
"both"
} else {
"right"
}
text <- trimws(text, which = which)
xml2::xml_remove(xml2::xml_children(node))
xml2::xml_replace(
node,
xml2::xml_name(node),
`xml:space`="preserve",
asis = 'true',
gsub("\\\n", "", text)
)
Expand Down
3 changes: 3 additions & 0 deletions inst/example-equations-footnote.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Voyez $a$ une équation avec une note ensuite [^1].

[^1]: une note de bas de page.
11 changes: 11 additions & 0 deletions tests/testthat/_snaps/translate.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,3 +46,14 @@
Output
[1] "$i_t = j_t$"

# deepl_translate() handles equations+footnote well

Code
foot_math_lines
Output
[1] "See $a$ an equation with a note afterwards [^1] ."
[2] ""
[3] "[^1]: a footnote."
[4] ""
[5] ""

Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"translations": [
{
"detected_source_language": "FR",
"text": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!DOCTYPE document SYSTEM \"CommonMark.dtd\">\n<document xmlns=\"http://commonmark.org/xml/1.0\">\n <paragraph>\n <text xml:space=\"preserve\">See <\/text>\n <math asis=\"true\">$a$<\/math>\n <text xml:space=\"preserve\">\n <text xml:space=\"preserve\"> an equation with a note afterwards <\/text>\n <squary>\n <notranslate>^1<\/notranslate>\n <\/squary>\n <text>.<\/text>\n <\/text>\n <\/paragraph>\n <paragraph>\n <text xml:space=\"preserve\">\n <squary><notranslate>^1<\/notranslate>:<\/squary>\n <text> a footnote.<\/text>\n <\/text>\n <\/paragraph>\n<\/document>\n"
}
]
}
18 changes: 17 additions & 1 deletion tests/testthat/test-translate.R
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,6 @@ test_that("deepl_translate() handles square brackets stuff well", {
})
})


test_that("deepl_translate() handles equations well", {
to_translate <- system.file("example-equations.md", package = "babeldown")
out_path <- withr::local_tempfile()
Expand All @@ -224,3 +223,20 @@ test_that("deepl_translate() handles equations well", {
expect_snapshot(sub(".*que ", "", math_lines[7]))
expect_snapshot(math_lines[9])
})

# Regression test: a paragraph that contains inline math followed by a
# footnote reference, plus the matching footnote definition, is translated
# and the resulting lines are compared against the stored snapshot.
test_that("deepl_translate() handles equations+footnote well", {
# Example document shipped with the package (inline math + footnote).
to_translate <- system.file("example-equations-footnote.md", package = "babeldown")
# Temporary output file, removed automatically when the test ends.
out_path <- withr::local_tempfile()
# NOTE(review): with_mock_dir() presumably replays the recorded API response
# stored under "example-equations-footnote" instead of calling the live
# service -- confirm against the fixtures directory.
with_mock_dir("example-equations-footnote", {
deepl_translate(
path = to_translate,
out_path = out_path,
source_lang = "FR",
target_lang = "EN-US",
yaml_fields = NULL
)
})
# The variable name below is part of the recorded snapshot ("Code" section),
# so renaming it would invalidate the snapshot.
foot_math_lines <- brio::read_lines(out_path)
expect_snapshot(foot_math_lines)
})

0 comments on commit 675b177

Please sign in to comment.