Skip to content

Commit

Permalink
feat: use git diff (#47)
Browse files Browse the repository at this point in the history
* feat: add deepl_update()

* register deps

* tests: test deepl_update()
  • Loading branch information
maelle authored Dec 7, 2023
1 parent 08ee200 commit 99e62c1
Show file tree
Hide file tree
Showing 12 changed files with 570 additions and 1 deletion.
2 changes: 2 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,12 @@ Imports:
Suggests:
blogdown,
fs,
gert,
httptest2,
knitr,
quarto,
rmarkdown,
rprojroot,
sys,
testthat (>= 3.0.0)
Remotes:
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ export(deepl_translate_hugo)
export(deepl_translate_markdown_string)
export(deepl_translate_quarto)
export(deepl_translate_vtt)
export(deepl_update)
export(deepl_upsert_glossary)
export(deepl_usage)
importFrom(lifecycle,deprecated)
Expand Down
6 changes: 5 additions & 1 deletion R/translate.R
Original file line number Diff line number Diff line change
Expand Up @@ -217,9 +217,13 @@ fakify_xml <- function(nodes_list) {
readLines(system.file("template.xml", package = "babeldown")),
collapse = "\n"
)

fill <- if (inherits(nodes_list, "xml_nodeset")) {
paste(as.character(nodes_list), collapse = "\n")
} else {
} else if (inherits(nodes_list, "xml_node")) {
as.character(nodes_list)
} else
{
paste(
purrr::map_chr(nodes_list, ~ paste(as.character(xml2::xml_children(.x)), collapse = "\n")),
collapse = "\n"
Expand Down
143 changes: 143 additions & 0 deletions R/update.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
#' Update a translation of a file in a Git repo
#'
#' Re-use existing translation where possible
#' (at the node level: paragraph, heading, etc.)
#'
#' @details
#' The function looks for the latest commit that updated the source file,
#' and for the latest commit that updated the target file.
#' If the target file was updated later than the source file,
#' or at the same time,
#' nothing happens: you might need to
#' reorder the Git history with rebase for instance.
#'
#' Only the commits returned by `gert::git_log()` with its default settings
#' are inspected. An error is raised if no commit updating the source file
#' (or the target file) is found among them.
#'
#' Both files must live in the same Git repository. They are located in the
#' Git history through their path relative to the repository root, so they
#' can be in subdirectories.
#'
#' @inheritParams deepl_translate
#'
#' @return `NULL` if `out_path` is already up to date. Otherwise the function
#'   is called for its side effect: rewriting `out_path`.
#' @export
#'
#'
deepl_update <- function(path,
                         out_path,
                         yaml_fields = c("title", "description"),
                         glossary_name = NULL,
                         source_lang = NULL,
                         target_lang = NULL,
                         formality = c("default", "more", "less", "prefer_more", "prefer_less")) {

  # fs is used below and, like rprojroot and gert, is only a suggested package.
  rlang::check_installed(c("rprojroot", "gert", "fs"))

  formality <- rlang::arg_match(
    formality,
    values = c("default", "more", "less", "prefer_more", "prefer_less")
  )

  source_lang_code <- examine_source_lang(source_lang)
  target_lang_code <- examine_target_lang(target_lang)

  glossary_id <- examine_glossary(
    glossary_name,
    source_lang_code = source_lang_code,
    target_lang_code = target_lang_code
  )

  # Fail early, before any Git work, if there is no translation to update.
  if (!fs::file_exists(out_path)) {
    cli::cli_abort("Can't find the translation file {.path {out_path}}.")
  }

  repo <- rprojroot::find_root(rprojroot::is_git_root, path)

  # Git diffs report paths relative to the repository root, so compare against
  # those rather than against the bare file names.
  repo_real <- fs::path_real(repo)
  source_rel_path <- as.character(
    fs::path_rel(fs::path_real(path), start = repo_real)
  )
  target_rel_path <- as.character(
    fs::path_rel(fs::path_real(out_path), start = repo_real)
  )

  # determine whether out_path is out of date
  # TODO or not, make it work for over 100 commits?
  # The log is ordered from newest to oldest: a smaller index is more recent.
  log <- gert::git_log(repo = repo)

  # TODO or not, won't work if it was renamed in the important timeframe
  latest_source_commit_index <- find_latest_commit_index(
    log,
    file = source_rel_path,
    repo = repo
  )
  latest_target_commit_index <- find_latest_commit_index(
    log,
    file = target_rel_path,
    repo = repo
  )

  # Source last changed at or before the last change of the target:
  # the translation is considered up to date.
  if (latest_source_commit_index >= latest_target_commit_index) {
    return(NULL)
  }

  # Recreate the repository as it was when the target was last updated,
  # to recover the version of the source that the current target translates.
  dir_at_target_latest_update <- withr::local_tempdir()
  fs::dir_copy(repo, dir_at_target_latest_update, overwrite = TRUE)
  gert::git_reset_hard(
    ref = log[["commit"]][[latest_target_commit_index]],
    repo = dir_at_target_latest_update
  )
  old_source <- tinkr::yarn$new(
    file.path(dir_at_target_latest_update, source_rel_path)
  )

  new_source <- tinkr::yarn$new(file.path(repo, source_rel_path))

  old_target <- tinkr::yarn$new(file.path(out_path))

  # The old source and the current target are not modified below,
  # so their top-level nodes can be extracted once.
  old_source_tags <- xml2::xml_children(old_source$body)
  old_target_tags <- xml2::xml_children(old_target$body)

  # Node i of the old source is assumed to be translated by node i of the
  # target, which only makes sense if both documents have the same layout.
  same_structure <-
    (length(old_source_tags) == length(old_target_tags)) &&
    all(
      purrr::map_chr(old_source_tags, xml2::xml_name) ==
        purrr::map_chr(old_target_tags, xml2::xml_name)
    )

  if (!same_structure) {
    cli::cli_abort("Old version of {path}, and current {out_path}, do not have an equivalent XML structure.")
  }

  # The new target starts as the new source (same object, edited in place);
  # each top-level node is then swapped for a translation.
  new_target <- new_source
  tags_seq <- seq_along(xml2::xml_children(new_target$body))
  for (tag_index in tags_seq) {
    # Re-query the children at each iteration since nodes get replaced.
    current_tag <- xml2::xml_children(new_target$body)[[tag_index]]
    same_tag <- purrr::map_lgl(old_source_tags, tags_the_same, current_tag)

    if (any(same_tag)) {
      # Unchanged node: re-use the translation found at the same position
      # in the current target (first match if there are several).
      same_index <- which(same_tag)[1]
      xml2::xml_replace(current_tag, old_target_tags[[same_index]])
    } else {
      # New or edited node: translate it.
      # NOTE(review): the raw `source_lang`/`target_lang` are passed here, not
      # the `*_lang_code` values computed above -- confirm against
      # translate_part().
      translation <- translate_part(
        current_tag,
        glossary_id = glossary_id,
        source_lang = source_lang,
        target_lang = target_lang,
        formality = formality
      )
      translation_kiddo <- xml2::xml_child(translation)
      xml2::xml_replace(current_tag, translation_kiddo)
    }
  }

  new_target$write(out_path)
}

# Index, within `log` (as returned by gert::git_log(), newest commit first),
# of the most recent commit whose diff lists `file` (a path relative to the
# repository root). Aborts if no commit of `log` touches `file`.
find_latest_commit_index <- function(log, file, repo) {
  commits <- log[["commit"]]
  for (commit_index in seq_along(commits)) {
    diff_info <- gert::git_diff(commits[[commit_index]], repo = repo)
    if (file %in% diff_info[["new"]]) {
      return(commit_index)
    }
  }

  n_commits <- length(commits)
  cli::cli_abort(
    c(
      "Can't find a commit that updated {.path {file}}.",
      i = "Inspected the Git log of {.path {repo}} ({n_commits} commits)."
    )
  )
}

# Decide whether two XML nodes can be considered the same piece of content:
# same node type, same text, and the same sequence of child node types.
# Returns a single TRUE or FALSE.
tags_the_same <- function(tag1, tag2) {
  # TODO: or just compare as.character() of tags??

  # A heading and a paragraph with the same text are different content.
  if (!identical(xml2::xml_name(tag1), xml2::xml_name(tag2))) {
    return(FALSE)
  }

  if (!identical(xml2::xml_text(tag1), xml2::xml_text(tag2))) {
    return(FALSE)
  }

  # identical() rather than all(==): with a different number of children,
  # `==` would recycle the shorter vector (or yield logical(0), for which
  # all() is TRUE) and could report a false match.
  identical(
    purrr::map_chr(xml2::xml_children(tag1), xml2::xml_name),
    purrr::map_chr(xml2::xml_children(tag2), xml2::xml_name)
  )
}
52 changes: 52 additions & 0 deletions man/deepl_update.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

157 changes: 157 additions & 0 deletions tests/testthat/fixtures/git1/api/languages-298d87.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
[
{
"language": "BG",
"name": "Bulgarian",
"supports_formality": false
},
{
"language": "CS",
"name": "Czech",
"supports_formality": false
},
{
"language": "DA",
"name": "Danish",
"supports_formality": false
},
{
"language": "DE",
"name": "German",
"supports_formality": true
},
{
"language": "EL",
"name": "Greek",
"supports_formality": false
},
{
"language": "EN-GB",
"name": "English (British)",
"supports_formality": false
},
{
"language": "EN-US",
"name": "English (American)",
"supports_formality": false
},
{
"language": "ES",
"name": "Spanish",
"supports_formality": true
},
{
"language": "ET",
"name": "Estonian",
"supports_formality": false
},
{
"language": "FI",
"name": "Finnish",
"supports_formality": false
},
{
"language": "FR",
"name": "French",
"supports_formality": true
},
{
"language": "HU",
"name": "Hungarian",
"supports_formality": false
},
{
"language": "ID",
"name": "Indonesian",
"supports_formality": false
},
{
"language": "IT",
"name": "Italian",
"supports_formality": true
},
{
"language": "JA",
"name": "Japanese",
"supports_formality": true
},
{
"language": "KO",
"name": "Korean",
"supports_formality": false
},
{
"language": "LT",
"name": "Lithuanian",
"supports_formality": false
},
{
"language": "LV",
"name": "Latvian",
"supports_formality": false
},
{
"language": "NB",
"name": "Norwegian",
"supports_formality": false
},
{
"language": "NL",
"name": "Dutch",
"supports_formality": true
},
{
"language": "PL",
"name": "Polish",
"supports_formality": true
},
{
"language": "PT-BR",
"name": "Portuguese (Brazilian)",
"supports_formality": true
},
{
"language": "PT-PT",
"name": "Portuguese (European)",
"supports_formality": true
},
{
"language": "RO",
"name": "Romanian",
"supports_formality": false
},
{
"language": "RU",
"name": "Russian",
"supports_formality": true
},
{
"language": "SK",
"name": "Slovak",
"supports_formality": false
},
{
"language": "SL",
"name": "Slovenian",
"supports_formality": false
},
{
"language": "SV",
"name": "Swedish",
"supports_formality": false
},
{
"language": "TR",
"name": "Turkish",
"supports_formality": false
},
{
"language": "UK",
"name": "Ukrainian",
"supports_formality": false
},
{
"language": "ZH",
"name": "Chinese (simplified)",
"supports_formality": false
}
]
Loading

0 comments on commit 99e62c1

Please sign in to comment.