Misc fixes from review (#2)

* Document and check possible lang values * Fold unnecessary gsub into subsequent strsplit * Simplify and tighten tests * Use match to avoid subsequent reordering * Remove duplicate line * Simplify case of nomatch by using dedicated arg * Remove unnecessary \dontrun * Remove unnecessary indexing by looping over digits * Add GitHub links to DESCRIPTION * Run devtools::document() * Run spelling::update_wordlist() * Copy .lintr from packagetemplate
epiverse-trace · Nov 7, 2023 · d24b7f9 · d24b7f9
1 parent 3d4b788
commit d24b7f9
Show file tree

Hide file tree

Showing 8 changed files with 52 additions and 125 deletions.
diff --git a/.lintr b/.lintr
@@ -1,16 +1,32 @@
-linters: linters_with_tags(
-    tags = NULL, # include all linters
+linters: all_linters(
+    packages = c("lintr", "etdev"),
     object_name_linter = NULL,
-    undesirable_function_linter = NULL,
     implicit_integer_linter = NULL,
     extraction_operator_linter = NULL,
     todo_comment_linter = NULL,
+    library_call_linter = NULL,
+    undesirable_function_linter(
+      modify_defaults(
+        default_undesirable_functions,
+        citEntry = "use the more modern bibentry() function",
+        library = NULL # too many false positives in too many files
+      )
+    ),
     function_argument_linter = NULL,
     indentation_linter = NULL, # unstable as of lintr 3.1.0
     # Use minimum R declared in DESCRIPTION or fall back to current R version.
     # Install etdev package from https://github.com/epiverse-trace/etdev
     backport_linter(if (length(x <- etdev::extract_min_r_version())) x else getRversion())
   )
 exclusions: list(
-    "tests/testthat.R" = list(unused_import_linter = Inf)
+    "tests/testthat.R" = list(
+      unused_import_linter = Inf
+    ),
+    "tests" = list(
+      undesirable_function_linter = Inf
+    ),
+    "data-raw" = list(
+      missing_package_linter = Inf,
+      namespace_linter = Inf
+    )
   )
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -19,9 +19,8 @@ Description: Converts numbers written as English, French or Spanish words
     to their equivalent number.  English, French or Spanish words to their
     equivalent numeric.
 License: MIT + file LICENSE
-URL: https://bahadzie.github.io/numberizeR/index.html,
-    https://github.com/bahadzie/numberizeR
-BugReports: https://github.com/bahadzie/numberizeR/issues
+URL: https://github.com/epiverse-trace/numberize
+BugReports: https://github.com/epiverse-trace/numberize/issues
 Suggests: 
     spelling,
     testthat (>= 3.0.0)
@@ -35,3 +34,4 @@ Config/Needs/website: epiverse-trace/epiversetheme
 Config/Department: Centre for the Mathematical Modelling of Infectious Diseases
 Config/DepartmentURL: https://www.lshtm.ac.uk/research/centres/centre-mathematical-modelling-infectious-diseases
 Config/Recon: experimental
+
diff --git a/R/numberize.R b/R/numberize.R
@@ -64,18 +64,14 @@ digits_from <- function(text, lang = "en") {
     text <- gsub("billones", "bill\u00f3n", text, fixed = TRUE)
     text <- gsub("veinti\u00fan", "veintiuno", text, fixed = TRUE) # edge case
     text <- gsub("\\sun\\s", " uno ", text)
-    text <- gsub("\\sun\\s", " uno ", text)
   }
   if (lang == "fr") {
     text <- gsub("(cent|mille|million|milliard|billion)s\\b", "\\1", text) # lang=fr plural->singular # nolint: nonportable_path_linter, line_length_linter.
     text <- gsub("quatre vingt", "quatre-vingt", text, fixed = TRUE) # lang=fr one word # nolint: line_length_linter.
   }
 
-  text <- gsub("\\s{2,}", " ", text) # collapse spaces
-  words <- strsplit(text, " ", fixed = TRUE)[[1]]
-  mapping <- numbers[numbers[[lang]] %in% words, c("digit", lang)]
-  row.names(mapping) <- mapping[[lang]] # to easily subset next line
-  digits <- mapping[words, ]$digit
+  words <- strsplit(text, "\\s+")[[1]]
+  digits <- numbers[match(words, numbers[[lang]]), "digit"]
   digits
 }
 
@@ -95,11 +91,8 @@ digits_from <- function(text, lang = "en") {
 #' @return A numeric value.
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 number_from <- function(digits) {
-  # match can return NA so make a vector ending 0 and get the first non NA
-  thousand_index <- c(match(1000, digits), 0)
-  million_index <- c(match(1E6, digits), 0)
-  thousand_index <- thousand_index[!is.na(thousand_index)][1]
-  million_index <- million_index[!is.na(million_index)][1]
+  thousand_index <- match(1000, digits, nomatch = 0)
+  million_index <- match(1E6, digits, nomatch = 0)
 
   # for lang = "es" multiply 1000 * 1E6 for billion
   if (thousand_index < million_index) { # es thousand million = billion
@@ -108,15 +101,15 @@ number_from <- function(digits) {
 
   summed <- 0
   total <- 0
-  for (i in seq_along(digits)) {
-    if (digits[i] %in% c(1E3, 1E6, 1E9, 1E12)) {
-      total <- total + summed * digits[i]
+  for (d in digits) {
+    if (d %in% c(1E3, 1E6, 1E9, 1E12)) {
+      total <- total + summed * d
       summed <- 0
-    } else if (digits[i] == 100) {
+    } else if (d == 100) {
       if (summed == 0) summed <- 1 # needed for standalone cent/100 (fr)
-      summed <- summed * digits[i]
+      summed <- summed * d
     } else {
-      summed <- summed + digits[i]
+      summed <- summed + d
     }
   }
   summed + total
@@ -132,15 +125,13 @@ number_from <- function(digits) {
 #' @return A numeric value.
 #'
 #' @examples
-#' \dontrun{
 #' numberize("five hundred and thirty eight")
-#' # [1] 538
-#' }
 #'
 #' @return A numeric value.
 #'
 #' @export
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-numberize <- function(text, lang = "en") {
+numberize <- function(text, lang = c("en", "fr", "es")) {
+  lang <- match.arg(lang)
   number_from(digits_from(text, lang))
 }
diff --git a/inst/WORDLIST b/inst/WORDLIST
@@ -1,99 +1,22 @@
-www
-reconverse
-Centre
-Modelling
 CMD
+Centre
 Codecov
 Epiverse
 Lifecycle
-Titlecase
-al
+Modelling
+cleanepi
 codecov
-doi
-et
+deux
 gh
 github
 https
 io
 lifecycle
 packagename
+reconverse
 repo
-svg
-yaml
-zenodo
-numberize
-bcien
-catorce
-ciento
-cinco
-cincuenta
-cinq
-cinquante
-cuarenta
-cuatro
-cuatrocientos
-deux
-diecinueve
-dieciocho
-diecis
-diecisiete
-diez
-dix
-doce
-dontrun
-doscientos
-douze
-gsub
-huit
-lang
-mille
-millones
-na
-neuf
-novecientos
-noventa
-nueve
-numberize
-ochenta
-ocho
-ochocientos
-onze
-param
-quarante
-quatorze
-quatre
-quinientos
-quinze
-seis
-seiscientos
-sept
-sesenta
-setecientos
-setenta
-siete
-soixante
 spelt
-stringsAsFactors
-strsplit
-sy
-tolower
-trece
-treinta
-treize
-trente
-tres
-trescientos
+svg
 trois
-un
-uno
-veinte
-veinticinco
-veinticuatro
-veintid
-veintinueve
-veintiocho
-veintis
-veintisiete
-veintitr
-veintiuno
-vingt
+www
+yaml
diff --git a/man/digits_from.Rd b/man/digits_from.Rd
diff --git a/man/number_from.Rd b/man/number_from.Rd
diff --git a/man/numberize.Rd b/man/numberize.Rd
diff --git a/tests/testthat/test-numberize.R b/tests/testthat/test-numberize.R
@@ -63,17 +63,17 @@ test_df <- data.frame(
 
 test_that("translating English numbers works", {
   res <- sapply(test_df$en, numberize)
-  expect_identical(sum(res == test_df$num), length(test_df$num))
+  expect_identical(unname(res), test_df$num)
 })
 
 test_that("translating French numbers works", {
   res <- sapply(test_df$fr, numberize, lang = "fr")
-  expect_identical(sum(res == test_df$num), length(test_df$num))
+  expect_identical(unname(res), test_df$num)
 })
 
 test_that("translating Spanish numbers works", {
   res <- sapply(test_df$es, numberize, lang = "es")
-  expect_identical(sum(res == test_df$num), length(test_df$num))
+  expect_identical(unname(res), test_df$num)
 })
 
-# TODO test edge cases in es and fr
+# TODO test edge cases in es and fr