Skip to content

Commit

Permalink
Misc fixes from review (#2)
Browse files Browse the repository at this point in the history
* Document and check possible lang values

* Fold unnecessary gsub into subsequent strsplit

* Simplify and tighten tests

* Use match to avoid subsequent reordering

* Remove duplicate line

* Simplify case of nomatch by using dedicated arg

* Remove unnecessary \dontrun

* Remove unnecessary indexing by looping over digits

* Add GitHub links to DESCRIPTION

* Run devtools::document()

* Run spelling::update_wordlist()

* Copy .lintr from packagetemplate
  • Loading branch information
Bisaloo authored Nov 7, 2023
1 parent 3d4b788 commit d24b7f9
Show file tree
Hide file tree
Showing 8 changed files with 52 additions and 125 deletions.
24 changes: 20 additions & 4 deletions .lintr
Original file line number Diff line number Diff line change
@@ -1,16 +1,32 @@
linters: linters_with_tags(
tags = NULL, # include all linters
linters: all_linters(
packages = c("lintr", "etdev"),
object_name_linter = NULL,
undesirable_function_linter = NULL,
implicit_integer_linter = NULL,
extraction_operator_linter = NULL,
todo_comment_linter = NULL,
library_call_linter = NULL,
undesirable_function_linter(
modify_defaults(
default_undesirable_functions,
citEntry = "use the more modern bibentry() function",
library = NULL # too many false positives in too many files
)
),
function_argument_linter = NULL,
indentation_linter = NULL, # unstable as of lintr 3.1.0
# Use minimum R declared in DESCRIPTION or fall back to current R version.
# Install etdev package from https://github.com/epiverse-trace/etdev
backport_linter(if (length(x <- etdev::extract_min_r_version())) x else getRversion())
)
exclusions: list(
"tests/testthat.R" = list(unused_import_linter = Inf)
"tests/testthat.R" = list(
unused_import_linter = Inf
),
"tests" = list(
undesirable_function_linter = Inf
),
"data-raw" = list(
missing_package_linter = Inf,
namespace_linter = Inf
)
)
6 changes: 3 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,8 @@ Description: Converts numbers written as English, French or Spanish words
to their equivalent numeric values.
License: MIT + file LICENSE
URL: https://bahadzie.github.io/numberizeR/index.html,
https://github.com/bahadzie/numberizeR
BugReports: https://github.com/bahadzie/numberizeR/issues
URL: https://github.com/epiverse-trace/numberize
BugReports: https://github.com/epiverse-trace/numberize/issues
Suggests:
spelling,
testthat (>= 3.0.0)
Expand All @@ -35,3 +34,4 @@ Config/Needs/website: epiverse-trace/epiversetheme
Config/Department: Centre for the Mathematical Modelling of Infectious Diseases
Config/DepartmentURL: https://www.lshtm.ac.uk/research/centres/centre-mathematical-modelling-infectious-diseases
Config/Recon: experimental

33 changes: 12 additions & 21 deletions R/numberize.R
Original file line number Diff line number Diff line change
Expand Up @@ -64,18 +64,14 @@ digits_from <- function(text, lang = "en") {
text <- gsub("billones", "bill\u00f3n", text, fixed = TRUE)
text <- gsub("veinti\u00fan", "veintiuno", text, fixed = TRUE) # edge case
text <- gsub("\\sun\\s", " uno ", text)
text <- gsub("\\sun\\s", " uno ", text)
}
if (lang == "fr") {
text <- gsub("(cent|mille|million|milliard|billion)s\\b", "\\1", text) # lang=fr plural->singular # nolint: nonportable_path_linter, line_length_linter.
text <- gsub("quatre vingt", "quatre-vingt", text, fixed = TRUE) # lang=fr one word # nolint: line_length_linter.
}

text <- gsub("\\s{2,}", " ", text) # collapse spaces
words <- strsplit(text, " ", fixed = TRUE)[[1]]
mapping <- numbers[numbers[[lang]] %in% words, c("digit", lang)]
row.names(mapping) <- mapping[[lang]] # to easily subset next line
digits <- mapping[words, ]$digit
words <- strsplit(text, "\\s+")[[1]]
digits <- numbers[match(words, numbers[[lang]]), "digit"]
digits
}

Expand All @@ -95,11 +91,8 @@ digits_from <- function(text, lang = "en") {
#' @return A numeric value.
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
number_from <- function(digits) {
# match can return NA so make a vector ending 0 and get the first non NA
thousand_index <- c(match(1000, digits), 0)
million_index <- c(match(1E6, digits), 0)
thousand_index <- thousand_index[!is.na(thousand_index)][1]
million_index <- million_index[!is.na(million_index)][1]
thousand_index <- match(1000, digits, nomatch = 0)
million_index <- match(1E6, digits, nomatch = 0)

# for lang = "es" multiply 1000 * 1E6 for billion
if (thousand_index < million_index) { # es thousand million = billion
Expand All @@ -108,15 +101,15 @@ number_from <- function(digits) {

summed <- 0
total <- 0
for (i in seq_along(digits)) {
if (digits[i] %in% c(1E3, 1E6, 1E9, 1E12)) {
total <- total + summed * digits[i]
for (d in digits) {
if (d %in% c(1E3, 1E6, 1E9, 1E12)) {
total <- total + summed * d
summed <- 0
} else if (digits[i] == 100) {
} else if (d == 100) {
if (summed == 0) summed <- 1 # needed for standalone cent/100 (fr)
summed <- summed * digits[i]
summed <- summed * d
} else {
summed <- summed + digits[i]
summed <- summed + d
}
}
summed + total
Expand All @@ -132,15 +125,13 @@ number_from <- function(digits) {
#' @return A numeric value.
#'
#' @examples
#' \dontrun{
#' numberize("five hundred and thirty eight")
#' # [1] 538
#' }
#'
#' @return A numeric value.
#'
#' @export
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
numberize <- function(text, lang = "en") {
numberize <- function(text, lang = c("en", "fr", "es")) {
lang <- match.arg(lang)
number_from(digits_from(text, lang))
}
93 changes: 8 additions & 85 deletions inst/WORDLIST
Original file line number Diff line number Diff line change
@@ -1,99 +1,22 @@
www
reconverse
Centre
Modelling
CMD
Centre
Codecov
Epiverse
Lifecycle
Titlecase
al
Modelling
cleanepi
codecov
doi
et
deux
gh
github
https
io
lifecycle
packagename
reconverse
repo
svg
yaml
zenodo
numberize
bcien
catorce
ciento
cinco
cincuenta
cinq
cinquante
cuarenta
cuatro
cuatrocientos
deux
diecinueve
dieciocho
diecis
diecisiete
diez
dix
doce
dontrun
doscientos
douze
gsub
huit
lang
mille
millones
na
neuf
novecientos
noventa
nueve
numberize
ochenta
ocho
ochocientos
onze
param
quarante
quatorze
quatre
quinientos
quinze
seis
seiscientos
sept
sesenta
setecientos
setenta
siete
soixante
spelt
stringsAsFactors
strsplit
sy
tolower
trece
treinta
treize
trente
tres
trescientos
svg
trois
un
uno
veinte
veinticinco
veinticuatro
veintid
veintinueve
veintiocho
veintis
veintisiete
veintitr
veintiuno
vingt
www
yaml
2 changes: 1 addition & 1 deletion man/digits_from.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions man/number_from.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 2 additions & 5 deletions man/numberize.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 4 additions & 4 deletions tests/testthat/test-numberize.R
Original file line number Diff line number Diff line change
Expand Up @@ -63,17 +63,17 @@ test_df <- data.frame(

test_that("translating English numbers works", {
res <- sapply(test_df$en, numberize)
expect_identical(sum(res == test_df$num), length(test_df$num))
expect_identical(unname(res), test_df$num)
})

test_that("translating French numbers works", {
res <- sapply(test_df$fr, numberize, lang = "fr")
expect_identical(sum(res == test_df$num), length(test_df$num))
expect_identical(unname(res), test_df$num)
})

test_that("translating Spanish numbers works", {
res <- sapply(test_df$es, numberize, lang = "es")
expect_identical(sum(res == test_df$num), length(test_df$num))
expect_identical(unname(res), test_df$num)
})

# TODO test edge cases in es and fr
# TODO test edge cases in es and fr

0 comments on commit d24b7f9

Please sign in to comment.