Skip to content

Commit

Permalink
update gna_parse to new api
Browse files Browse the repository at this point in the history
  • Loading branch information
Zachary Foster committed Nov 9, 2024
1 parent 65d564f commit de0849d
Show file tree
Hide file tree
Showing 20 changed files with 198 additions and 267 deletions.
5 changes: 3 additions & 2 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -243,10 +243,11 @@ export(get_wormsid)
export(get_wormsid_)
export(getkey)
export(gisd_isinvasive)
export(gn_parse)
export(gna_parse)
export(gna_search)
export(gni_details)
export(gni_parse)
export(gni_search)
export(gni_seach)
export(gnr_datasources)
export(gnr_resolve)
export(id2name)
Expand Down
2 changes: 1 addition & 1 deletion R/gbif_parse.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
#' all species names in `scientificname`.
#' @author John Baumgartner \email{johnbb@@student.unimelb.edu.au}
#' @references https://www.gbif.org/tools/name-parser/about
#' @seealso [gni_parse()], [gn_parse()]
#' @seealso [gni_parse()], [gna_parse()]
#' @examples \dontrun{
#' gbif_parse(scientificname='x Agropogon littoralis')
#' gbif_parse(c('Arrhenatherum elatius var. elatius',
Expand Down
40 changes: 25 additions & 15 deletions R/gn_parse.R → R/gna_parse.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,43 +8,53 @@
#' @seealso [gbif_parse()], [gni_parse()]
#' @references http://gni.globalnames.org/
#' @examples \dontrun{
#' gn_parse("Cyanistes caeruleus")
#' gn_parse("Plantago minor")
#' gn_parse("Plantago minor minor")
#' gn_parse(c("Plantago minor minor","Helianthus annuus texanus"))
#' gna_parse("Cyanistes caeruleus")
#' gna_parse("Plantago minor")
#' gna_parse("Plantago minor minor")
#' gna_parse(c("Plantago minor minor","Helianthus annuus texanus"))
#'
#' # if > 20 names, uses an HTTP POST request
#' x <- names_list("species", size = 30)
#' gn_parse(x)
#' gna_parse(x)
#'
#' # pass on curl options
#' gn_parse("Cyanistes caeruleus", verbose = TRUE)
#' gna_parse("Cyanistes caeruleus", verbose = TRUE)
#' }
gn_parse <- function(names, ...) {
gna_parse <- function(names, ...) {
assert(names, "character")
method <- ifelse(length(names) <= 20, "get", "post")
tmp <- gn_http(method, names, ...)
tmp <- gna_http(method, names, ...)
out <- jsonlite::fromJSON(tmp)
out[paste0('canonical_', colnames(out$canonical))] <- out$canonical
out$canonical <- NULL
tibble::as_tibble(out)
}

gn_http <- function(method, names, ...) {
cli <- crul::HttpClient$new("https://parser.globalnames.org",
gna_http <- function(method, names, ...) {
cli <- crul::HttpClient$new("https://parser.globalnames.org/api/v1/",
headers = tx_ual, opts = list(...))
res <- switch(method,
get = {
names <- paste0(names, collapse = "|")
# args <- list(q = names)
cli$get(paste0("api/v1/", names))
cli$get(paste0('api/v1/', paste0(names, collapse = "|")))
},
post = {
cli$headers <- c(cli$headers, list(`Content-Type` = "application/json",
Accept = "application/json"))
cli$post("api", body = jsonlite::toJSON(names))
accept = "application/json"))
cli$post(body = jsonlite::toJSON(list(names = names)))
}
)
res$raise_for_status()
res$parse("UTF-8")
}


#' Parse scientific names using EOL's name parser.
#'
#' THIS FUNCTION IS DEFUNCT. Use [gna_parse()] instead.
#'
#' @param names,... Ignored; kept so that existing calls reach the
#' defunct error instead of failing on argument matching.
#' @return Nothing; always signals an error of class `defunctError`.
#' @export
#' @keywords internal
gni_parse <- function(names, ...) {
  # `new`/`package` end up as fields on the signalled condition, so they
  # must name the real replacement and the package that hosts it.
  .Defunct("gna_parse", "taxize",
    msg = "This function is defunct. See gna_parse()")
}
80 changes: 80 additions & 0 deletions R/gna_search.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
#' @title Search for taxonomic names using the Global Names Architecture
#'
#' @description Uses the Global Names Index, see http://gni.globalnames.org
#' Requests are sent to `https://verifier.globalnames.org/api/v1/search/`.
#'
#' @export
#' @param sci (character) required. Name pattern you want to search
#' for. WARNING: Does not work for common names. Search term may include
#' following options:
#'
#' * `n`: A shortcut that allows to put together several elements (e.g., `n:B. bubo Linn. 1750-1800`)
#' * `g`: a genus name. (e.g. `g:B.`, `g:Bub.`, `g:Bubo`)
#' * `isp`: an infraspecies name (e.g. `sp:bubo`, `sp:gallop.`)
#' * `asp`: either species or infraspecies (all sp) (e.g. `asp:bubo`)
#' * `ds`: data-sources IDs (e.g., `ds:1,2,3`)
#' * `tx`: parent taxon . Uses classification of the first data-source from `ds`. If data-sources are not set, uses Catalogue of Life. (e.g. `tx:Aves`)
#' * `au`: author - Search by author word (e.g. `au:Linnaeus`, `au:Linn.`)
#' * `y`: year - Search by year (e.g. `y:2005`)
#'
#' @param justtotal Return only the total results found.
#' @param parse_names If `TRUE`, use [gna_parse()] to parse the returned
#' names and append the parsed columns. Default: `FALSE`
#' @param per_page,page Deprecated and ignored; a warning is raised when
#' either is supplied.
#' @param search_term Deprecated, see `sci`. When supplied it overrides `sci`
#' and a warning is raised.
#' @param ... Curl options passed on to [crul::verb-GET]
#' @author Scott Chamberlain, Zachary Foster
#' @return data.frame of results, or only the total number of results when
#' `justtotal = TRUE`.
#' @seealso [gnr_datasources()], [gna_parse()]
#' @keywords globalnamesindex names taxonomy
#' @references http://gni.globalnames.org/
#' https://apidoc.globalnames.org/gnames
#'
#' @examples \dontrun{
#' gna_search('n:B. bubo ds:1,2 au:Linn. y:1700-')
#' }
gna_search <- function(sci, justtotal = FALSE, parse_names = FALSE,
  per_page = NULL, page = NULL, search_term = NULL, ...) {

  # `search_term` is the legacy spelling of `sci`. Honour it instead of
  # silently dropping the caller's query (and then failing on a missing `sci`).
  if (!is.null(search_term)) {
    warning("`search_term` is deprecated, use `sci` instead", call. = FALSE)
    sci <- search_term
  }
  # Paging arguments are kept in the signature for compatibility only.
  if (!is.null(per_page) || !is.null(page)) {
    warning("`per_page` and `page` are deprecated and ignored", call. = FALSE)
  }

  cli <- crul::HttpClient$new('https://verifier.globalnames.org',
    headers = tx_ual, opts = list(...))
  tt <- cli$get(path = paste0('/api/v1/search/', curl::curl_escape(sci)))
  tt$raise_for_status()
  out <- jsonlite::fromJSON(tt$parse("UTF-8"), FALSE)

  if (justtotal) {
    return(out$metadata$namesNumber)
  }

  # Flatten every returned record into a one-row data.frame. Nested field
  # names come back from unlist() as "a.b.c"; keep only the last component
  # and drop later duplicates of the same short name.
  df <- do.call(rbind, lapply(out$names, function(x) {
    all_fields <- unlist(x)
    names(all_fields) <- vapply(
      strsplit(names(all_fields), split = '.', fixed = TRUE),
      function(parts) parts[length(parts)],
      FUN.VALUE = character(1)
    )
    all_fields <- all_fields[!duplicated(names(all_fields))]
    as.data.frame(as.list(all_fields))
  }))
  df <- colClasses(df, "character")
  df <- tibble::as_tibble(df)
  if (NROW(df) != 0) {
    # NOTE(review): assumes every record flattens to exactly these 36 fields
    # in this order; the assignment errors otherwise -- confirm against API.
    names(df) <- c("id", "name", "cardinality", "matchType", "dataSourceId", "dataSourceTitleShort",
      "curation", "recordId", "outlink", "entryDate", "sortScore",
      "matchedNameID", "matchedName", "matchedCardinality", "currentRecordId",
      "currentNameId", "currentName", "currentCardinality", "currentCanonicalSimple",
      "currentCanonicalFull", "taxonomicStatus", "isSynonym", "classificationPath",
      "classificationRanks", "classificationIds", "editDistance", "stemEditDistance",
      "cardinalityScore", "infraSpecificRankScore", "fuzzyLessScore",
      "curatedDataScore", "authorMatchScore", "acceptedNameScore",
      "parsingQualityScore", "dataSourcesNum", "dataSourcesIds")
  }

  # gni_parse() is defunct (it always errors), so parsing goes through
  # gna_parse(). Skip it when there is nothing to parse.
  if (parse_names && NROW(df) != 0) {
    data.frame(df, gna_parse(as.character(df$name)), stringsAsFactors = FALSE)
  } else {
    df
  }
}

#' Search for taxonomic names using the Global Names Index
#'
#' THIS FUNCTION IS DEFUNCT. Use [gna_search()] instead.
#'
#' @param names,... Ignored; kept so that existing calls reach the
#' defunct error instead of failing on argument matching.
#' @return Nothing; always signals an error of class `defunctError`.
#' @export
#' @keywords internal
gni_search <- function(names, ...) {
  # `new`/`package` end up as fields on the signalled condition, so they
  # must name the real replacement and the package that hosts it.
  .Defunct("gna_search", "taxize",
    msg = "This function is defunct. See gna_search()")
}

# The stub was originally committed under the misspelled name `gni_seach`.
# Keep that name as an alias so an export of it still resolves.
#' @rdname gni_search
#' @export
#' @keywords internal
gni_seach <- gni_search
2 changes: 1 addition & 1 deletion R/gni_details.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
#' @param ... Curl options passed on to [crul::verb-GET]
#' @author Scott Chamberlain
#' @return Data.frame of results.
#' @seealso [gnr_datasources()], [gni_search()].
#' @seealso [gnr_datasources()], [gna_search()].
#' @keywords globalnamesindex names taxonomy
#' @examples \dontrun{
#' gni_details(id = 17802847)
Expand Down
45 changes: 0 additions & 45 deletions R/gni_parse.R

This file was deleted.

87 changes: 0 additions & 87 deletions R/gni_verifier.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,90 +24,3 @@



#' @title Search for taxonomic names using the Global Names Index
#'
#' @description Uses the Global Names Index, see http://gni.globalnames.org
#'
#' @export
#' @param sci (character) required. Name pattern you want to search
#' for. WARNING: Does not work for common names. Search term may include
#' following options:
#'
#' * `n`: A shortcut that allows to put together several elements (e.g., `n:B. bubo Linn. 1750-1800`)
#' * `g`: a genus name. (e.g. `g:B.`, `g:Bub.`, `g:Bubo`)
#' * `isp`: an infraspecies name (e.g. `sp:bubo`, `sp:gallop.`)
#' * `asp`: either species or infraspecies (all sp) (e.g. `asp:bubo`)
#' * `ds`: data-sources IDs (e.g., `ds:1,2,3`)
#' * `tx`: parent taxon . Uses classification of the first data-source from `ds`. If data-sources are not set, uses Catalogue of Life. (e.g. `tx:Aves`)
#' * `au`: author - Search by author word (e.g. `au:Linnaeus`, `au:Linn.`)
#' * `y`: year - Search by year (e.g. `y:2005`)
#'
#' @param justtotal Return only the total results found.
#' @param parse_names If `TRUE`, use [gni_parse()] to parse
#' names. Default: `FALSE`
#' @param per_page Deprecated and unused; supplying it only triggers a
#' deprecation warning.
#' @param page Deprecated and unused; supplying it only triggers a
#' deprecation warning.
#' @param search_term Deprecated, see `sci`
#' @param ... Curl options passed on to [crul::verb-GET]
#' @author Scott Chamberlain, Zachary Foster
#' @return data.frame of results.
#' @seealso [gnr_datasources()], [gni_search()]
#' @keywords globalnamesindex names taxonomy
#' @references http://gni.globalnames.org/
#' https://apidoc.globalnames.org/gnames
#'
#' @examples \dontrun{
#' gni_search('n:B. bubo ds:1,2 au:Linn. y:1700-')
#' }
gni_search <- function(sci, justtotal = FALSE, parse_names = FALSE,
per_page = NULL, page = NULL, search_term = NULL, ...) {

# `search_term` is the deprecated name for `sci`: warn, then use its value.
if (!is.null(search_term)) {
lifecycle::deprecate_warn(when = "v0.9.97", what = "gni_search(search_term)", with = "gni_search(sci)")
sci <- search_term
}
# `per_page` and `page` are not used anywhere below; they only warn.
if (!is.null(per_page)) {
lifecycle::deprecate_warn(when = "v0.9.97", what = "gni_search(per_page)")
}
if (!is.null(page)) {
lifecycle::deprecate_warn(when = "v0.9.97", what = "gni_search(page)")
}

# NOTE(review): `query` is never read again in this function.
query <- tc(list(search_term = sci))
cli <- crul::HttpClient$new('https://verifier.globalnames.org',
headers = tx_ual, opts = list(...))
# The URL-escaped search string is sent as the last path segment.
tt <- cli$get(path = paste0('/api/v1/search/', curl::curl_escape(sci)))
tt$raise_for_status()
# Second argument FALSE = simplifyVector off, so `out` stays a nested list.
out <- jsonlite::fromJSON(tt$parse("UTF-8"), FALSE)

if (justtotal) {
return(out$metadata$namesNumber)
} else {
# One data.frame row per returned record: flatten the record, keep only the
# last dot-separated component of each flattened name, drop repeated names.
df <- do.call(rbind, lapply(out$names, function(x) {
all_feilds <- unlist(x)
names(all_feilds) <- vapply(strsplit(names(all_feilds), split = '.', fixed = TRUE), function(x) x[length(x)], FUN.VALUE = character(1))
all_feilds <- all_feilds[! duplicated(names(all_feilds))]
return(as.data.frame(as.list(all_feilds)))
}))
df <- colClasses(df, "character")
df <- tibble::as_tibble(df)
# NOTE(review): positional rename; presumably the API always yields these
# 36 fields in this order -- confirm, the assignment errors otherwise.
if (NROW(df) != 0) {
names(df) <- c("id", "name", "cardinality", "matchType", "dataSourceId", "dataSourceTitleShort",
"curation", "recordId", "outlink", "entryDate", "sortScore",
"matchedNameID", "matchedName", "matchedCardinality", "currentRecordId",
"currentNameId", "currentName", "currentCardinality", "currentCanonicalSimple",
"currentCanonicalFull", "taxonomicStatus", "isSynonym", "classificationPath",
"classificationRanks", "classificationIds", "editDistance", "stemEditDistance",
"cardinalityScore", "infraSpecificRankScore", "fuzzyLessScore",
"curatedDataScore", "authorMatchScore", "acceptedNameScore",
"parsingQualityScore", "dataSourcesNum", "dataSourcesIds")
}

# Optionally bind the gni_parse() output for the `name` column as extra columns.
if (parse_names) {
data.frame(df, gni_parse(as.character(df$name)), stringsAsFactors = FALSE)
} else {
df
}
}
}
2 changes: 1 addition & 1 deletion R/gnr_datasources.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
#' @param ... Curl options passed on to [crul::HttpClient]
#' @param todf defunct, always get a data.frame back now
#' @return data.frame/tibble
#' @seealso [gnr_resolve()], [gni_search()]
#' @seealso [gnr_resolve()], [gna_search()]
#' @keywords resolve names taxonomy
#' @references https://resolver.globalnames.org/data_sources
#' @examples \dontrun{
Expand Down
2 changes: 1 addition & 1 deletion R/iucn_getname.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
#' }
iucn_getname <- function(name, verbose = TRUE, ...) {
mssg(verbose, "searching Global Names ...")
all_names <- gni_search(sci = name, parse_names = TRUE)
all_names <- gna_search(sci = name, parse_names = TRUE)
if (NROW(all_names) == 0) {
stop("No names found matching ", name, call. = FALSE)
}
Expand Down
1 change: 0 additions & 1 deletion R/taxize-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,6 @@
#' @importFrom cli symbol cat_line rule
#' @name taxize-package
#' @aliases taxize
#' @docType package
#' @author Scott Chamberlain
#' @author Eduard Szoecs \email{eduardszoecs@@gmail.com}
#' @author Zachary Foster \email{zacharyfoster1989@@gmail.com}
Expand Down
2 changes: 1 addition & 1 deletion R/taxize_cite.R
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ data_citations <- function(x){
gisd_isinvasive = list(gisd = c_gisd),
gni_details = list(gni = c_gni),
gni_parse = list(gni = c_gni),
gni_search = list(gni = c_gni),
gna_search = list(gni = c_gni),
gnr_datasources = list(global_names = c_gnames),
gnr_resolve = list(global_names = c_gnames),
iplant_resolve = list(iplant = c_iplant),
Expand Down
2 changes: 1 addition & 1 deletion man/gbif_parse.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit de0849d

Please sign in to comment.