Skip to content

Commit

Permalink
Resolve taxonomic names from external APIs to PhyloPic (#97)
Browse files Browse the repository at this point in the history
* Start resolve_phylopic

* Implement taxonomic hierarchy for some APIs

* Implement taxonomic hierarchy for some other APIs

* Get names from APIs; add tests and more docs

* Add function to pkgdown index

* Make error message more useful; add test

* Lint fixes

* Update docs; shorten example

* Apply suggestions from code review

Co-authored-by: Lewis A. Jones <[email protected]>

* Fix some docs; allow shortened api names

* hack for the pbdb

* Fix api abbreviation support

* Fix EOL subspecies issue

* Pull out some shared code

* Pull out some more shared code

* Fix docs

* Test api abbreviations

---------

Co-authored-by: Lewis A. Jones <[email protected]>
  • Loading branch information
willgearty and LewisAJones authored Dec 20, 2023
1 parent ded8df0 commit df53f80
Show file tree
Hide file tree
Showing 10 changed files with 484 additions and 9 deletions.
4 changes: 4 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ export(get_uuid)
export(phylopic_key_glyph)
export(pick_phylopic)
export(recolor_phylopic)
export(resolve_phylopic)
export(rotate_phylopic)
export(save_phylopic)
importFrom(curl,nslookup)
Expand Down Expand Up @@ -65,6 +66,7 @@ importFrom(grid,nullGrob)
importFrom(grid,rasterGrob)
importFrom(grid,unit)
importFrom(httr,GET)
importFrom(httr,POST)
importFrom(httr,content)
importFrom(jsonlite,fromJSON)
importFrom(knitr,combine_words)
Expand All @@ -75,6 +77,8 @@ importFrom(png,readPNG)
importFrom(rsvg,rsvg_png)
importFrom(rsvg,rsvg_svg)
importFrom(stats,setNames)
importFrom(utils,URLdecode)
importFrom(utils,URLencode)
importFrom(utils,browseURL)
importFrom(utils,menu)
importFrom(utils,packageDescription)
Expand Down
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
* updated citation
* added warning when specified size is more than 1000 times smaller than the y-axis range (mostly useful for when making maps with coord_sf) (#86)
* changed the defaults and behavior of the color and fill argument/aesthetics to better maintain backwards compatibility but also prevent unnecessary outlines (#87)
* added resolve_phylopic (#66)
* pick_phylopic now accepts a list of uuids via the uuid argument (#95)

# rphylopic 1.2.2

Expand Down
2 changes: 1 addition & 1 deletion R/get_uuid.R
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ get_uuid <- function(name = NULL, img = NULL, n = 1, filter = NULL,
if (!is.logical(url)) {
stop("`url` should be of class logical.")
}
# Normalise name -------------------------------------------------------
# Normalize name -------------------------------------------------------
name <- tolower(name)
name <- gsub("_", " ", name)
# API call -------------------------------------------------------------
Expand Down
3 changes: 2 additions & 1 deletion R/pick_phylopic.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ utils::globalVariables(c("x", "y", "uuid", "label"))
#' of available images, all available uuids will be returned. Defaults to 5.
#' Only relevant if `name` supplied.
#' @param uuid \code{character}. A vector (or list) of valid PhyloPic
#' silhouette uuids, such as that returned by [get_uuid()].
#' silhouette uuids, such as that returned by [get_uuid()] or
#' [resolve_phylopic()].
#' @param view \code{numeric}. Number of silhouettes that should be plotted at
#' the same time. Defaults to 1.
#' @param filter \code{character}. Filter uuid(s) by usage license. Use "by"
Expand Down
227 changes: 227 additions & 0 deletions R/resolve_phylopic.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,227 @@
#' Use a taxonomic name from another database to get a PhyloPic image UUID
#'
#' This function takes a supplied taxonomic name, queries it via the specified
#' external API, resolves the API's returned taxonomic ID to the PhyloPic
#' taxonomic node database, then retrieves PhyloPic image uuids (or urls) for
#' that node.
#'
#' @param name \code{character}. A full or partial taxonomic name to be queried
#' via the specified `api`.
#' @param api \code{character}. The API in which to query `name`. See Details
#' for the available options.
#' @param hierarchy \code{logical}. Whether the taxonomic hierarchy of `name`
#' should be retrieved from the API and used to get several sets of PhyloPic
#' image uuids (or urls).
#' @param max_ranks \code{numeric}. The maximum number of taxonomic ranks that
#' should be included if `hierarchy` is `TRUE`.
#' @inheritParams get_uuid
#'
#' @return A `list` where each value is `n` (or fewer) PhyloPic image uuids (or
#' urls if `url = TRUE`) and each name is the taxonomic name as matched and
#' reported by the specified `api`. If `hierarchy` is `FALSE`, the list has
#' length 1. If `hierarchy` is `TRUE`, the list has maximum length
#' `max_ranks`.
#'
#' @details If `hierarchy` is `FALSE`, the specified `name` is queried via the
#' specified `api`. The matched id is then resolved to the matching taxonomic
#' node in the PhyloPic database. If `hierarchy` is `TRUE`, the full taxonomic
#' hierarchy for `name` is retrieved from the specified `api`, those taxonomic
#' names are subset to `max_ranks` ranks (starting from the specified `name`
#' and ascending the hierarchy). Then each of those names is resolved to the
#' matching taxonomic node in the PhyloPic database (where possible). In
#' either case, [get_uuid()] is then used to retrieve `n` image UUID(s) for
#' each taxonomic name.
#'
#' Note that while the names of the returned list are the taxonomic names as
#' reported by the specified `api`, the PhyloPic images that are returned are
#' associated with whatever taxonomic node that taxonomic name resolves to in
#' the PhyloPic database. This means that, if `hierarchy` is `TRUE`, the same
#' images may be returned for multiple taxonomic names. Also, if a particular
#' taxonomic name does not resolve to any node in the PhyloPic database, no
#' images will be returned for that name.
#'
#' The following APIs are available for querying (`api`):
#' \itemize{
#' \item{"eol.org": the \href{https://eol.org/}{Encyclopedia of Life}}
#' (note: `hierarchy = TRUE` is not currently available for this API) ("eol"
#' is also allowed)
#' \item{"gbif.org": the \href{https://www.gbif.org/}{Global Biodiversity
#' Information Facility}} ("gbif" is also allowed)
#' \item{"marinespecies.org": the \href{https://marinespecies.org/}{World
#' Registor of Marine Species}} ("worms" is also allowed)
#' \item{"opentreeoflife.org": the \href{https://tree.opentreeoflife.org/}{
#' Open Tree of Life}} ("otol" is also allowed)
#' \item{"paleobiodb.org": the \href{https://paleobiodb.org/#/}{Paleobiology
#' Database}} ("pbdb" is also allowed)
#' }
#'
#' @importFrom httr POST
#' @importFrom utils URLencode URLdecode
#' @importFrom stats setNames
#' @export
#' @examples \dontrun{
#' # get a uuid for a single name
#' resolve_phylopic(name = "Canis lupus")
#' # get uuids for the taxonomic hierarchy
#' resolve_phylopic(name = "Velociraptor mongoliensis", api = "paleobiodb.org",
#' hierarchy = TRUE, max_ranks = 3)
#' }
resolve_phylopic <- function(name, api = "gbif.org", hierarchy = FALSE,
max_ranks = 5, n = 1, filter = NULL, url = FALSE) {
url_arg <- url
# replace api abbreviations
abbrvs <- setNames(c("eol.org", "gbif.org", "marinespecies.org",
"opentreeoflife.org", "paleobiodb.org"),
c("eol", "gbif", "worms", "otol", "pbdb"))
if (api %in% names(abbrvs)) api <- abbrvs[[api]]
# Check arguments ------------------------------------------------------
api <- match.arg(api, unname(abbrvs))
if (!is.character(name)) {
stop("`name` should be of class character.")
}
if (!is.character(api)) {
stop("`api` should be of class character.")
}
if (!is.logical(hierarchy)) {
stop("`hierarchy` should be of class logical.")
}
if (!is.numeric(max_ranks)) {
stop("`max_ranks` should be of class numeric.")
}
# Normalize name -------------------------------------------------------
name <- tolower(name)
name <- gsub("_", " ", name)
name_encode <- URLencode(name)
# Query specified API for the name -------------------------------------
if (api == "eol.org") {
check_url("https://eol.org/api/search/1.0.json")
namespace <- "pages"
url <- paste0("https://eol.org/api/search/1.0.json?page=1&q=", name_encode)
jsn <- json_GET(url)
# EOL appears to return lots of subspecies, so check if any match `name`
# first, otherwise, return the first result
if (jsn$totalResults == 0) stop("No results returned from the API.")
matches <- which(tolower(jsn$results$title) == name)
ind <- ifelse(any(matches), matches[1], 1)
ids <- jsn$results$id[ind]
name_vec <- jsn$results$title[ind]
if (hierarchy) {
warning("`hierarchy = TRUE` is not currently available for eol.org.")
hierarchy <- FALSE
}
} else if (api == "gbif.org") {
check_url("https://api.gbif.org/v1/")
namespace <- "species"
url <- paste0("https://api.gbif.org/v1/species/suggest?",
"limit=1&q=", name_encode)
jsn <- json_GET(url)
if (length(jsn) == 0) stop("No results returned from the API.")
ids <- jsn$key
name_vec <- jsn$canonicalName
if (hierarchy) {
url <- paste0("https://api.gbif.org/v1/species/match?verbose=true&",
"name=", URLencode(jsn$canonicalName[1]))
jsn <- json_GET(url)
ids <- c(jsn$speciesKey[1], jsn$genusKey[1], jsn$familyKey[1],
jsn$orderKey[1], jsn$classKey[1], jsn$phylumKey[1],
jsn$kingdomKey[1])
name_vec <- c(jsn$species[1], jsn$genus[1], jsn$family[1],
jsn$order[1], jsn$class[1], jsn$phylum[1],
jsn$kingdom[1])
}
} else if (api == "marinespecies.org") {
check_url("https://www.marinespecies.org/rest/")
namespace <- "taxname"
url <- paste0("https://www.marinespecies.org/rest/",
"AphiaRecordsByMatchNames?marine_only=false&",
"scientificnames%5B%5D=", name_encode)
jsn <- json_GET(url)
ids <- jsn[[1]]$AphiaID[1]
name_vec <- jsn[[1]]$scientificname[1]
if (hierarchy) {
url <- paste0("https://www.marinespecies.org/rest/",
"AphiaClassificationByAphiaID/", ids)
jsn <- json_GET(url)
lst_sub <- jsn
ids <- character()
name_vec <- character()
while ("child" %in% names(lst_sub)) {
ids <- c(ids, lst_sub$AphiaID)
name_vec <- c(name_vec, lst_sub$scientificname)
lst_sub <- lst_sub$child
}
ids <- rev(ids)
name_vec <- rev(name_vec)
}
} else if (api == "paleobiodb.org") {
check_url("https://paleobiodb.org/data1.2/")
namespace <- "txn"
url <- paste0("https://paleobiodb.org/data1.2/taxa/auto.json?",
"limit=10&name=", name_encode)
jsn <- json_GET(url)
if ("errors" %in% jsn || length(jsn$records) == 0)
stop("No results returned from the API.")
# sometimes returns higher taxonomic ranks first even when there is a
# perfect match, so check if any match `name`, otherwise, return the first
# result
matches <- which(tolower(jsn$records$nam) == name)
ind <- ifelse(any(matches), matches[1], 1)
ids <- jsn$records$oid[ind]
name_vec <- jsn$records$nam[ind]
if (hierarchy) {
url <- paste0("https://paleobiodb.org/data1.2/taxa/list.json?",
"rel=all_parents&", "id=txn:", ids)
jsn <- json_GET(url)
ids <- rev(gsub("txn:", "", jsn$records$oid))
name_vec <- rev(jsn$records$nam)
}
} else if (api == "opentreeoflife.org") {
check_url("https://api.opentreeoflife.org/")
namespace <- "taxonomy"
url <- "https://api.opentreeoflife.org/v3/tnrs/autocomplete_name"
res <- POST(url = url, encode = "json", body = list("name" = name))
jsn <- response_to_JSON(res)
if (length(jsn) == 0) stop("No results returned from the API.")
ids <- jsn$ott_id[1]
name_vec <- jsn$unique_name[1]
if (hierarchy) {
url <- "https://api.opentreeoflife.org/v3/taxonomy/taxon_info"
res <- POST(url = url, encode = "json",
body = list("include_lineage" = TRUE, "ott_id" = ids))
jsn <- response_to_JSON(res)
ids <- c(ids, jsn$lineage$ott_id)
name_vec <- c(name_vec, jsn$lineage$unique_name)
}
}
# subset ids if more than max_ranks
ids <- ids[seq_len(min(length(ids), max_ranks))]
# Resolve to PhyloPic and get images -----------------------------------
lst <- list()
for (i in seq_along(ids)) {
api_return <- phy_GET(path = paste0("resolve/", api, "/", namespace, "/",
ids[i]))
# catch any errors here
if ("errors" %in% names(api_return)) {
lst[[name_vec[i]]] <- character()
} else {
tax <- api_return$names[[1]]$text[1]
lst[[name_vec[i]]] <- get_uuid(tax, n = n, filter = filter, url = url_arg)
}
}
return(lst)
}

# check that a particular URL (e.g., for an API) is online
check_url <- function(url) {
headers <- curlGetHeaders(url)
if (attr(headers, "status") != 200) {
stop("API is not available or you have no internet connection.")
}
}

#' @importFrom httr GET
json_GET <- function(url) {
res <- GET(url = url)
if (length(res$content) == 0) stop("No results returned from the API.")
response_to_JSON(res)
}
16 changes: 10 additions & 6 deletions R/zzz.r
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,7 @@ as_null <- function(x) if (length(x) == 0) NULL else x

pbase <- function() "https://api.phylopic.org"

#' @importFrom jsonlite fromJSON
#' @importFrom httr GET content
#' @importFrom httr GET
#' @importFrom curl nslookup
phy_GET <- function(path, query = list(), ...) {
# Check PhyloPic (or user) is online
Expand All @@ -26,13 +25,18 @@ phy_GET <- function(path, query = list(), ...) {
})
query <- as_null(pc(query))
tt <- GET(url = pbase(), path = path, query = query)
tmp <- content(tt, as = "text", encoding = "UTF-8")
jsn <- fromJSON(tmp)
jsn <- response_to_JSON(tt)
if (tt$status == 400) { # need to supply the build argument
query[["build"]] <- jsn$build
tt <- GET(url = pbase(), path = path, query = query)
tmp <- content(tt, as = "text", encoding = "UTF-8")
jsn <- fromJSON(tmp)
jsn <- response_to_JSON(tt)
}
jsn
}

#' @importFrom httr content
#' @importFrom jsonlite fromJSON
response_to_JSON <- function(response) {
tmp <- content(response, as = "text", encoding = "UTF-8")
return(fromJSON(tmp))
}
1 change: 1 addition & 0 deletions _pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ reference:
Functions for identifying and retrieving PhyloPic silhouettes.
contents:
- get_uuid
- resolve_phylopic
- pick_phylopic
- get_phylopic
- plot_phylopic
Expand Down
3 changes: 2 additions & 1 deletion man/pick_phylopic.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit df53f80

Please sign in to comment.