palaeoverse · willgearty · Dec 20, 2023 · Dec 13, 2023 · Dec 14, 2023 · Dec 15, 2023
diff --git a/NAMESPACE b/NAMESPACE
@@ -21,6 +21,7 @@ export(get_uuid)
 export(phylopic_key_glyph)
 export(pick_phylopic)
 export(recolor_phylopic)
+export(resolve_phylopic)
 export(rotate_phylopic)
 export(save_phylopic)
 importFrom(curl,nslookup)
@@ -65,6 +66,7 @@ importFrom(grid,nullGrob)
 importFrom(grid,rasterGrob)
 importFrom(grid,unit)
 importFrom(httr,GET)
+importFrom(httr,POST)
 importFrom(httr,content)
 importFrom(jsonlite,fromJSON)
 importFrom(knitr,combine_words)
@@ -75,6 +77,8 @@ importFrom(png,readPNG)
 importFrom(rsvg,rsvg_png)
 importFrom(rsvg,rsvg_svg)
 importFrom(stats,setNames)
+importFrom(utils,URLdecode)
+importFrom(utils,URLencode)
 importFrom(utils,browseURL)
 importFrom(utils,menu)
 importFrom(utils,packageDescription)

diff --git a/NEWS.md b/NEWS.md
@@ -3,6 +3,8 @@
 * updated citation
 * added warning when specified size is more than 1000 times smaller than the y-axis range (mostly useful for when making maps with coord_sf) (#86)
 * changed the defaults and behavior of the color and fill argument/aesthetics to better maintain backwards compatibility but also prevent unnecessary outlines (#87)
+* added resolve_phylopic (#66)
+* pick_phylopic now accepts a list of uuids via the uuid argument (#95)
 
 # rphylopic 1.2.2
 

diff --git a/R/get_uuid.R b/R/get_uuid.R
@@ -67,7 +67,7 @@ get_uuid <- function(name = NULL, img = NULL, n = 1, filter = NULL,
   if (!is.logical(url)) {
     stop("`url` should be of class logical.")
   }
-  # Normalise name -------------------------------------------------------
+  # Normalize name -------------------------------------------------------
   name <- tolower(name)
   name <- gsub("_", " ", name)
   # API call -------------------------------------------------------------

diff --git a/R/pick_phylopic.R b/R/pick_phylopic.R
@@ -16,7 +16,8 @@ utils::globalVariables(c("x", "y", "uuid", "label"))
 #'   of available images, all available uuids will be returned. Defaults to 5.
 #'   Only relevant if `name` supplied.
 #' @param uuid \code{character}. A vector (or list) of valid PhyloPic 
-#'   silhouette uuids, such as that returned by [get_uuid()].
+#'   silhouette uuids, such as that returned by [get_uuid()] or
+#'   [resolve_phylopic()].
 #' @param view \code{numeric}. Number of silhouettes that should be plotted at
 #'   the same time. Defaults to 1.
 #' @param filter \code{character}. Filter uuid(s) by usage license. Use "by"

diff --git a/R/resolve_phylopic.R b/R/resolve_phylopic.R
@@ -0,0 +1,227 @@
+#' Use a taxonomic name from another database to get a PhyloPic image UUID
+#'
+#' This function takes a supplied taxonomic name, queries it via the specified
+#' external API, resolves the API's returned taxonomic ID to the PhyloPic
+#' taxonomic node database, then retrieves PhyloPic image uuids (or urls) for
+#' that node.
+#'
+#' @param name \code{character}. A full or partial taxonomic name to be queried
+#'   via the specified `api`.
+#' @param api \code{character}. The API in which to query `name`. See Details
+#'   for the available options.
+#' @param hierarchy \code{logical}. Whether the taxonomic hierarchy of `name`
+#'   should be retrieved from the API and used to get several sets of PhyloPic
+#'   image uuids (or urls).
+#' @param max_ranks \code{numeric}. The maximum number of taxonomic ranks that
+#'   should be included if `hierarchy` is `TRUE`.
+#' @inheritParams get_uuid
+#'
+#' @return A `list` where each value is `n` (or fewer) PhyloPic image uuids (or
+#'   urls if `url = TRUE`) and each name is the taxonomic name as matched and
+#'   reported by the specified `api`. If `hierarchy` is `FALSE`, the list has
+#'   length 1. If `hierarchy` is `TRUE`, the list has maximum length
+#'   `max_ranks`.
+#'
+#' @details If `hierarchy` is `FALSE`, the specified `name` is queried via the
+#'   specified `api`. The matched id is then resolved to the matching taxonomic
+#'   node in the PhyloPic database. If `hierarchy` is `TRUE`, the full taxonomic
+#'   hierarchy for `name` is retrieved from the specified `api`, those taxonomic
+#'   names are subset to `max_ranks` ranks (starting from the specified `name`
+#'   and ascending the hierarchy). Then each of those names is resolved to the
+#'   matching taxonomic node in the PhyloPic database (where possible). In
+#'   either case, [get_uuid()] is then used to retrieve `n` image UUID(s) for
+#'   each taxonomic name.
+#'
+#'   Note that while the names of the returned list are the taxonomic names as
+#'   reported by the specified `api`, the PhyloPic images that are returned are
+#'   associated with whatever taxonomic node that taxonomic name resolves to in
+#'   the PhyloPic database. This means that, if `hierarchy` is `TRUE`, the same
+#'   images may be returned for multiple taxonomic names. Also, if a particular
+#'   taxonomic name does not resolve to any node in the PhyloPic database, no
+#'   images will be returned for that name.
+#'
+#'   The following APIs are available for querying (`api`):
+#'   \itemize{
+#'     \item{"eol.org": the \href{https://eol.org/}{Encyclopedia of Life}}
+#'     (note: `hierarchy = TRUE` is not currently available for this API) ("eol"
+#'     is also allowed)
+#'     \item{"gbif.org": the \href{https://www.gbif.org/}{Global Biodiversity
+#'     Information Facility}} ("gbif" is also allowed)
+#'     \item{"marinespecies.org": the \href{https://marinespecies.org/}{World
+#'     Registor of Marine Species}} ("worms" is also allowed)
+#'     \item{"opentreeoflife.org": the \href{https://tree.opentreeoflife.org/}{
+#'     Open Tree of Life}} ("otol" is also allowed)
+#'     \item{"paleobiodb.org": the \href{https://paleobiodb.org/#/}{Paleobiology
+#'     Database}} ("pbdb" is also allowed)
+#'   }
+#'
+#' @importFrom httr POST
+#' @importFrom utils URLencode URLdecode
+#' @importFrom stats setNames
+#' @export
+#' @examples \dontrun{
+#' # get a uuid for a single name
+#' resolve_phylopic(name = "Canis lupus")
+#' # get uuids for the taxonomic hierarchy
+#' resolve_phylopic(name = "Velociraptor mongoliensis", api = "paleobiodb.org",
+#'                  hierarchy = TRUE, max_ranks = 3)
+#' }
+resolve_phylopic <- function(name, api = "gbif.org", hierarchy = FALSE,
+                             max_ranks = 5, n = 1, filter = NULL, url = FALSE) {
+  url_arg <- url
+  # replace api abbreviations
+  abbrvs <- setNames(c("eol.org", "gbif.org", "marinespecies.org",
+                       "opentreeoflife.org", "paleobiodb.org"),
+                     c("eol", "gbif", "worms", "otol", "pbdb"))
+  if (api %in% names(abbrvs)) api <- abbrvs[[api]]
+  # Check arguments ------------------------------------------------------
+  api <- match.arg(api, unname(abbrvs))
+  if (!is.character(name)) {
+    stop("`name` should be of class character.")
+  }
+  if (!is.character(api)) {
+    stop("`api` should be of class character.")
+  }
+  if (!is.logical(hierarchy)) {
+    stop("`hierarchy` should be of class logical.")
+  }
+  if (!is.numeric(max_ranks)) {
+    stop("`max_ranks` should be of class numeric.")
+  }
+  # Normalize name -------------------------------------------------------
+  name <- tolower(name)
+  name <- gsub("_", " ", name)
+  name_encode <- URLencode(name)
+  # Query specified API for the name -------------------------------------
+  if (api == "eol.org") {
+    check_url("https://eol.org/api/search/1.0.json")
+    namespace <- "pages"
+    url <- paste0("https://eol.org/api/search/1.0.json?page=1&q=", name_encode)
+    jsn <- json_GET(url)
+    # EOL appears to return lots of subspecies, so check if any match `name`
+    # first, otherwise, return the first result
+    if (jsn$totalResults == 0) stop("No results returned from the API.")
+    matches <- which(tolower(jsn$results$title) == name)
+    ind <- ifelse(any(matches), matches[1], 1)
+    ids <- jsn$results$id[ind]
+    name_vec <- jsn$results$title[ind]
+    if (hierarchy) {
+      warning("`hierarchy = TRUE` is not currently available for eol.org.")
+      hierarchy <- FALSE
+    }
+  } else if (api == "gbif.org") {
+    check_url("https://api.gbif.org/v1/")
+    namespace <- "species"
+    url <- paste0("https://api.gbif.org/v1/species/suggest?",
+                  "limit=1&q=", name_encode)
+    jsn <- json_GET(url)
+    if (length(jsn) == 0) stop("No results returned from the API.")
+    ids <- jsn$key
+    name_vec <- jsn$canonicalName
+    if (hierarchy) {
+      url <- paste0("https://api.gbif.org/v1/species/match?verbose=true&",
+                    "name=", URLencode(jsn$canonicalName[1]))
+      jsn <- json_GET(url)
+      ids <- c(jsn$speciesKey[1], jsn$genusKey[1], jsn$familyKey[1],
+               jsn$orderKey[1], jsn$classKey[1], jsn$phylumKey[1],
+               jsn$kingdomKey[1])
+      name_vec <- c(jsn$species[1], jsn$genus[1], jsn$family[1],
+                    jsn$order[1], jsn$class[1], jsn$phylum[1],
+                    jsn$kingdom[1])
+    }
+  } else if (api == "marinespecies.org") {
+    check_url("https://www.marinespecies.org/rest/")
+    namespace <- "taxname"
+    url <- paste0("https://www.marinespecies.org/rest/",
+                  "AphiaRecordsByMatchNames?marine_only=false&",
+                  "scientificnames%5B%5D=", name_encode)
+    jsn <- json_GET(url)
+    ids <- jsn[[1]]$AphiaID[1]
+    name_vec <- jsn[[1]]$scientificname[1]
+    if (hierarchy) {
+      url <- paste0("https://www.marinespecies.org/rest/",
+                    "AphiaClassificationByAphiaID/", ids)
+      jsn <- json_GET(url)
+      lst_sub <- jsn
+      ids <- character()
+      name_vec <- character()
+      while ("child" %in% names(lst_sub)) {
+        ids <- c(ids, lst_sub$AphiaID)
+        name_vec <- c(name_vec, lst_sub$scientificname)
+        lst_sub <- lst_sub$child
+      }
+      ids <- rev(ids)
+      name_vec <- rev(name_vec)
+    }
+  } else if (api == "paleobiodb.org") {
+    check_url("https://paleobiodb.org/data1.2/")
+    namespace <- "txn"
+    url <- paste0("https://paleobiodb.org/data1.2/taxa/auto.json?",
+                  "limit=10&name=", name_encode)
+    jsn <- json_GET(url)
+    if ("errors" %in% jsn || length(jsn$records) == 0)
+      stop("No results returned from the API.")
+    # sometimes returns higher taxonomic ranks first even when there is a
+    # perfect match, so check if any match `name`, otherwise, return the first
+    # result
+    matches <- which(tolower(jsn$records$nam) == name)
+    ind <- ifelse(any(matches), matches[1], 1)
+    ids <- jsn$records$oid[ind]
+    name_vec <- jsn$records$nam[ind]
+    if (hierarchy) {
+      url <- paste0("https://paleobiodb.org/data1.2/taxa/list.json?",
+                    "rel=all_parents&", "id=txn:", ids)
+      jsn <- json_GET(url)
+      ids <- rev(gsub("txn:", "", jsn$records$oid))
+      name_vec <- rev(jsn$records$nam)
+    }
+  } else if (api == "opentreeoflife.org") {
+    check_url("https://api.opentreeoflife.org/")
+    namespace <- "taxonomy"
+    url <- "https://api.opentreeoflife.org/v3/tnrs/autocomplete_name"
+    res <- POST(url = url, encode = "json", body = list("name" = name))
+    jsn <- response_to_JSON(res)
+    if (length(jsn) == 0) stop("No results returned from the API.")
+    ids <- jsn$ott_id[1]
+    name_vec <- jsn$unique_name[1]
+    if (hierarchy) {
+      url <- "https://api.opentreeoflife.org/v3/taxonomy/taxon_info"
+      res <- POST(url = url, encode = "json",
+                  body = list("include_lineage" = TRUE, "ott_id" = ids))
+      jsn <- response_to_JSON(res)
+      ids <- c(ids, jsn$lineage$ott_id)
+      name_vec <- c(name_vec, jsn$lineage$unique_name)
+    }
+  }
+  # subset ids if more than max_ranks
+  ids <- ids[seq_len(min(length(ids), max_ranks))]
+  # Resolve to PhyloPic and get images -----------------------------------
+  lst <- list()
+  for (i in seq_along(ids)) {
+    api_return <- phy_GET(path = paste0("resolve/", api, "/", namespace, "/",
+                                        ids[i]))
+    # catch any errors here
+    if ("errors" %in% names(api_return)) {
+      lst[[name_vec[i]]] <- character()
+    } else {
+      tax <- api_return$names[[1]]$text[1]
+      lst[[name_vec[i]]] <- get_uuid(tax, n = n, filter = filter, url = url_arg)
+    }
+  }
+  return(lst)
+}
+
+# check that a particular URL (e.g., for an API) is online
+check_url <- function(url) {
+  headers <- curlGetHeaders(url)
+  if (attr(headers, "status") != 200) {
+    stop("API is not available or you have no internet connection.")
+  }
+}
+
+#' @importFrom httr GET
+json_GET <- function(url) {
+  res <- GET(url = url)
+  if (length(res$content) == 0) stop("No results returned from the API.")
+  response_to_JSON(res)
+}
diff --git a/R/zzz.r b/R/zzz.r
@@ -13,8 +13,7 @@ as_null <- function(x) if (length(x) == 0) NULL else x
 
 pbase <- function() "https://api.phylopic.org"
 
-#' @importFrom jsonlite fromJSON
-#' @importFrom httr GET content
+#' @importFrom httr GET
 #' @importFrom curl nslookup
 phy_GET <- function(path, query = list(), ...) {
   # Check PhyloPic (or user) is online
@@ -26,13 +25,18 @@ phy_GET <- function(path, query = list(), ...) {
   })
   query <- as_null(pc(query))
   tt <- GET(url = pbase(), path = path, query = query)
-  tmp <- content(tt, as = "text", encoding = "UTF-8")
-  jsn <- fromJSON(tmp)
+  jsn <- response_to_JSON(tt)
   if (tt$status == 400) { # need to supply the build argument
     query[["build"]] <- jsn$build
     tt <- GET(url = pbase(), path = path, query = query)
-    tmp <- content(tt, as = "text", encoding = "UTF-8")
-    jsn <- fromJSON(tmp)
+    jsn <- response_to_JSON(tt)
   }
   jsn
 }
+
+#' @importFrom httr content
+#' @importFrom jsonlite fromJSON
+response_to_JSON <- function(response) {
+  tmp <- content(response, as = "text", encoding = "UTF-8")
+  return(fromJSON(tmp))
+}
diff --git a/_pkgdown.yml b/_pkgdown.yml
@@ -24,6 +24,7 @@ reference:
     Functions for identifying and retrieving PhyloPic silhouettes.
   contents:
   - get_uuid
+  - resolve_phylopic
   - pick_phylopic
   - get_phylopic
   - plot_phylopic

diff --git a/man/pick_phylopic.Rd b/man/pick_phylopic.Rd