Skip to content

Commit

Permalink
Add intchron_get_doi() and intchron_get_labcode()
Browse files Browse the repository at this point in the history
Not functional. See #11
  • Loading branch information
joeroe committed Jan 20, 2021
1 parent 30ae710 commit 1c37f71
Show file tree
Hide file tree
Showing 11 changed files with 169 additions and 12 deletions.
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@ Imports:
httr,
tibble,
dplyr,
readr
readr,
magrittr
Suggests:
knitr,
roxygen2,
Expand Down
4 changes: 4 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
# Generated by roxygen2: do not edit by hand

export("%>%")
export(intchron)
export(intchron_countries)
export(intchron_crawl)
export(intchron_extract)
export(intchron_get_doi)
export(intchron_get_labcode)
export(intchron_hosts)
export(intchron_request)
export(intchron_tabulate)
Expand All @@ -12,3 +15,4 @@ export(read_intchron_csv)
export(read_intchron_delim)
export(read_intchron_tsv)
export(read_intchron_txt)
importFrom(magrittr,"%>%")
46 changes: 45 additions & 1 deletion R/intchron.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# High-level interface to IntChron


# Main interface ----------------------------------------------------------

#' Query IntChron
#'
#' IntChron is an indexing service for chronological data from multiple sources.
Expand All @@ -10,7 +13,7 @@
#' @param countries (Optional) Vector of countries to be retrieved.
#' @param sites (Optional) Vector of sites to be retrieved.
#' @param tabulate If `TRUE` (the default), the data retrieved will be combined
#' into a data frame. Set `FALSE` to get the raw data from IntChron.
#' into a data frame. Set `FALSE` to get the raw response from IntChron.
#'
#' @details
#' At least `hosts` must be specified. Use `intchron_hosts()` to get a list of
Expand Down Expand Up @@ -68,6 +71,47 @@ intchron <- function(hosts,
}


# Auxiliary interfaces ----------------------------------------------------

#' Retrieve individual records from IntChron
#'
#' These functions retrieve individual records from IntChron by their primary
#' keys. Currently the two types of primary key supported by IntChron are
#' lab codes (for radiocarbon dates) and DOIs (for publications).
#'
#' @param lab_code For radiocarbon dates, a vector of lab codes to be retrieved.
#' @param doi For publications, a vector of DOIs to be retrieved.
#' @param tabulate If `TRUE` (the default), the data retrieved will be combined
#' into a data frame. Set `FALSE` to get the raw responses from IntChron.
#'
#' @return
#' A `tibble`, or if `tabulate = FALSE`, a list, of IntChron responses.
#'
#' @family functions for querying IntChron
#'
#' @export
#'
#' @examples
#' intchron_get_labcode("OxA-18955")
intchron_get_labcode <- function(lab_code, tabulate = TRUE) {
url <- intchron_url("labcode", lab_code)
response <- intchron_request(url)

if (!tabulate) {
return(response)
}
else {
return(intchron_tabulate(response, series = TRUE))
}
}

#' @rdname intchron_get_labcode
#' @export
intchron_get_doi <- function(doi, tabulate = TRUE) {
warning("intchron_get_doi() not yet implemented!")
return(NA)
}

# Helper functions --------------------------------------------------------

#' List available databases and countries on IntChron
Expand Down
45 changes: 39 additions & 6 deletions R/response.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,26 +7,37 @@
#' access to specific named elements of those objects.
#'
#' @param x List of IntChron records from [intchron_request()].
#' @param what Element to extract: 'data_url', 'records', or 'file_data'.
#' @param what Element to extract: `"data_url"`, `"records"`, `"file_data"`, or
#' `"series_file_data"`
#'
#' @return
#' `what` from `x`.
#'
#' @family functions for interacting with the IntChron API
#'
#' @references
#' * IntChron Data Schema <http://intchron.org/schema>
#'
#' @export
#'
#' @examples
#' khiv <- intchron_request(intchron_url("record", c("oxa", "nrcf"), "Jordan", "Kharaneh IV"))
#' intchron_extract(khiv, "data_url")
intchron_extract <- function(x, what) {
switch(what,
y <- switch(what,
data_url = purrr::map(x, "data_url"),
records = purrr::map(x, "records"),
file_data = purrr::map(x, list("records", "file_data")),
series_file_data = purrr::map(x, list("project_series_list", "file_data")),
series_list = purrr::map(x, list("records", "file_data", "series_list")),
stop("No method for extracting '", what, "' from an IntChron response.")
stop('No method for extracting "', what, '" from an IntChron response.')
)

if (any(purrr::map_lgl(y, is.null))) {
stop('"', what, '" not found in IntChron response `x`')
}

return(y)
}

#' Extract a data frame from IntChron responses
Expand All @@ -35,21 +46,31 @@ intchron_extract <- function(x, what) {
#' series and associated metadata as a data frame.
#'
#' @param x A list of IntChron responses.
#' @param series Logical. Are records stored in a "project_series_list" instead
#' of an array of records? Default: `FALSE`.
#'
#' @return
#' A `tibble` combining the data from all responses. Bibliographic references
#' are returned as a list column containing a vector of citation keys (ref: or
#' are returned as a list-column containing a vector of citation keys (ref: or
#' doi:) for each record.
#'
#' @family functions for interacting with the IntChron API
#'
#' @references
#' * IntChron Data Schema <http://intchron.org/schema>
#'
#' @export
#'
#' @examples
#' khiv <- intchron_request(intchron_url("record", c("oxa", "nrcf"), "Jordan", "Kharaneh IV"))
#' intchron_tabulate(khiv)
intchron_tabulate <- function(x) {
data <- intchron_extract(x, "file_data")
intchron_tabulate <- function(x, series = FALSE) {
if (series) {
data <- intchron_extract(x, "series_file_data")
}
else {
data <- intchron_extract(x, "file_data")
}
data <- purrr::map_dfr(data,
~intchron_tabulate_series(.x$series_list, .x$header))
return(data)
Expand All @@ -63,6 +84,7 @@ intchron_tabulate <- function(x) {
#' @return
#' A tibble.
#'
#' @keywords internal
#' @noRd
intchron_tabulate_series <- function(series_list, header) {
# Pull out series data as a data frame
Expand All @@ -81,6 +103,17 @@ intchron_tabulate_series <- function(series_list, header) {
refs <- paste0(refs)
data <- cbind(data, refs = refs)

# Make NAs explicit
data <- tibble::as_tibble(data)
data <- dplyr::mutate(
data,
dplyr::across(
dplyr::everything(),
~ .x %>%
dplyr::na_if("") %>%
dplyr::na_if("-")
)
)

return(data)
}
11 changes: 11 additions & 0 deletions R/utils-pipe.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#' Pipe operator
#'
#' See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details.
#'
#' @name %>%
#' @rdname pipe
#' @keywords internal
#' @export
#' @importFrom magrittr %>%
#' @usage lhs \%>\% rhs
NULL
3 changes: 2 additions & 1 deletion man/intchron.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 7 additions & 1 deletion man/intchron_extract.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

36 changes: 36 additions & 0 deletions man/intchron_get_labcode.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions man/intchron_hosts.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 10 additions & 2 deletions man/intchron_tabulate.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 12 additions & 0 deletions man/pipe.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 1c37f71

Please sign in to comment.