diff --git a/NAMESPACE b/NAMESPACE index 33e0412d..a7d40d3d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -14,6 +14,7 @@ export(filter_media) export(filter_observations) export(locations) export(media) +export(merge_camtrapdp) export(observations) export(read_camtrapdp) export(round_coordinates) @@ -23,6 +24,7 @@ export(version) export(write_camtrapdp) export(write_dwc) export(write_eml) +import(rlang) importFrom(dplyr,"%>%") importFrom(dplyr,.data) importFrom(memoise,memoise) diff --git a/NEWS.md b/NEWS.md index 4960dc5a..5afc36f6 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,7 @@ # camtrapdp (development version) * New function `write_camtrapdp()` writes a Camera Trap Data Package to disk as a `datapackage.json` and CSV files (#137). +* New function `merge_camtrapdp()` allows to merge two datasets (#112). * New function `write_eml()` transforms Camtrap DP metadata to EML (#99). * New function `round_coordinates()` allows to fuzzy/generalize location information by rounding deployment `latitude` and `longitude`. It also updates `coordinateUncertainty` in the deployments and `coordinatePrecision` and spatial scope in the metadata (#106). * New function `shift_time()` allows to shift/correct date-times in data and metadata for specified deploymentIDs and duration (#108). diff --git a/R/camtrapdp-package.R b/R/camtrapdp-package.R index e282e3ee..8555b226 100644 --- a/R/camtrapdp-package.R +++ b/R/camtrapdp-package.R @@ -3,5 +3,6 @@ ## usethis namespace: start #' @importFrom dplyr %>% .data +#' @import rlang ## usethis namespace: end NULL diff --git a/R/merge_camtrapdp.R b/R/merge_camtrapdp.R new file mode 100644 index 00000000..026d8959 --- /dev/null +++ b/R/merge_camtrapdp.R @@ -0,0 +1,165 @@ +#' Merge two Camera Trap Data Packages +#' +#' Merges two Camera Trap Data Package objects into one. +#' +#' @param x,y Camera Trap Data Package objects, as returned by +#' [read_camtrapdp()]. +#' @return A single Camera Trap Data Package object that is the combination of +#' `x` and `y`. +#' @family transformation functions +#' @export +#' @section Transformation details: +#' +#' Both `x` and `y` must have a unique dataset name `x$name` and `y$name`. +#' This name is used to prefix identifiers in the data that occur in both +#' datasets. +#' For example: +#' - `x` contains `deploymentID`s `c("a", "b")`. +#' - `y` contains `deploymentID`s `c("b", "c")`. +#' - Then merged `xy` will contain `deploymentID`s `c("a", "x_b", "y_b", "c")`. +#' +#' Data are merged as follows: +#' - Deployments are combined, with `deploymentID` kept unique. +#' - Media are combined, with `mediaID`, `deploymentID` and `eventID` kept +#' unique. +#' - Observations are combined, with `observationID`, `deploymentID`, `mediaID` +#' and `eventID` kept unique. +#' - Additional resources are retained, with the resource name kept unique. +#' +#' Metadata properties are merged as follows: +#' - **name**: Removed. +#' - **id**: Removed. +#' - **created**: Set to current timestamp. +#' - **title**: Removed. +#' - **contributors**: Combined, with duplicates removed. +#' - **description**: Combined as two paragraphs. +#' - **version**: Set to `1.0`. +#' - **keywords**: Combined, with duplicates removed. +#' - **image**: Removed. +#' - **homepage**: Removed. +#' - **sources**: Combined, with duplicates removed. +#' - **licenses**: Combined, with duplicates removed. +#' - **bibliographicCitation**: Removed. +#' - **project$id**: Removed. +#' - **project$title**: Combined. +#' - **project$acronym**: Removed. +#' - **project$description**: Combined as two paragraphs. +#' - **project$path**: Removed. +#' - **project$samplingDesign**: Sampling design of `x`. +#' - **project$captureMethod**: Combined, with duplicates removed. +#' - **project$individuals**: `TRUE` if one of the datasets has `TRUE`. +#' - **project$observationLevel**: Combined, with duplicates removed. +#' - **coordinatePrecision**: Set to the least precise `coordinatePrecision`. +#' - **spatial**: Reset based on the new deployments. +#' - **temporal**: Reset based on the new deployments. +#' - **taxonomic**: Combined, with duplicates removed. +#' - **relatedIdentifiers**: Combined, with duplicates removed. +#' - **references**: Combined, with duplicates removed. +#' - Custom properties of `x` are also retained. +#' @examples +#' x <- example_dataset() %>% +#' filter_deployments(deploymentID %in% c("00a2c20d", "29b7d356")) +#' y <- example_dataset() %>% +#' filter_deployments(deploymentID %in% c("577b543a", "62c200a9")) +#' x$name <- "x" +#' y$name <- "y" +#' merge_camtrapdp(x, y) +merge_camtrapdp <- function(x, y) { + check_camtrapdp(x) + check_camtrapdp(y) + + # Check names + check_name <- function(name, arg) { + if (is.null(name) || is.na(name) || !is.character(name)) { + cli::cli_abort( + c( + "{.arg {arg}} must have a unique (character) name.", + "i" = "Assign one to {.field {arg}$name}." + ), + class = "camtrapdp_error_name_invalid" + ) + } + } + check_name(x$name, "x") + check_name(y$name, "y") + if (x$name == y$name) { + cli::cli_abort( + c( + "{.arg x} and {.arg y} must have different unique names.", + "x" = "{.field x$name} and {.field y$name} currently have the same + value: {.val {x$name}}." + ), + class = "camtrapdp_error_name_duplicated" + ) + } + prefixes <- c(x$name, y$name) + + # Create xy from x + xy <- x + + # Merge resources + xy$resources <- merge_resources(x, y, prefixes) + + # Merge data + deployments(xy) <- merge_deployments(x, y, prefixes) + media(xy) <- merge_media(x, y, prefixes) + observations(xy) <- merge_observations(x, y, prefixes) + + # Merge/update metadata + xy$name <- NULL + xy$id <- NULL + xy$created <- format(Sys.time(), "%Y-%m-%dT%H:%M:%SZ") + xy$title <- NULL + xy$contributors <- unique(c(x$contributors, y$contributors)) + xy$description <- paste(x$description, y$description, sep = "/n") + xy$version <- "1.0" + xy$keywords <- unique(c(x$keywords, y$keywords)) + xy$image <- NULL + xy$homepage <- NULL + xy$sources <- unique(c(x$sources, y$sources)) + xy$licenses <- unique(c(x$licenses, y$licenses)) + xy$bibliographicCitation <- NULL + xy$project$id <- NULL + xy$project$title <- paste(x$project$title, y$project$title, sep = " / ") + xy$project$acronym <- NULL + xy$project$description <- + paste(x$project$description, y$project$description, sep = "/n") + xy$project$path <- NULL + xy$project$samplingDesign <- x$project$samplingDesign # Second one ignored + xy$project$captureMethod <- + unique(c(x$project$captureMethod, y$project$captureMethod)) + xy$project$individuals <- any(x$project$individuals, y$project$individiuals) + xy$project$observationLevel <- + unique(c(x$project$observationLevel, y$project$observationLevel)) + xy$coordinatePrecision <- + max(x$coordinatePrecision, y$coordinatePrecision, na.rm = TRUE) + xy$relatedIdentifiers <- unique(c(x$relatedIdentifiers, y$relatedIdentifiers)) + xy$references <- unique(c(x$references, y$references)) + xy$directory <- "." + + # Add package$id to related identifiers if it is a DOI + add_related_id <- function(id, related_ids) { + if (grepl("doi", id %||% "")) { + new_related_id <- list( + relationType = "isDerivedFrom", + relatedIdentifier = id, + resourceTypeGeneral = "Dataset", + relatedIdentifierType = "DOI" + ) + related_ids <- c(related_ids, list(new_related_id)) + + } + return(related_ids) + } + xy$relatedIdentifiers <- add_related_id(x$id, xy$relatedIdentifiers) + xy$relatedIdentifiers <- add_related_id(y$id, xy$relatedIdentifiers) + + # Update scopes + xy <- + xy %>% + update_spatial() %>% + update_temporal() %>% + update_taxonomic() + + return(xy) +} diff --git a/R/print.R b/R/print.R index bc64e7f5..ef689c0b 100644 --- a/R/print.R +++ b/R/print.R @@ -20,26 +20,26 @@ print.camtrapdp <- function(x, ...) { # check_camtrapdp() not necessary: print only triggered for camtrapdp object # Calculate number of rows for the tables (resources in x$data) - tables_rows <- + tables <- purrr::pluck(x, "data") %>% purrr::map(nrow) + tables_length <- length(tables) - # List tables - tables <- names(tables_rows) + # Show name and tables + name <- if (!is.null(x$name)) cli::format_inline("{.val {x$name}} ") else "" cli::cat_line( cli::format_inline( - "A Camera Trap Data Package with {length(tables)} table{?s}{?./:/:}" + "A Camera Trap Data Package {name}with {tables_length} table{?s}{?./:/:}" ) ) purrr::walk2( - names(tables_rows), - tables_rows, + names(tables), + tables, ~ cli::cat_bullet(cli::format_inline("{.x}: {.val {.y}} rows")) ) - # List additional resources (not in x$data), if any - resources <- frictionless::resources(x) - extra_resources <- resources[!resources %in% tables] + # List additional resources, if any + extra_resources <- additional_resources(x) if (length(extra_resources) > 0) { cli::cat_line("") cli::cat_line( diff --git a/R/taxa.R b/R/taxa.R index 156dec9e..8288e0eb 100644 --- a/R/taxa.R +++ b/R/taxa.R @@ -23,7 +23,7 @@ taxa <- function(x) { dplyr::select("scientificName", dplyr::starts_with("taxon.")) %>% dplyr::distinct() %>% dplyr::rename_with(~ sub("^taxon.", "", .x)) %>% - dplyr::arrange(scientificName) + dplyr::arrange(.data$scientificName) # Remove duplicates without taxonID if ("taxonID" %in% names(taxa)) { diff --git a/R/taxonomic.R b/R/taxonomic.R index 1a23ae03..28294678 100644 --- a/R/taxonomic.R +++ b/R/taxonomic.R @@ -15,6 +15,9 @@ taxonomic <- function(x) { return(NULL) } + # Replace NULL with NA + taxonomic_list <- replace_null_recursive(taxonomic_list) + # Convert list into a data.frame taxa <- purrr::map( diff --git a/R/utils-merge.R b/R/utils-merge.R new file mode 100644 index 00000000..ef7c678a --- /dev/null +++ b/R/utils-merge.R @@ -0,0 +1,148 @@ +#' Merge two character vectors and prefix duplicates +#' +#' Merges two character vectors and adds a prefix to any values in `a` that +#' occur in `b` and vice versa. +#' +#' @param a,b Character vectors. +#' @param prefixes Prefixes (e.g. `c("a", "b")`) to add to `a` and `b` values +#' respectively, as `_`. +#' @return `c(a, b)` with prefixed duplicates. +#' @family helper functions +#' @noRd +#' @examples +#' a <- c("k", "l", "m", "n", 1, 1, NA) +#' b <- c( "m", 1, 1, "o", NA) +#' merge_vectors(a, b, prefixes = c("a", "b")) +merge_vectors <- function(a, b, prefixes) { + a_prefix <- prefixes[1] + b_prefix <- prefixes[2] + a_prefixed <- purrr:::map2_chr(a, a %in% b, + ~ if (!is.na(.x) && .y) { paste(a_prefix, .x, sep = "_") } else { .x } + ) + b_prefixed <- purrr:::map2_chr(b, b %in% a, + ~ if (!is.na(.x) && .y) { paste(b_prefix, .x, sep = "_") } else { .x } + ) + c(a_prefixed, b_prefixed) +} + +#' Merge resources +#' +#' Merges the resources of Camera Trap Data Package `x` with the additional +#' resources of `y`. +#' Resource names that occur in both `x` and `y` get a prefix. +#' @inheritParams merge_camtrapdp +#' @param prefixes Prefixes (e.g. `c("x", "y")`) to add to duplicate values. +#' @return A list with resources. +#' @family helper functions +#' @noRd +merge_resources <- function(x, y, prefixes) { + # Combine all resources of x with the additional resources of y + x_resources <- x$resources + y_resources <- purrr::keep(y$resources, ~ .x$name %in% additional_resources(y)) + resources <- c(x_resources, y_resources) + + # Prefix duplicate resource names + resource_names <- merge_vectors( + purrr::map_chr(x_resources, "name"), + purrr::map_chr(y_resources, "name"), + prefixes + ) + update_name <- function(resource, name) { + resource$name <- name + return(resource) + } + purrr::map2(resources, resource_names, update_name) +} + +#' Merge deployments +#' +#' Merges the deployments of Camera Trap Data Packages `x` and `y`. +#' Values in `deploymentID` that occur in both `x` and `y` get a prefix. +#' @inheritParams merge_camtrapdp +#' @param prefixes Prefixes (e.g. `c("x", "y")`) to add to duplicate values. +#' @return A [tibble::tibble()] data frame with deployments. +#' @family helper functions +#' @noRd +merge_deployments <- function(x, y, prefixes) { + x_deployments <- deployments(x) + y_deployments <- deployments(y) + dplyr::bind_rows(x_deployments, y_deployments) %>% + dplyr::mutate( + deploymentID = merge_vectors( + purrr::chuck(x_deployments, "deploymentID"), + purrr::chuck(y_deployments, "deploymentID"), + prefixes + ) + ) +} + +#' Merge media +#' +#' Merges the media of Camera Trap Data Packages `x` and `y`. +#' Values in `mediaID`, `deploymentID` or `eventID` that occur in both `x` and +#' `y` get a prefix. +#' @inheritParams merge_camtrapdp +#' @param prefixes Prefixes (e.g. `c("x", "y")`) to add to duplicate values. +#' @return A [tibble::tibble()] data frame with media. +#' @family helper functions +#' @noRd +merge_media <- function(x, y, prefixes) { + x_media <- media(x) + y_media <- media(y) + dplyr::bind_rows(x_media, y_media) %>% + dplyr::mutate( + mediaID = merge_vectors( + purrr::chuck(x_media, "mediaID"), + purrr::chuck(y_media, "mediaID"), + prefixes + ), + deploymentID = merge_vectors( + purrr::chuck(x_media, "deploymentID"), + purrr::chuck(y_media, "deploymentID"), + prefixes + ), + eventID = merge_vectors( + purrr::pluck(x_media, "eventID"), + purrr::pluck(y_media, "eventID"), + prefixes + ) + ) +} + +#' Merge observations +#' +#' Merges the observations of Camera Trap Data Packages `x` and `y`. +#' Values in `observationID`, `deploymentID`, `mediaID` or `eventID` that occur +#' in both `x` and `y` get a prefix. +#' @inheritParams merge_camtrapdp +#' @param prefixes Prefixes (e.g. `c("x", "y")`) to add to duplicate values. +#' @return A [tibble::tibble()] data frame with observations. +#' @family helper functions +#' @noRd +merge_observations <- function(x, y, prefixes) { + x_observations <- observations(x) + y_observations <- observations(y) + dplyr::bind_rows(x_observations, y_observations) %>% + dplyr::mutate( + observationID = merge_vectors( + purrr::chuck(x_observations, "observationID"), + purrr::chuck(y_observations, "observationID"), + prefixes + ), + deploymentID = merge_vectors( + purrr::chuck(x_observations, "deploymentID"), + purrr::chuck(y_observations, "deploymentID"), + prefixes + ), + mediaID = merge_vectors( + purrr::pluck(x_observations, "mediaID"), + purrr::pluck(y_observations, "mediaID"), + prefixes + ), + eventID = merge_vectors( + purrr::pluck(x_observations, "eventID"), + purrr::pluck(y_observations, "eventID"), + prefixes + ) + ) +} diff --git a/R/utils.R b/R/utils.R index 246af43d..f4625a0a 100644 --- a/R/utils.R +++ b/R/utils.R @@ -37,7 +37,19 @@ expand_cols <- function(df, colnames) { return(df) } -#' Creates list of contributors in EML format +#' Lists the names of additional resources in a Camera Trap Data Package +#' +#' @inheritParams print.camtrapdp +#' @return Character vector with the additional resource names. +#' @family helper functions +#' @noRd +additional_resources <- function(x) { + camtrapdp_resource_names <- c("deployments", "media", "observations") + resource_names <- frictionless::resources(x) + resource_names[!resource_names %in% camtrapdp_resource_names] +} + +#' Create list of contributors in EML format #' #' @param contributor_list List of contributors #' @return List of contributors as emld responsibleParty objects. @@ -57,3 +69,23 @@ create_eml_contributors <- function(contributor_list) { onlineUrl = .$path )) } + +#' Replace NULL values recursively +#' +#' Replaces `NULL` values with `NA` by recursively iterating through each +#' element of the input list. +#' +#' @param list A nested list. +#' @return `x`, but with all `NULL` values replaced. +#' `NA`. +#' @family helper functions +#' @noRd +replace_null_recursive <- function(list) { + purrr::map(list, function(element) { + if (is.list(element) && !is.null(element)) { + replace_null_recursive(element) + } else { + ifelse(is.null(element), NA, element) + } + }) +} diff --git a/man/merge_camtrapdp.Rd b/man/merge_camtrapdp.Rd new file mode 100644 index 00000000..7d157ab2 --- /dev/null +++ b/man/merge_camtrapdp.Rd @@ -0,0 +1,93 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/merge_camtrapdp.R +\name{merge_camtrapdp} +\alias{merge_camtrapdp} +\title{Merge two Camera Trap Data Packages} +\usage{ +merge_camtrapdp(x, y) +} +\arguments{ +\item{x, y}{Camera Trap Data Package objects, as returned by +\code{\link[=read_camtrapdp]{read_camtrapdp()}}.} +} +\value{ +A single Camera Trap Data Package object that is the combination of +\code{x} and \code{y}. +} +\description{ +Merges two Camera Trap Data Package objects into one. +} +\section{Transformation details}{ + + +Both \code{x} and \code{y} must have a unique dataset name \code{x$name} and \code{y$name}. +This name is used to prefix identifiers in the data that occur in both +datasets. +For example: +\itemize{ +\item \code{x} contains \code{deploymentID}s \code{c("a", "b")}. +\item \code{y} contains \code{deploymentID}s \code{c("b", "c")}. +\item Then merged \code{xy} will contain \code{deploymentID}s \code{c("a", "x_b", "y_b", "c")}. +} + +Data are merged as follows: +\itemize{ +\item Deployments are combined, with \code{deploymentID} kept unique. +\item Media are combined, with \code{mediaID}, \code{deploymentID} and \code{eventID} kept +unique. +\item Observations are combined, with \code{observationID}, \code{deploymentID}, \code{mediaID} +and \code{eventID} kept unique. +\item Additional resources are retained, with the resource name kept unique. +} + +Metadata properties are merged as follows: +\itemize{ +\item \strong{name}: Removed. +\item \strong{id}: Removed. +\item \strong{created}: Set to current timestamp. +\item \strong{title}: Removed. +\item \strong{contributors}: Combined, with duplicates removed. +\item \strong{description}: Combined as two paragraphs. +\item \strong{version}: Set to \code{1.0}. +\item \strong{keywords}: Combined, with duplicates removed. +\item \strong{image}: Removed. +\item \strong{homepage}: Removed. +\item \strong{sources}: Combined, with duplicates removed. +\item \strong{licenses}: Combined, with duplicates removed. +\item \strong{bibliographicCitation}: Removed. +\item \strong{project$id}: Removed. +\item \strong{project$title}: Combined. +\item \strong{project$acronym}: Removed. +\item \strong{project$description}: Combined as two paragraphs. +\item \strong{project$path}: Removed. +\item \strong{project$samplingDesign}: Sampling design of \code{x}. +\item \strong{project$captureMethod}: Combined, with duplicates removed. +\item \strong{project$individuals}: \code{TRUE} if one of the datasets has \code{TRUE}. +\item \strong{project$observationLevel}: Combined, with duplicates removed. +\item \strong{coordinatePrecision}: Set to the least precise \code{coordinatePrecision}. +\item \strong{spatial}: Reset based on the new deployments. +\item \strong{temporal}: Reset based on the new deployments. +\item \strong{taxonomic}: Combined, with duplicates removed. +\item \strong{relatedIdentifiers}: Combined, with duplicates removed. +\item \strong{references}: Combined, with duplicates removed. +\item Custom properties of \code{x} are also retained. +} +} + +\examples{ +x <- example_dataset() \%>\% + filter_deployments(deploymentID \%in\% c("00a2c20d", "29b7d356")) +y <- example_dataset() \%>\% + filter_deployments(deploymentID \%in\% c("577b543a", "62c200a9")) +x$name <- "x" +y$name <- "y" +merge_camtrapdp(x, y) +} +\seealso{ +Other transformation functions: +\code{\link{round_coordinates}()}, +\code{\link{shift_time}()}, +\code{\link{write_dwc}()}, +\code{\link{write_eml}()} +} +\concept{transformation functions} diff --git a/man/round_coordinates.Rd b/man/round_coordinates.Rd index abf27855..496e6c22 100644 --- a/man/round_coordinates.Rd +++ b/man/round_coordinates.Rd @@ -88,6 +88,7 @@ deployments(x_rounded)[c("latitude", "longitude", "coordinateUncertainty")] } \seealso{ Other transformation functions: +\code{\link{merge_camtrapdp}()}, \code{\link{shift_time}()}, \code{\link{write_dwc}()}, \code{\link{write_eml}()} diff --git a/man/shift_time.Rd b/man/shift_time.Rd index db3d5b10..7aa9e112 100644 --- a/man/shift_time.Rd +++ b/man/shift_time.Rd @@ -50,6 +50,7 @@ deployments(x_shifted)[, c("deploymentID", "deploymentStart", "deploymentEnd")] } \seealso{ Other transformation functions: +\code{\link{merge_camtrapdp}()}, \code{\link{round_coordinates}()}, \code{\link{write_dwc}()}, \code{\link{write_eml}()} diff --git a/man/write_dwc.Rd b/man/write_dwc.Rd index 040a9b4a..dadea2d3 100644 --- a/man/write_dwc.Rd +++ b/man/write_dwc.Rd @@ -69,6 +69,7 @@ unlink("my_directory", recursive = TRUE) } \seealso{ Other transformation functions: +\code{\link{merge_camtrapdp}()}, \code{\link{round_coordinates}()}, \code{\link{shift_time}()}, \code{\link{write_eml}()} diff --git a/man/write_eml.Rd b/man/write_eml.Rd index 32034c60..ef20a880 100644 --- a/man/write_eml.Rd +++ b/man/write_eml.Rd @@ -74,6 +74,7 @@ unlink("my_directory", recursive = TRUE) } \seealso{ Other transformation functions: +\code{\link{merge_camtrapdp}()}, \code{\link{round_coordinates}()}, \code{\link{shift_time}()}, \code{\link{write_dwc}()} diff --git a/tests/testthat/_snaps/merge_camtrapdp/datapackage_different_xy.json b/tests/testthat/_snaps/merge_camtrapdp/datapackage_different_xy.json new file mode 100644 index 00000000..f1b9cbbf --- /dev/null +++ b/tests/testthat/_snaps/merge_camtrapdp/datapackage_different_xy.json @@ -0,0 +1,333 @@ +{ + "resources": [ + { + "name": "deployments", + "path": "deployments.csv", + "profile": "tabular-data-resource", + "format": "csv", + "mediatype": "text/csv", + "encoding": "utf-8", + "schema": "https://raw.githubusercontent.com/tdwg/camtrap-dp/1.0.1/deployments-table-schema.json" + }, + { + "name": "media", + "path": "media.csv", + "profile": "tabular-data-resource", + "format": "csv", + "mediatype": "text/csv", + "encoding": "utf-8", + "schema": "https://raw.githubusercontent.com/tdwg/camtrap-dp/1.0.1/media-table-schema.json" + }, + { + "name": "observations", + "path": "observations.csv", + "profile": "tabular-data-resource", + "format": "csv", + "mediatype": "text/csv", + "encoding": "utf-8", + "schema": "https://raw.githubusercontent.com/tdwg/camtrap-dp/1.0.1/observations-table-schema.json" + }, + { + "name": "individuals", + "description": "Custom table/resource not part of the Camtrap DP model. Included to showcase that extending with more resources is possible.", + "data": [ + { + "id": 1, + "individualName": "Reinaert", + "scientificName": "Vulpes vulpes" + } + ] + } + ], + "profile": "https://raw.githubusercontent.com/tdwg/camtrap-dp/1.0.1/camtrap-dp-profile.json", + "contributors": [ + { + "title": "Axel Neukermans", + "email": "axel.neukermans@inbo.be", + "path": "https://orcid.org/0000-0003-0272-9180", + "role": "contributor", + "organization": "Research Institute for Nature and Forest (INBO)" + }, + { + "title": "Danny Van der beeck", + "email": "daniel.vanderbeeck@gmail.com" + }, + { + "title": "Emma Cartuyvels", + "email": "emma.cartuyvels@inbo.be", + "role": "principalInvestigator", + "organization": "Research Institute for Nature and Forest (INBO)" + }, + { + "title": "Peter Desmet", + "email": "peter.desmet@inbo.be", + "path": "https://orcid.org/0000-0002-8442-8025", + "role": "contact", + "organization": "Research Institute for Nature and Forest (INBO)" + }, + { + "title": "Research Institute for Nature and Forest (INBO)", + "path": "https://inbo.be", + "role": "rightsHolder" + }, + { + "title": "Research Institute for Nature and Forest (INBO)", + "path": "https://inbo.be", + "role": "publisher" + }, + { + "title": "Julian Evans", + "email": "jevansbio@gmail.com", + "organization": "University of Amsterdam", + "role": "principalInvestigator", + "firstName": "Julian", + "lastName": "Evans" + }, + { + "title": "Rotem Zilber", + "email": "r.kadanzilber@uva.nl", + "organization": "University of Amsterdam", + "role": "principalInvestigator", + "firstName": "Rotem", + "lastName": "Zilber" + }, + { + "title": "W. Daniel Kissling", + "path": "https://www.danielkissling.de/", + "email": "wdkissling@gmail.com", + "organization": "University of Amsterdam", + "role": "principalInvestigator", + "firstName": "W. Daniel ", + "lastName": "Kissling" + } + ], + "description": "MICA - Muskrat and coypu camera trap observations in Belgium, the Netherlands and Germany is an occurrence dataset published by the Research Institute of Nature and Forest (INBO). It is part of the LIFE project MICA, in which innovative techniques are tested for a more efficient control of muskrat and coypu populations, both invasive species. This dataset is a sample of the original dataset and serves as an example of a Camera Trap Data Package (Camtrap DP)./nCamera trap pilot 2 was a test of the difference in species detection and data accumulation between a Snyper Commander camera with a regular lens (52°) and one with a wide lens (100°). The cameras were deployed at 30 cm above the ground within the herbivore exclosure Zeeveld Noord in the Amsterdam Water Supply Dunes from 14th of August 2021 to 24th of September 2021. During this pilot, a solar panel failure caused the cameras to stop recording data from the 24th of August 2021 to the 6th of September (14 days). During annotation, only days in which both cameras were operational were annotated. This led to a total of 1,113 images over 28 days from the two cameras. A detailed description of the dataset can be found in a data paper published in the journal Data in Brief (Evans et al. 2024, https://doi.org/10.1016/j.dib.2024.110544).", + "version": "1.0", + "keywords": ["camera traps", "public awareness campaign", "flood protection", "flood control", "damage prevention", "animal damage", "pest control", "invasive alien species", "muskrat", "coypu", "dune ecosystem", "mammals", "exclosure", "camera trap", "biodiversity monitoring", "Natura 2000", "herbivore", "wildlife camera"], + "sources": [ + { + "title": "Agouti", + "path": "https://www.agouti.eu", + "email": "agouti@wur.nl", + "version": "v3.21" + }, + { + "title": "Agouti", + "path": "https://www.agouti.eu", + "email": "agouti@wur.nl", + "version": "v4" + } + ], + "licenses": [ + { + "name": "CC0-1.0", + "scope": "data" + }, + { + "path": "http://creativecommons.org/licenses/by/4.0/", + "scope": "media" + }, + { + "name": "CC-BY-4.0", + "scope": "data" + }, + { + "name": "CC-BY-4.0", + "scope": "media" + } + ], + "project": { + "title": "Management of Invasive Coypu and muskrAt in Europe / Data from three camera trapping pilots in the Amsterdam Water Supply Dunes of the Netherlands", + "description": "Invasive alien species such as the coypu and muskrat pose a major threat to biodiversity and cost millions of euros annually. By feeding on rushes and reeds, these animals cause serious damage to the environment in which they live and endangered species suffer from habitat loss. The disappearance of reeds and digging in dikes represents a safety risk for humans in the lowland areas. With the LIFE project MICA (), the partners from the participating countries want to develop a transnational plan for the management of coypu and muskrat populations in Europe and aim to reduce their population. The objective of an effective population control of coypu and muskrat is to protect lowlands from flooding, to prevent crop damage and loss of biodiversity. The objective of the project is to serve as a pilot and demonstration project in which ‘best practices’ are tested and new techniques are developed for a more efficient control of muskrat and coypu populations. By involving organisations from Belgium, Germany and the Netherlands, the project also promotes international cooperation and knowledge exchange in the field of muskrat and coypu management./nThree pilot studies were conducted to test the autonomous deployment of wireless 4G wildlife cameras with solar panels and automated data transmission in the coastal dunes of the Amsterdam Water Supply Dunes, The Netherlands. Monitoring and management of grazing mammals such as the European rabbit (Oryctolagus cuniculus) and the European fallow deer (Dama dama) is of key interest in this nature reserve, as they slow down the rate of natural succession and alter plant species composition and vegetation structure through grazing and digging. These grazers as well as predators such as the red fox (Vulpes vulpes) have typically been monitored using traditional survey methods such as transect and areal counts, but only once or twice a year. The study was aimed to test the feasibility and methodology of running long-term autonomous monitoring networks with wildlife cameras. This involved the testing of power usage, data transmission, and data accumulation, and the robustness of cameras to herbivore damage. Furthermore, the pilots specifically tested how the detection of focal species (rabbits, deer, foxes) differs with deployment heights, camera lens types and the placement in different habitats. The pilot also provides labelled images for the development of deep learning algorithms to automatically identify species.\r\n\r\nWork was carried out as part of the development of the monitoring demonstration sites of a large-scale research infrastructure project ARISE.", + "samplingDesign": "targeted", + "captureMethod": ["activityDetection", "timeLapse"], + "individualAnimals": false, + "observationLevel": ["media", "event"], + "individuals": false + }, + "coordinatePrecision": 0.001, + "spatial": { + "type": "Polygon", + "coordinates": [ + [ + [4.013, 50.699], + [5.659, 50.699], + [5.659, 52.356], + [4.013, 52.356], + [4.013, 50.699] + ] + ] + }, + "temporal": { + "start": "2020-05-30", + "end": "2022-03-18" + }, + "taxonomic": [ + { + "scientificName": "Anas platyrhynchos", + "taxonID": "https://www.checklistbank.org/dataset/COL2023/taxon/DGP6", + "taxonRank": "species", + "family": null, + "order.": null, + "vernacularNames": { + "eng": "mallard", + "nld": "wilde eend" + } + }, + { + "scientificName": "Anas strepera", + "taxonID": "https://www.checklistbank.org/dataset/COL2023/taxon/DGPL", + "taxonRank": "species", + "family": null, + "order.": null, + "vernacularNames": { + "eng": "gadwall", + "nld": "krakeend" + } + }, + { + "scientificName": "Apodemus sylvaticus", + "taxonID": "https://www.checklistbank.org/dataset/COL2023/taxon/FRJJ", + "taxonRank": "species", + "family": "Muridae", + "order.": "Rodentia", + "vernacularNames": { + "eng": "wood mouse", + "nld": "bosmuis" + } + }, + { + "scientificName": "Ardea", + "taxonID": "https://www.checklistbank.org/dataset/COL2023/taxon/32FH", + "taxonRank": "genus", + "family": null, + "order.": null, + "vernacularNames": { + "eng": "great herons", + "nld": "reigers" + } + }, + { + "scientificName": "Ardea cinerea", + "taxonID": "https://www.checklistbank.org/dataset/COL2023/taxon/GCHS", + "taxonRank": "species", + "family": null, + "order.": null, + "vernacularNames": { + "eng": "grey heron", + "nld": "blauwe reiger" + } + }, + { + "scientificName": "Aves", + "taxonID": "https://www.checklistbank.org/dataset/COL2023/taxon/V2", + "taxonRank": "class", + "family": null, + "order.": null, + "vernacularNames": { + "eng": "bird sp.", + "nld": "vogel" + } + }, + { + "scientificName": "Corvus corone", + "taxonID": "https://www.checklistbank.org/dataset/COL2023/taxon/YNHJ", + "taxonRank": "species", + "family": "Corvidae", + "order.": "Passeriformes", + "vernacularNames": { + "eng": "carrion crow", + "nld": "zwarte kraai" + } + }, + { + "scientificName": "Homo sapiens", + "taxonID": "https://www.checklistbank.org/dataset/COL2023/taxon/6MB3T", + "taxonRank": "species", + "family": "Hominidae", + "order.": "Primates", + "vernacularNames": { + "eng": "human", + "nld": "mens" + } + }, + { + "scientificName": "Martes foina", + "taxonID": "https://www.checklistbank.org/dataset/COL2023/taxon/3Y9VW", + "taxonRank": "species", + "family": null, + "order.": null, + "vernacularNames": { + "eng": "beech marten", + "nld": "steenmarter" + } + }, + { + "scientificName": "Mustela putorius", + "taxonID": "https://www.checklistbank.org/dataset/COL2023/taxon/44QYC", + "taxonRank": "species", + "family": null, + "order.": null, + "vernacularNames": { + "eng": "European polecat", + "nld": "bunzing" + } + }, + { + "scientificName": "Oryctolagus cuniculus", + "taxonID": "https://www.checklistbank.org/dataset/COL2023/taxon/74ZBP", + "taxonRank": "species", + "family": "Leporidae", + "order.": "Lagomorpha", + "vernacularNames": { + "eng": "European rabbit", + "nld": "Europees konijn" + } + }, + { + "scientificName": "Rattus norvegicus", + "taxonID": "https://www.checklistbank.org/dataset/COL2023/taxon/4RM67", + "taxonRank": "species", + "family": null, + "order.": null, + "vernacularNames": { + "eng": "brown rat", + "nld": "bruine rat" + } + }, + { + "scientificName": "Vulpes vulpes", + "taxonID": "https://www.checklistbank.org/dataset/COL2023/taxon/5BSG3", + "taxonRank": "species", + "family": "Canidae", + "order.": "Carnivora", + "vernacularNames": { + "eng": "red fox", + "nld": "vos" + } + } + ], + "relatedIdentifiers": [ + { + "relationType": "IsDerivedFrom", + "relatedIdentifier": "https://doi.org/10.15468/5tb6ze", + "resourceTypeGeneral": "Dataset", + "relatedIdentifierType": "DOI" + }, + { + "relationType": "IsSupplementTo", + "relatedIdentifier": "https://inbo.github.io/camtraptor/", + "resourceTypeGeneral": "Software", + "relatedIdentifierType": "URL" + }, + { + "relationType": "IsPublishedIn", + "relatedIdentifier": "https://doi.org/10.1016/j.dib.2024.110544", + "resourceTypeGeneral": "DataPaper", + "relatedIdentifierType": "DOI" + } + ], + "references": [ + "Evans, J.C., Zilber, R., & Kissling, W.D. (2024). Data from three camera trapping pilots in the Amsterdam Water Supply Dunes of the Netherlands. Data in Brief, 54, 110544. https://doi.org/10.1016/j.dib.2024.110544" + ] +} diff --git a/tests/testthat/_snaps/merge_camtrapdp/datapackage_identical_xy.json b/tests/testthat/_snaps/merge_camtrapdp/datapackage_identical_xy.json new file mode 100644 index 00000000..ae3c458f --- /dev/null +++ b/tests/testthat/_snaps/merge_camtrapdp/datapackage_identical_xy.json @@ -0,0 +1,244 @@ +{ + "resources": [ + { + "name": "deployments", + "path": "deployments.csv", + "profile": "tabular-data-resource", + "format": "csv", + "mediatype": "text/csv", + "encoding": "utf-8", + "schema": "https://raw.githubusercontent.com/tdwg/camtrap-dp/1.0.1/deployments-table-schema.json" + }, + { + "name": "media", + "path": "media.csv", + "profile": "tabular-data-resource", + "format": "csv", + "mediatype": "text/csv", + "encoding": "utf-8", + "schema": "https://raw.githubusercontent.com/tdwg/camtrap-dp/1.0.1/media-table-schema.json" + }, + { + "name": "observations", + "path": "observations.csv", + "profile": "tabular-data-resource", + "format": "csv", + "mediatype": "text/csv", + "encoding": "utf-8", + "schema": "https://raw.githubusercontent.com/tdwg/camtrap-dp/1.0.1/observations-table-schema.json" + }, + { + "name": "x_individuals", + "description": "Custom table/resource not part of the Camtrap DP model. Included to showcase that extending with more resources is possible.", + "data": [ + { + "id": 1, + "individualName": "Reinaert", + "scientificName": "Vulpes vulpes" + } + ] + }, + { + "name": "y_individuals", + "description": "Custom table/resource not part of the Camtrap DP model. Included to showcase that extending with more resources is possible.", + "data": [ + { + "id": 1, + "individualName": "Reinaert", + "scientificName": "Vulpes vulpes" + } + ] + } + ], + "profile": "https://raw.githubusercontent.com/tdwg/camtrap-dp/1.0.1/camtrap-dp-profile.json", + "contributors": [ + { + "title": "Axel Neukermans", + "email": "axel.neukermans@inbo.be", + "path": "https://orcid.org/0000-0003-0272-9180", + "role": "contributor", + "organization": "Research Institute for Nature and Forest (INBO)" + }, + { + "title": "Danny Van der beeck", + "email": "daniel.vanderbeeck@gmail.com" + }, + { + "title": "Emma Cartuyvels", + "email": "emma.cartuyvels@inbo.be", + "role": "principalInvestigator", + "organization": "Research Institute for Nature and Forest (INBO)" + }, + { + "title": "Peter Desmet", + "email": "peter.desmet@inbo.be", + "path": "https://orcid.org/0000-0002-8442-8025", + "role": "contact", + "organization": "Research Institute for Nature and Forest (INBO)" + }, + { + "title": "Research Institute for Nature and Forest (INBO)", + "path": "https://inbo.be", + "role": "rightsHolder" + }, + { + "title": "Research Institute for Nature and Forest (INBO)", + "path": "https://inbo.be", + "role": "publisher" + } + ], + "description": "MICA - Muskrat and coypu camera trap observations in Belgium, the Netherlands and Germany is an occurrence dataset published by the Research Institute of Nature and Forest (INBO). It is part of the LIFE project MICA, in which innovative techniques are tested for a more efficient control of muskrat and coypu populations, both invasive species. This dataset is a sample of the original dataset and serves as an example of a Camera Trap Data Package (Camtrap DP)./nMICA - Muskrat and coypu camera trap observations in Belgium, the Netherlands and Germany is an occurrence dataset published by the Research Institute of Nature and Forest (INBO). It is part of the LIFE project MICA, in which innovative techniques are tested for a more efficient control of muskrat and coypu populations, both invasive species. This dataset is a sample of the original dataset and serves as an example of a Camera Trap Data Package (Camtrap DP).", + "version": "1.0", + "keywords": ["camera traps", "public awareness campaign", "flood protection", "flood control", "damage prevention", "animal damage", "pest control", "invasive alien species", "muskrat", "coypu"], + "sources": [ + { + "title": "Agouti", + "path": "https://www.agouti.eu", + "email": "agouti@wur.nl", + "version": "v3.21" + } + ], + "licenses": [ + { + "name": "CC0-1.0", + "scope": "data" + }, + { + "path": "http://creativecommons.org/licenses/by/4.0/", + "scope": "media" + } + ], + "project": { + "title": "Management of Invasive Coypu and muskrAt in Europe / Management of Invasive Coypu and muskrAt in Europe", + "description": "Invasive alien species such as the coypu and muskrat pose a major threat to biodiversity and cost millions of euros annually. By feeding on rushes and reeds, these animals cause serious damage to the environment in which they live and endangered species suffer from habitat loss. The disappearance of reeds and digging in dikes represents a safety risk for humans in the lowland areas. With the LIFE project MICA (), the partners from the participating countries want to develop a transnational plan for the management of coypu and muskrat populations in Europe and aim to reduce their population. The objective of an effective population control of coypu and muskrat is to protect lowlands from flooding, to prevent crop damage and loss of biodiversity. The objective of the project is to serve as a pilot and demonstration project in which ‘best practices’ are tested and new techniques are developed for a more efficient control of muskrat and coypu populations. By involving organisations from Belgium, Germany and the Netherlands, the project also promotes international cooperation and knowledge exchange in the field of muskrat and coypu management./nInvasive alien species such as the coypu and muskrat pose a major threat to biodiversity and cost millions of euros annually. By feeding on rushes and reeds, these animals cause serious damage to the environment in which they live and endangered species suffer from habitat loss. The disappearance of reeds and digging in dikes represents a safety risk for humans in the lowland areas. With the LIFE project MICA (), the partners from the participating countries want to develop a transnational plan for the management of coypu and muskrat populations in Europe and aim to reduce their population. The objective of an effective population control of coypu and muskrat is to protect lowlands from flooding, to prevent crop damage and loss of biodiversity. The objective of the project is to serve as a pilot and demonstration project in which ‘best practices’ are tested and new techniques are developed for a more efficient control of muskrat and coypu populations. By involving organisations from Belgium, Germany and the Netherlands, the project also promotes international cooperation and knowledge exchange in the field of muskrat and coypu management.", + "samplingDesign": "targeted", + "captureMethod": ["activityDetection", "timeLapse"], + "individualAnimals": false, + "observationLevel": ["media", "event"], + "individuals": false + }, + "coordinatePrecision": 0.001, + "spatial": { + "type": "Polygon", + "coordinates": [ + [ + [4.013, 50.699], + [5.659, 50.699], + [5.659, 51.496], + [4.013, 51.496], + [4.013, 50.699] + ] + ] + }, + "temporal": { + "start": "2020-05-30", + "end": "2021-04-18" + }, + "taxonomic": [ + { + "scientificName": "Anas platyrhynchos", + "taxonID": "https://www.checklistbank.org/dataset/COL2023/taxon/DGP6", + "taxonRank": "species", + "vernacularNames": { + "eng": "mallard", + "nld": "wilde eend" + } + }, + { + "scientificName": "Anas strepera", + "taxonID": "https://www.checklistbank.org/dataset/COL2023/taxon/DGPL", + "taxonRank": "species", + "vernacularNames": { + "eng": "gadwall", + "nld": "krakeend" + } + }, + { + "scientificName": "Ardea", + "taxonID": "https://www.checklistbank.org/dataset/COL2023/taxon/32FH", + "taxonRank": "genus", + "vernacularNames": { + "eng": "great herons", + "nld": "reigers" + } + }, + { + "scientificName": "Ardea cinerea", + "taxonID": "https://www.checklistbank.org/dataset/COL2023/taxon/GCHS", + "taxonRank": "species", + "vernacularNames": { + "eng": "grey heron", + "nld": "blauwe reiger" + } + }, + { + "scientificName": "Aves", + "taxonID": "https://www.checklistbank.org/dataset/COL2023/taxon/V2", + "taxonRank": "class", + "vernacularNames": { + "eng": "bird sp.", + "nld": "vogel" + } + }, + { + "scientificName": "Homo sapiens", + "taxonID": "https://www.checklistbank.org/dataset/COL2023/taxon/6MB3T", + "taxonRank": "species", + "vernacularNames": { + "eng": "human", + "nld": "mens" + } + }, + { + "scientificName": "Martes foina", + "taxonID": "https://www.checklistbank.org/dataset/COL2023/taxon/3Y9VW", + "taxonRank": "species", + "vernacularNames": { + "eng": "beech marten", + "nld": "steenmarter" + } + }, + { + "scientificName": "Mustela putorius", + "taxonID": "https://www.checklistbank.org/dataset/COL2023/taxon/44QYC", + "taxonRank": "species", + "vernacularNames": { + "eng": "European polecat", + "nld": "bunzing" + } + }, + { + "scientificName": "Rattus norvegicus", + "taxonID": "https://www.checklistbank.org/dataset/COL2023/taxon/4RM67", + "taxonRank": "species", + "vernacularNames": { + "eng": "brown rat", + "nld": "bruine rat" + } + }, + { + "scientificName": "Vulpes vulpes", + "taxonID": "https://www.checklistbank.org/dataset/COL2023/taxon/5BSG3", + "taxonRank": "species", + "vernacularNames": { + "eng": "red fox", + "nld": "vos" + } + } + ], + "relatedIdentifiers": [ + { + "relationType": "IsDerivedFrom", + "relatedIdentifier": "https://doi.org/10.15468/5tb6ze", + "resourceTypeGeneral": "Dataset", + "relatedIdentifierType": "DOI" + }, + { + "relationType": "IsSupplementTo", + "relatedIdentifier": "https://inbo.github.io/camtraptor/", + "resourceTypeGeneral": "Software", + "relatedIdentifierType": "URL" + } + ], + "references": [] +} diff --git a/tests/testthat/test-merge_camtrapdp.R b/tests/testthat/test-merge_camtrapdp.R new file mode 100644 index 00000000..b0075148 --- /dev/null +++ b/tests/testthat/test-merge_camtrapdp.R @@ -0,0 +1,204 @@ +test_that("merge_camtrapdp() returns a valid camtrapdp object", { + skip_if_offline() + x <- example_dataset() + x$name <- "x" + y <- x + y$name <- "y" + + expect_no_error(check_camtrapdp(merge_camtrapdp(x, y))) +}) + +test_that("merge_camtrapdp() returns error on missing/invalid/duplicate dataset + name(s)", { + skip_if_offline() + x <- example_dataset() + + # Duplicate identifiers + expect_error( + merge_camtrapdp(x, x), + class = "camtrapdp_error_name_duplicated" + ) + + # Invalid identifier + y <- x + x$name <- NULL + expect_error( + merge_camtrapdp(x, y), + class = "camtrapdp_error_name_invalid" + ) + x$name <- NA_character_ + expect_error( + merge_camtrapdp(x, y), + class = "camtrapdp_error_name_invalid" + ) + x$name <- 1 + expect_error( + merge_camtrapdp(x, y), + class = "camtrapdp_error_name_invalid" + ) + x$name <- "x" + y$name <- 1 + expect_error( + merge_camtrapdp(x, y), + class = "camtrapdp_error_name_invalid" + ) + y$name <- "y" + expect_no_error(merge_camtrapdp(x, y)) +}) + +test_that("merge_camtrapdp() adds prefixes to additional resource names to keep + them unique", { + skip_if_offline() + + # Merge datasets with overlapping resources: individuals and individuals, iris + x <- example_dataset() + x$name <- "x" + y <- x + y <- frictionless::add_resource(y, "iris", iris) + y$name <- "y" + xy <- merge_camtrapdp(x, y) + + expect_identical( + frictionless::resources(xy), + c( + "deployments", "media", "observations", "x_individuals", "y_individuals", + "iris" + ) + ) +}) + +test_that("merge_camtrapdp() adds prefixes to identifiers in the data to keep + them unique", { + skip_if_offline() + + # Merge datasets with overlapping deployments: a, b and b, c + x <- example_dataset() %>% + filter_deployments(deploymentID %in% c("00a2c20d", "29b7d356")) # a, b + x$name <- "x" + y <- example_dataset() %>% + filter_deployments(deploymentID %in% c("29b7d356", "577b543a")) # b, c + y$name <- "y" + xy <- merge_camtrapdp(x, y) + + # No duplicate primary keys + deployment_ids <- purrr::pluck(deployments(xy), "deploymentID") + media_ids <- purrr::pluck(media(xy), "mediaID") + observation_ids <- purrr::pluck(observations(xy), "observationID") + expect_false(any(duplicated(deployment_ids))) + expect_false(any(duplicated(media_ids))) + expect_false(any(duplicated(observation_ids))) + + # deploymentID + merged_deployment_ids <- c("00a2c20d", "x_29b7d356", "y_29b7d356", "577b543a") + expect_in(merged_deployment_ids, deployments(xy)$deploymentID) + expect_in(merged_deployment_ids, media(xy)$deploymentID) + expect_in(merged_deployment_ids, observations(xy)$deploymentID) + + # eventID + merged_event_ids <- c("4bb69c45", "x_8f5ffbf2", "y_8f5ffbf2", "5fbf69a4") + expect_in(merged_event_ids, media(xy)$eventID) + expect_in(merged_event_ids, observations(xy)$eventID) + + # mediaID + merged_media_ids <- c("07840dcc", "x_3e65dfaa", "y_3e65dfaa", "44201e9e") + expect_in(merged_media_ids, media(xy)$mediaID) + expect_in(merged_media_ids, observations(xy)$mediaID) + + # observationID + merged_observation_ids <- c("705e6036", "x_ef2f7140", "y_ef2f7140", "d350d2bc") + expect_in(merged_observation_ids, observations(xy)$observationID) +}) + +test_that("merge_camtrapdp() returns the expected datapackage.json when merging + identical datasets", { + skip_if_offline() + temp_dir <- tempdir() + on.exit(unlink(temp_dir, recursive = TRUE)) + + # Merge datasets that are identical except for name + x <- example_dataset() + x$name <- "x" + y <- x + y$name <- "y" + xy <- merge_camtrapdp(x, y) + xy$created <- NULL + + # Write to file + write_camtrapdp(xy, temp_dir) + file.rename( + file.path(temp_dir, "datapackage.json"), + file.path(temp_dir, "datapackage_identical_xy.json") + ) + + expect_snapshot_file(file.path(temp_dir, "datapackage_identical_xy.json")) + expect_no_error( + read_camtrapdp(file.path(temp_dir, "datapackage_identical_xy.json")) + ) +}) + +test_that("merge_camtrapdp() returns the expected datapackage.json when merging + different datasets", { + skip_if_offline() + temp_dir <- tempdir() + on.exit(unlink(temp_dir, recursive = TRUE)) + + # Merge datasets that are different: example_dataset + awd_pilot2 + x <- example_dataset() + x$name <- "x" + y_url <- "https://ipt.nlbif.nl/archive.do?r=awd_pilot2" + zip_file <- file.path(temp_dir, "dataset.zip") + datapackage_file <- file.path(temp_dir, "datapackage.json") + download.file(y_url, zip_file, mode = "wb", quiet = TRUE) + unzip(zip_file, exdir = temp_dir) + y <- read_camtrapdp(datapackage_file) + xy <- merge_camtrapdp(x, y) + xy$created <- NULL + + # Write to file + temp_dir_merged <- file.path(temp_dir, "merged") + write_camtrapdp(xy, temp_dir_merged) + file.rename( + file.path(temp_dir_merged, "datapackage.json"), + file.path(temp_dir_merged, "datapackage_different_xy.json") + ) + + expect_snapshot_file( + file.path(temp_dir_merged, "datapackage_different_xy.json") + ) + expect_no_error( + read_camtrapdp(file.path(temp_dir_merged, "datapackage_different_xy.json")) + ) +}) + +test_that("merge_camtrapdp() adds the DOI of the original datasets as related + identifiers", { + skip_if_offline() + + # Merge datasets that are identical except for name and DOI + x <- example_dataset() + x$name <- "x" + x$id <- "https://doi.org/x" + y <- x + y$name <- "y" + y$id <- "http://doi.org/y" + xy <- merge_camtrapdp(x, y) + + expect_identical( + xy$relatedIdentifiers[[3]], # 1 and 2 were present in x and y (identical) + list( + relationType = "isDerivedFrom", + relatedIdentifier = "https://doi.org/x", + resourceTypeGeneral = "Dataset", + relatedIdentifierType = "DOI" + ) + ) + expect_identical( + xy$relatedIdentifiers[[4]], + list( + relationType = "isDerivedFrom", + relatedIdentifier = "http://doi.org/y", + resourceTypeGeneral = "Dataset", + relatedIdentifierType = "DOI" + ) + ) +}) diff --git a/tests/testthat/test-print.R b/tests/testthat/test-print.R index 283a887f..a8ee4e70 100644 --- a/tests/testthat/test-print.R +++ b/tests/testthat/test-print.R @@ -16,7 +16,7 @@ test_that("print() informs about the number of tables, their rows and unclass()" expect_output( print(x_no_additional), regexp = paste( - "A Camera Trap Data Package with 3 tables:", + "A Camera Trap Data Package \"camtrap-dp-example-dataset\" with 3 tables:", "* deployments: 4 rows", "* media: 423 rows", "* observations: 549 rows", diff --git a/tests/testthat/test-write_camtrapdp.R b/tests/testthat/test-write_camtrapdp.R index d35ce671..a329a20b 100644 --- a/tests/testthat/test-write_camtrapdp.R +++ b/tests/testthat/test-write_camtrapdp.R @@ -17,7 +17,7 @@ test_that("write_camtrapdp() writes datapackage.json and CSV files to a test_that("write_camtrapdp() writes a (filtered) dataset that can be read", { skip_if_offline() x <- example_dataset() - temp_dir <- file.path(tempdir(), "package") + temp_dir <- tempdir() on.exit(unlink(temp_dir, recursive = TRUE)) # Filter deployments and write to disk write_camtrapdp(filter_deployments(x, deploymentID == "00a2c20d"), temp_dir) @@ -32,7 +32,7 @@ test_that("write_camtrapdp() writes a (filtered) dataset that can be read", { test_that("write_camtrapdp() writes the unaltered example dataset as is", { skip_if_offline() x <- example_dataset() - temp_dir <- file.path(tempdir(), "package") + temp_dir <- tempdir() on.exit(unlink(temp_dir, recursive = TRUE)) write_camtrapdp(x, temp_dir) @@ -81,7 +81,7 @@ test_that("write_camtrapdp() writes the unaltered example dataset as is", { test_that("write_camtrapdp() can write compressed files", { skip_if_offline() x <- example_dataset() - temp_dir <- file.path(tempdir(), "package") + temp_dir <- tempdir() on.exit(unlink(temp_dir, recursive = TRUE)) write_camtrapdp(x, temp_dir, compress = TRUE) diff --git a/tests/testthat/test-write_dwc.R b/tests/testthat/test-write_dwc.R index 3c985659..d09e706e 100644 --- a/tests/testthat/test-write_dwc.R +++ b/tests/testthat/test-write_dwc.R @@ -19,7 +19,7 @@ test_that("write_dwc() writes CSV and meta.xml files to a directory and test_that("write_dwc() returns the expected Darwin Core terms as columns", { skip_if_offline() x <- example_dataset() - temp_dir <- file.path(tempdir(), "dwc") + temp_dir <- tempdir() on.exit(unlink(temp_dir, recursive = TRUE)) result <- suppressMessages(write_dwc(x, temp_dir)) @@ -90,7 +90,7 @@ test_that("write_dwc() returns the expected Darwin Core mapping for the example dataset", { skip_if_offline() x <- example_dataset() - temp_dir <- file.path(tempdir(), "dwc") + temp_dir <- tempdir() on.exit(unlink(temp_dir, recursive = TRUE)) suppressMessages(write_dwc(x, temp_dir)) @@ -102,7 +102,7 @@ test_that("write_dwc() returns the expected Darwin Core mapping for the example test_that("write_dwc() returns files that comply with the info in meta.xml", { skip_if_offline() x <- example_dataset() - temp_dir <- file.path(tempdir(), "dwc") + temp_dir <- tempdir() on.exit(unlink(temp_dir, recursive = TRUE)) suppressMessages(write_dwc(x, temp_dir)) @@ -117,7 +117,7 @@ test_that("write_dwc() returns output when taxonID is missing", { optional_cols <- c("taxon.taxonID") observations(x) <- dplyr::select(observations(x), -dplyr::all_of(optional_cols)) - temp_dir <- file.path(tempdir(), "dwc") + temp_dir <- tempdir() on.exit(unlink(temp_dir, recursive = TRUE)) expect_no_error(suppressMessages(write_dwc(x, temp_dir))) diff --git a/tests/testthat/test-write_eml.R b/tests/testthat/test-write_eml.R index f40bfb49..e581946a 100644 --- a/tests/testthat/test-write_eml.R +++ b/tests/testthat/test-write_eml.R @@ -22,7 +22,7 @@ test_that("write_eml() returns the expected eml.xml file for the example dataset", { skip_if_offline() x <- example_dataset() - temp_dir <- file.path(tempdir(), "eml") + temp_dir <- tempdir() on.exit(unlink(temp_dir, recursive = TRUE)) result <- suppressMessages(write_eml(x, temp_dir)) result$packageId <- "random_uuid" # Overwrite generated UUID @@ -36,7 +36,7 @@ test_that("write_eml() returns the expected eml.xml file for the example test_that("write_eml() supports disabling the derived paragraph", { skip_if_offline() x <- example_dataset() - temp_dir <- file.path(tempdir(), "eml") + temp_dir <- tempdir() on.exit(unlink(temp_dir, recursive = TRUE)) result <- suppressMessages(write_eml(x, temp_dir, derived_paragraph = FALSE)) @@ -49,7 +49,7 @@ test_that("write_eml() sets contact/metadata provider to first creator if none x <- example_dataset() # Remove contributor with role == contact x$contributors[[4]] <- NULL - temp_dir <- file.path(tempdir(), "eml") + temp_dir <- tempdir() on.exit(unlink(temp_dir, recursive = TRUE)) result <- suppressMessages(write_eml(x, temp_dir))