From ffa9ee6ae159bce34825f0be5b662f47044a76bd Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Tue, 23 Jul 2024 17:07:51 +0200 Subject: [PATCH 001/142] new function merge_camtrapdp() --- R/merge_camtrapdp.R | 72 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) create mode 100644 R/merge_camtrapdp.R diff --git a/R/merge_camtrapdp.R b/R/merge_camtrapdp.R new file mode 100644 index 00000000..15d43446 --- /dev/null +++ b/R/merge_camtrapdp.R @@ -0,0 +1,72 @@ +#' Merge camtrapdp packages +#' +#' @param x1 +#' @param x2 +#' @param name +#' @param title +#' @return `x` +#' @export +#' @examples +#' x <- example_dataset() %>% +#' filter_deployments(deploymentID %in% c("00a2c20d", "29b7d356")) +#' x2 <- example_dataset() %>% +#' filter_deployments(deploymentID %in% c("577b543a", "62c200a9")) +#' x_merged <- merge_camtrapdp(x1, x2, "new package name", "new title") +merge_camtrapdp <- function(x1, x2, name, title) { + check_camtrapdp(x1) + check_camtrapdp(x2) + + x <- x1 + + # check duplicated ID's + + + # merge resources + deployments(x) <- dplyr::bind_rows(deployments(x1), deployments(x2)) + media(x) <- dplyr::bind_rows(media(x1), media(x2)) + observations(x) <- dplyr::bind_rows(observations(x1), observations(x2)) + + # merge/update mtadata + x$name <- name + x$id <- digest::digest(paste(x$title, x2$title), algo = "md5") + x$created <- format(Sys.time(), "%Y-%m-%dT%H:%M:%SZ") + x$title <- title + x$contributors <- c(x1$contributors, x2$contributors) + paragraph <- paste0( + "This dataset is a combination of 2 datasets: ", x1$title, "and", x2$title, + ".") + x$version <- "1.0" + x$keywords <- unique(x1$keywords, x2$keywords) + x$image <- NULL + x$homepage <- NULL + x$sources <- c(x1$sources, x2$sources) + x$licenses <- c(x1$licenses, x2$licences) + x$bibliographicCitation <- NULL + x$coordinatePrecision <- + max(x1$coordinatePrecision, x2$coordinatePrecision, na.rm = TRUE) + + relatedIdentifiers_x1 <- list( + relationType = "IsDerivedFrom", + relatedIdentifier = x1$id, + resourceTypeGeneral = "Data package", + relatedIdentifierType = "id" + ) + relatedIdentifiers_x2 <- list( + relationType = "IsDerivedFrom", + relatedIdentifier = x2$id, + resourceTypeGeneral = "Data package", + relatedIdentifierType = "id" + ) + new_relatedIdentifiers <- list(relatedIdentifiers_x1, relatedIdentifiers_x2) + x$relatedIdentifiers <- + c(x1$relatedIdentifiers, x2$relatedIdentifiers, new_relatedIdentifiers) + + x$references <- c(x1$references, x2$references) + + x %>% + update_spatial(x) %>% + update_temporal() %>% + update_taxonomic() + + return(x) +} From 0b677c59731669c82606d72dc95bdf1b176eb704 Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Wed, 24 Jul 2024 17:12:53 +0200 Subject: [PATCH 002/142] Create test-merge_camtrapdp.R --- tests/testthat/test-merge_camtrapdp.R | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 tests/testthat/test-merge_camtrapdp.R diff --git a/tests/testthat/test-merge_camtrapdp.R b/tests/testthat/test-merge_camtrapdp.R new file mode 100644 index 00000000..3d3f876b --- /dev/null +++ b/tests/testthat/test-merge_camtrapdp.R @@ -0,0 +1,10 @@ +test_that("merge_camtrapdp() returns a valid camtrapdp object", { + skip_if_offline() + x1 <- example_dataset() %>% + filter_deployments(deploymentID %in% c("00a2c20d", "29b7d356")) + x2 <- example_dataset() %>% + filter_deployments(deploymentID %in% c("577b543a", "62c200a9")) + expect_no_error( + check_camtrapdp(merge_camtrapdp(x1, x2, "new package name", "new title")) + ) +}) From 10d9227d3a0b4e6dbd60a04ff87ae1d75177add5 Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Wed, 24 Jul 2024 17:29:03 +0200 Subject: [PATCH 003/142] document() --- NAMESPACE | 1 + man/merge_camtrapdp.Rd | 21 +++++++++++++++++++++ 2 files changed, 22 insertions(+) create mode 100644 man/merge_camtrapdp.Rd diff --git a/NAMESPACE b/NAMESPACE index 60af482d..8105dac0 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -14,6 +14,7 @@ export(filter_media) export(filter_observations) export(locations) export(media) +export(merge_camtrapdp) export(observations) export(read_camtrapdp) export(round_coordinates) diff --git a/man/merge_camtrapdp.Rd b/man/merge_camtrapdp.Rd new file mode 100644 index 00000000..d08cd28a --- /dev/null +++ b/man/merge_camtrapdp.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/merge_camtrapdp.R +\name{merge_camtrapdp} +\alias{merge_camtrapdp} +\title{Merge camtrapdp packages} +\usage{ +merge_camtrapdp(x1, x2, name, title) +} +\value{ +\code{x} +} +\description{ +Merge camtrapdp packages +} +\examples{ +x1 <- example_dataset() \%>\% + filter_deployments(deploymentID \%in\% c("00a2c20d", "29b7d356")) +x2 <- example_dataset() \%>\% + filter_deployments(deploymentID \%in\% c("577b543a", "62c200a9")) +x_merged <- merge_camtrapdp(x1, x2, "new package name", "new title") +} From 04afa530f3921501927bb50efcd98d61693c6fd7 Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Wed, 24 Jul 2024 17:33:55 +0200 Subject: [PATCH 004/142] add test "merge_camtrapdp() returns no duplicated deploymentID's" --- tests/testthat/test-merge_camtrapdp.R | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tests/testthat/test-merge_camtrapdp.R b/tests/testthat/test-merge_camtrapdp.R index 3d3f876b..505b0761 100644 --- a/tests/testthat/test-merge_camtrapdp.R +++ b/tests/testthat/test-merge_camtrapdp.R @@ -8,3 +8,20 @@ test_that("merge_camtrapdp() returns a valid camtrapdp object", { check_camtrapdp(merge_camtrapdp(x1, x2, "new package name", "new title")) ) }) + +test_that("merge_camtrapdp() returns no duplicated deploymentID's", { + skip_if_offline() + x1 <- example_dataset() + x2 <- example_dataset() %>% + filter_deployments(deploymentID %in% c("00a2c20d")) + original_deploymentIDs <- c( + purrr::pluck(deployments(x1), "deploymentID"), + purrr::pluck(deployments(x2), "deploymentID") + ) + x_merged <- + check_camtrapdp(merge_camtrapdp(x1, x1, "new package name", "new title")) + new_deploymentIDs <- purrr::pluck(deployments(x_merged), "deploymentID") + + expect_true(any(duplicated(original_deploymentIDs))) + expect_false(any(duplicated(new_deploymentIDs))) +}) From 2df51625e7654755a4648f4c73d635489087c543 Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Thu, 25 Jul 2024 09:29:42 +0200 Subject: [PATCH 005/142] fix typo --- tests/testthat/test-merge_camtrapdp.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/testthat/test-merge_camtrapdp.R b/tests/testthat/test-merge_camtrapdp.R index 505b0761..b02c27f9 100644 --- a/tests/testthat/test-merge_camtrapdp.R +++ b/tests/testthat/test-merge_camtrapdp.R @@ -18,8 +18,7 @@ test_that("merge_camtrapdp() returns no duplicated deploymentID's", { purrr::pluck(deployments(x1), "deploymentID"), purrr::pluck(deployments(x2), "deploymentID") ) - x_merged <- - check_camtrapdp(merge_camtrapdp(x1, x1, "new package name", "new title")) + x_merged <- merge_camtrapdp(x1, x1, "new package name", "new title") new_deploymentIDs <- purrr::pluck(deployments(x_merged), "deploymentID") expect_true(any(duplicated(original_deploymentIDs))) From d8f6f0a772959b3932aaf5209e7a92a99856730b Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Thu, 25 Jul 2024 09:35:45 +0200 Subject: [PATCH 006/142] import digest --- DESCRIPTION | 1 + 1 file changed, 1 insertion(+) diff --git a/DESCRIPTION b/DESCRIPTION index 2e531084..8caf006f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -26,6 +26,7 @@ URL: https://github.com/inbo/camtrapdp, https://inbo.github.io/camtrapdp/ BugReports: https://github.com/inbo/camtrapdp/issues Imports: cli, + digest, dplyr, EML, frictionless (>= 1.1.0), From 2ebdcc0d59e49c393017ffb62056a62704eaac70 Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Thu, 25 Jul 2024 12:45:16 +0200 Subject: [PATCH 007/142] update test --- tests/testthat/test-merge_camtrapdp.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-merge_camtrapdp.R b/tests/testthat/test-merge_camtrapdp.R index b02c27f9..743a9cd3 100644 --- a/tests/testthat/test-merge_camtrapdp.R +++ b/tests/testthat/test-merge_camtrapdp.R @@ -11,9 +11,10 @@ test_that("merge_camtrapdp() returns a valid camtrapdp object", { test_that("merge_camtrapdp() returns no duplicated deploymentID's", { skip_if_offline() - x1 <- example_dataset() + x1 <- example_dataset() %>% + filter_deployments(deploymentID %in% c("00a2c20d", "29b7d356", "577b543a")) x2 <- example_dataset() %>% - filter_deployments(deploymentID %in% c("00a2c20d")) + filter_deployments(deploymentID %in% c("00a2c20d", "577b543a", "62c200a9")) original_deploymentIDs <- c( purrr::pluck(deployments(x1), "deploymentID"), purrr::pluck(deployments(x2), "deploymentID") From bcea3dc606aca596dbf14fe90e143dca43495774 Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Thu, 25 Jul 2024 12:45:37 +0200 Subject: [PATCH 008/142] give unique deploymentIDs to deployments --- R/merge_camtrapdp.R | 100 ++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 93 insertions(+), 7 deletions(-) diff --git a/R/merge_camtrapdp.R b/R/merge_camtrapdp.R index 15d43446..95a66b4d 100644 --- a/R/merge_camtrapdp.R +++ b/R/merge_camtrapdp.R @@ -7,7 +7,7 @@ #' @return `x` #' @export #' @examples -#' x <- example_dataset() %>% +#' x1 <- example_dataset() %>% #' filter_deployments(deploymentID %in% c("00a2c20d", "29b7d356")) #' x2 <- example_dataset() %>% #' filter_deployments(deploymentID %in% c("577b543a", "62c200a9")) @@ -15,18 +15,104 @@ merge_camtrapdp <- function(x1, x2, name, title) { check_camtrapdp(x1) check_camtrapdp(x2) - x <- x1 - # check duplicated ID's - - # merge resources deployments(x) <- dplyr::bind_rows(deployments(x1), deployments(x2)) media(x) <- dplyr::bind_rows(media(x1), media(x2)) observations(x) <- dplyr::bind_rows(observations(x1), observations(x2)) - # merge/update mtadata + # check duplicated ID's + deploymentIDs <- purrr::pluck(deployments(x), "deploymentID") + observationIDs <- purrr::pluck(observations(x), "observationsID") + mediaIDs <- purrr::pluck(media(x), "mediaID") + + # set a vectorised function for creating hash function digests + vdigest_algo_crc32 <- digest::getVDigest(algo = "crc32") + + # assume duplicates are between packages, not within + if (any(duplicated(deploymentIDs))) { + duplicatedID <- deploymentIDs[duplicated(deploymentIDs)] + + # give unique deploymentIDs to deployments + deployments(x2) <- + deployments(x2) %>% + dplyr::mutate( + deploymentID = + dplyr::if_else( + .data$deploymentID %in% duplicatedID, + vdigest_algo_crc32(.data$deploymentID), + .data$deploymentID + ) + ) + + # give unique deploymentIDs to observations + observations(x2) <- + observations(x2) %>% + dplyr::mutate( + deploymentID = + dplyr::if_else( + .data$deploymentID %in% duplicatedID, + vdigest_algo_crc32(.data$deploymentID), + .data$deploymentID + ) + ) + + # give unique deploymentIDs to media + media(x2) <- + media(x2) %>% + dplyr::mutate( + deploymentID = + dplyr::if_else( + .data$deploymentID %in% duplicatedID, + vdigest_algo_crc32(.data$deploymentID), + .data$deploymentID + ) + ) + + new_deploymentIDs <- purrr::pluck(deployments(x), "deploymentID") + + # new merge with unique deploymentID's + deployments(x) <- dplyr::bind_rows(deployments(x1), deployments(x2)) + media(x) <- dplyr::bind_rows(media(x1), media(x2)) + observations(x) <- dplyr::bind_rows(observations(x1), observations(x2)) + + # inform user + cli::cli_alert_warning( + c( + paste0( + "{.arg x1} and {.arg x2} have duplicated deploymentID's:", + "{.val {duplicatedID}}" + ), + "v" = paste0( + "Duplicated deploymentID's of {.arg x2} now have new uniqe", + "deploymentID's: {.val {new_deploymentIDs}}" + ) + ), + class = "camtrapdp_warning_unique_deploymentID" + ) + + } + + if (any(duplicated(observationIDs))) { + duplicatedID <- observationIDs[duplicated(observationIDs)] + + cli::cli_alert_warning( + "message", + class = "camtrapdp_warning_unique_observationID" + ) + } + + if (any(duplicated(mediaIDs))) { + duplicatedID <- mediaIDs[duplicated(mediaIDs)] + + cli::cli_alert_warning( + "message", + class = "camtrapdp_warning_unique_mediaID" + ) + } + + # merge/update metadata x$name <- name x$id <- digest::digest(paste(x$title, x2$title), algo = "md5") x$created <- format(Sys.time(), "%Y-%m-%dT%H:%M:%SZ") @@ -63,7 +149,7 @@ merge_camtrapdp <- function(x1, x2, name, title) { x$references <- c(x1$references, x2$references) - x %>% + x <- update_spatial(x) %>% update_temporal() %>% update_taxonomic() From 82fa0576c71dc52a81712623d5effc739e88edf0 Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Thu, 25 Jul 2024 15:17:15 +0200 Subject: [PATCH 009/142] set unique mediaID's and observationID's --- R/merge_camtrapdp.R | 123 +++++++++++++++++++++++++++++++++----------- 1 file changed, 93 insertions(+), 30 deletions(-) diff --git a/R/merge_camtrapdp.R b/R/merge_camtrapdp.R index 95a66b4d..fbf160ea 100644 --- a/R/merge_camtrapdp.R +++ b/R/merge_camtrapdp.R @@ -22,29 +22,30 @@ merge_camtrapdp <- function(x1, x2, name, title) { media(x) <- dplyr::bind_rows(media(x1), media(x2)) observations(x) <- dplyr::bind_rows(observations(x1), observations(x2)) - # check duplicated ID's + # get ID's deploymentIDs <- purrr::pluck(deployments(x), "deploymentID") - observationIDs <- purrr::pluck(observations(x), "observationsID") mediaIDs <- purrr::pluck(media(x), "mediaID") + observationIDs <- purrr::pluck(observations(x), "observationID") # set a vectorised function for creating hash function digests vdigest_algo_crc32 <- digest::getVDigest(algo = "crc32") # assume duplicates are between packages, not within + # set unique deploymentID's if (any(duplicated(deploymentIDs))) { - duplicatedID <- deploymentIDs[duplicated(deploymentIDs)] + duplicated_deploymentID <- deploymentIDs[duplicated(deploymentIDs)] # give unique deploymentIDs to deployments deployments(x2) <- deployments(x2) %>% - dplyr::mutate( - deploymentID = - dplyr::if_else( - .data$deploymentID %in% duplicatedID, - vdigest_algo_crc32(.data$deploymentID), - .data$deploymentID - ) - ) + dplyr::mutate( + deploymentID = + dplyr::if_else( + .data$deploymentID %in% duplicated_deploymentID, + vdigest_algo_crc32(.data$deploymentID), + .data$deploymentID + ) + ) # give unique deploymentIDs to observations observations(x2) <- @@ -52,7 +53,7 @@ merge_camtrapdp <- function(x1, x2, name, title) { dplyr::mutate( deploymentID = dplyr::if_else( - .data$deploymentID %in% duplicatedID, + .data$deploymentID %in% duplicated_deploymentID, vdigest_algo_crc32(.data$deploymentID), .data$deploymentID ) @@ -64,51 +65,113 @@ merge_camtrapdp <- function(x1, x2, name, title) { dplyr::mutate( deploymentID = dplyr::if_else( - .data$deploymentID %in% duplicatedID, + .data$deploymentID %in% duplicated_deploymentID, vdigest_algo_crc32(.data$deploymentID), .data$deploymentID ) ) - new_deploymentIDs <- purrr::pluck(deployments(x), "deploymentID") - # new merge with unique deploymentID's deployments(x) <- dplyr::bind_rows(deployments(x1), deployments(x2)) media(x) <- dplyr::bind_rows(media(x1), media(x2)) observations(x) <- dplyr::bind_rows(observations(x1), observations(x2)) # inform user + new_deploymentIDs <- vdigest_algo_crc32(duplicated_deploymentID) cli::cli_alert_warning( c( - paste0( + paste( + "{.arg x1} and {.arg x2} must have unique deploymentID's.\n", "{.arg x1} and {.arg x2} have duplicated deploymentID's:", - "{.val {duplicatedID}}" - ), - "v" = paste0( - "Duplicated deploymentID's of {.arg x2} now have new uniqe", - "deploymentID's: {.val {new_deploymentIDs}}" + "{.val {duplicated_deploymentID}}.\n", + "Duplicated deploymentID's of {.arg x2} are now replaced by ", + "{.val {new_deploymentIDs}} respectively." ) ), class = "camtrapdp_warning_unique_deploymentID" ) - } - if (any(duplicated(observationIDs))) { - duplicatedID <- observationIDs[duplicated(observationIDs)] + # set unique mediaID's + if (any(duplicated(mediaIDs))) { + duplicated_mediaID <- mediaIDs[duplicated(mediaIDs)] + + # give unique mediaDs to media + media(x2) <- + media(x2) %>% + dplyr::mutate( + mediaID = + dplyr::if_else( + .data$mediaID %in% duplicated_mediaID, + vdigest_algo_crc32(.data$mediaID), + .data$mediaID + ) + ) + + # give unique mediaDs to observations + observations(x2) <- + observations(x2) %>% + dplyr::mutate( + mediaID = + dplyr::if_else( + .data$mediaID %in% duplicated_mediaID, + vdigest_algo_crc32(.data$mediaID), + .data$mediaID + ) + ) + # new merge with unique mediaID's + media(x) <- dplyr::bind_rows(media(x1), media(x2)) + observations(x) <- dplyr::bind_rows(observations(x1), observations(x2)) + + # inform user + new_mediaIDs <- vdigest_algo_crc32(duplicated_mediaID) cli::cli_alert_warning( - "message", - class = "camtrapdp_warning_unique_observationID" + c( + paste( + "{.arg x1} and {.arg x2} must have unique mediaID's.\n", + "{.arg x1} and {.arg x2} have duplicated mediaID's:", + "{.val {duplicated_mediaID}}.\n", + "Duplicated observationID's of {.arg x2} are now replaced by ", + "{.val {new_mediaIDs}} respectively." + ) + ), + class = "camtrapdp_warning_unique_mediaID" ) } - if (any(duplicated(mediaIDs))) { - duplicatedID <- mediaIDs[duplicated(mediaIDs)] + # set unique observationID's + if (any(duplicated(observationIDs))) { + duplicated_observationID <- observationIDs[duplicated(observationIDs)] + + # give unique observationIDs to observations + observations(x2) <- + observations(x2) %>% + dplyr::mutate( + observationID = + dplyr::if_else( + .data$observationID %in% duplicated_observationID, + vdigest_algo_crc32(.data$observationID), + .data$observationID + ) + ) + + # new merge with unique observationID's + observations(x) <- dplyr::bind_rows(observations(x1), observations(x2)) + # inform user + new_observationIDs <- vdigest_algo_crc32(duplicated_observationID) cli::cli_alert_warning( - "message", - class = "camtrapdp_warning_unique_mediaID" + c( + paste( + "{.arg x1} and {.arg x2} must have unique observationID's.\n", + "{.arg x1} and {.arg x2} have duplicated observationID's:", + "{.val {duplicated_observationID}}.\n", + "Duplicated observationID's of {.arg x2} are now replaced by ", + "{.val {new_observationIDs}} respectively." + ) + ), + class = "camtrapdp_warning_unique_observationID" ) } From 429f2010b6eed9e1d88f511ff4bdbcfac7cd7bdc Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Thu, 25 Jul 2024 15:21:43 +0200 Subject: [PATCH 010/142] update comment --- R/merge_camtrapdp.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/merge_camtrapdp.R b/R/merge_camtrapdp.R index fbf160ea..155a43c3 100644 --- a/R/merge_camtrapdp.R +++ b/R/merge_camtrapdp.R @@ -30,7 +30,6 @@ merge_camtrapdp <- function(x1, x2, name, title) { # set a vectorised function for creating hash function digests vdigest_algo_crc32 <- digest::getVDigest(algo = "crc32") - # assume duplicates are between packages, not within # set unique deploymentID's if (any(duplicated(deploymentIDs))) { duplicated_deploymentID <- deploymentIDs[duplicated(deploymentIDs)] From 8ce8ad23a0131edbcf49c9ceb7c94dd7216ee020 Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Thu, 25 Jul 2024 15:35:32 +0200 Subject: [PATCH 011/142] update documentation --- R/merge_camtrapdp.R | 13 ++++++++----- man/merge_camtrapdp.Rd | 21 +++++++++++++++++++-- man/round_coordinates.Rd | 1 + man/write_dwc.Rd | 1 + man/write_eml.Rd | 1 + 5 files changed, 30 insertions(+), 7 deletions(-) diff --git a/R/merge_camtrapdp.R b/R/merge_camtrapdp.R index 155a43c3..587f83bf 100644 --- a/R/merge_camtrapdp.R +++ b/R/merge_camtrapdp.R @@ -1,10 +1,13 @@ -#' Merge camtrapdp packages +#' Merge Camera Trap Data packages #' -#' @param x1 -#' @param x2 -#' @param name -#' @param title +#' @param x1,x2 Camera Trap Data Package objects (as returned by +#' `read_camtrapdp()`), to be coerced to one. +#' @param name A short url-usable (and preferably human-readable) name for this +#' merged package. +#' @param title A string providing a title or one sentence description for this +#' merged package. #' @return `x` +#' @family transformation functions #' @export #' @examples #' x1 <- example_dataset() %>% diff --git a/man/merge_camtrapdp.Rd b/man/merge_camtrapdp.Rd index d08cd28a..a544494a 100644 --- a/man/merge_camtrapdp.Rd +++ b/man/merge_camtrapdp.Rd @@ -2,15 +2,25 @@ % Please edit documentation in R/merge_camtrapdp.R \name{merge_camtrapdp} \alias{merge_camtrapdp} -\title{Merge camtrapdp packages} +\title{Merge Camera Trap Data packages} \usage{ merge_camtrapdp(x1, x2, name, title) } +\arguments{ +\item{x1, x2}{Camera Trap Data Package objects (as returned by +\code{read_camtrapdp()}), to be coerced to one.} + +\item{name}{A short url-usable (and preferably human-readable) name for this +merged package.} + +\item{title}{A string providing a title or one sentence description for this +merged package.} +} \value{ \code{x} } \description{ -Merge camtrapdp packages +Merge Camera Trap Data packages } \examples{ x1 <- example_dataset() \%>\% @@ -19,3 +29,10 @@ x2 <- example_dataset() \%>\% filter_deployments(deploymentID \%in\% c("577b543a", "62c200a9")) x_merged <- merge_camtrapdp(x1, x2, "new package name", "new title") } +\seealso{ +Other transformation functions: +\code{\link{round_coordinates}()}, +\code{\link{write_dwc}()}, +\code{\link{write_eml}()} +} +\concept{transformation functions} diff --git a/man/round_coordinates.Rd b/man/round_coordinates.Rd index aa7c22a2..b873ec28 100644 --- a/man/round_coordinates.Rd +++ b/man/round_coordinates.Rd @@ -88,6 +88,7 @@ deployments(x_rounded)[c("latitude", "longitude", "coordinateUncertainty")] } \seealso{ Other transformation functions: +\code{\link{merge_camtrapdp}()}, \code{\link{write_dwc}()}, \code{\link{write_eml}()} } diff --git a/man/write_dwc.Rd b/man/write_dwc.Rd index 85d70d27..8dff2fe8 100644 --- a/man/write_dwc.Rd +++ b/man/write_dwc.Rd @@ -68,6 +68,7 @@ unlink("my_directory", recursive = TRUE) } \seealso{ Other transformation functions: +\code{\link{merge_camtrapdp}()}, \code{\link{round_coordinates}()}, \code{\link{write_eml}()} } diff --git a/man/write_eml.Rd b/man/write_eml.Rd index 0f18c49f..23782384 100644 --- a/man/write_eml.Rd +++ b/man/write_eml.Rd @@ -73,6 +73,7 @@ unlink("my_directory", recursive = TRUE) } \seealso{ Other transformation functions: +\code{\link{merge_camtrapdp}()}, \code{\link{round_coordinates}()}, \code{\link{write_dwc}()} } From 9297cd9ee830708782119f4b395d11c3653d9e64 Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Thu, 25 Jul 2024 15:44:26 +0200 Subject: [PATCH 012/142] update examples --- R/merge_camtrapdp.R | 2 +- man/merge_camtrapdp.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/merge_camtrapdp.R b/R/merge_camtrapdp.R index 587f83bf..05a7d718 100644 --- a/R/merge_camtrapdp.R +++ b/R/merge_camtrapdp.R @@ -14,7 +14,7 @@ #' filter_deployments(deploymentID %in% c("00a2c20d", "29b7d356")) #' x2 <- example_dataset() %>% #' filter_deployments(deploymentID %in% c("577b543a", "62c200a9")) -#' x_merged <- merge_camtrapdp(x1, x2, "new package name", "new title") +#' x_merged <- merge_camtrapdp(x1, x2, "new_package_name", "New title") merge_camtrapdp <- function(x1, x2, name, title) { check_camtrapdp(x1) check_camtrapdp(x2) diff --git a/man/merge_camtrapdp.Rd b/man/merge_camtrapdp.Rd index a544494a..3b9cb5e4 100644 --- a/man/merge_camtrapdp.Rd +++ b/man/merge_camtrapdp.Rd @@ -27,7 +27,7 @@ x1 <- example_dataset() \%>\% filter_deployments(deploymentID \%in\% c("00a2c20d", "29b7d356")) x2 <- example_dataset() \%>\% filter_deployments(deploymentID \%in\% c("577b543a", "62c200a9")) -x_merged <- merge_camtrapdp(x1, x2, "new package name", "new title") +x_merged <- merge_camtrapdp(x1, x2, "new_package_name", "New title") } \seealso{ Other transformation functions: From fefac698e9015e93d8fb53d3bddf17126b89e931 Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Thu, 25 Jul 2024 15:51:50 +0200 Subject: [PATCH 013/142] correct typo --- R/merge_camtrapdp.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/merge_camtrapdp.R b/R/merge_camtrapdp.R index 05a7d718..132f0d3e 100644 --- a/R/merge_camtrapdp.R +++ b/R/merge_camtrapdp.R @@ -134,7 +134,7 @@ merge_camtrapdp <- function(x1, x2, name, title) { "{.arg x1} and {.arg x2} must have unique mediaID's.\n", "{.arg x1} and {.arg x2} have duplicated mediaID's:", "{.val {duplicated_mediaID}}.\n", - "Duplicated observationID's of {.arg x2} are now replaced by ", + "Duplicated mediaID's of {.arg x2} are now replaced by ", "{.val {new_mediaIDs}} respectively." ) ), From e80eb4989986863ebf8633de77a12e396973ec54 Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Thu, 25 Jul 2024 16:03:35 +0200 Subject: [PATCH 014/142] delete space in messages --- R/merge_camtrapdp.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/merge_camtrapdp.R b/R/merge_camtrapdp.R index 132f0d3e..45fedc5f 100644 --- a/R/merge_camtrapdp.R +++ b/R/merge_camtrapdp.R @@ -86,7 +86,7 @@ merge_camtrapdp <- function(x1, x2, name, title) { "{.arg x1} and {.arg x2} must have unique deploymentID's.\n", "{.arg x1} and {.arg x2} have duplicated deploymentID's:", "{.val {duplicated_deploymentID}}.\n", - "Duplicated deploymentID's of {.arg x2} are now replaced by ", + "Duplicated deploymentID's of {.arg x2} are now replaced by", "{.val {new_deploymentIDs}} respectively." ) ), @@ -134,7 +134,7 @@ merge_camtrapdp <- function(x1, x2, name, title) { "{.arg x1} and {.arg x2} must have unique mediaID's.\n", "{.arg x1} and {.arg x2} have duplicated mediaID's:", "{.val {duplicated_mediaID}}.\n", - "Duplicated mediaID's of {.arg x2} are now replaced by ", + "Duplicated mediaID's of {.arg x2} are now replaced by", "{.val {new_mediaIDs}} respectively." ) ), @@ -169,7 +169,7 @@ merge_camtrapdp <- function(x1, x2, name, title) { "{.arg x1} and {.arg x2} must have unique observationID's.\n", "{.arg x1} and {.arg x2} have duplicated observationID's:", "{.val {duplicated_observationID}}.\n", - "Duplicated observationID's of {.arg x2} are now replaced by ", + "Duplicated observationID's of {.arg x2} are now replaced by", "{.val {new_observationIDs}} respectively." ) ), From 298b459b316948f6bdf3ad48363893bfc1436c4f Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Thu, 25 Jul 2024 16:17:02 +0200 Subject: [PATCH 015/142] test merge_camtrapdp() returns message when ID's are replaced --- tests/testthat/test-merge_camtrapdp.R | 33 +++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-merge_camtrapdp.R b/tests/testthat/test-merge_camtrapdp.R index 743a9cd3..c04602e2 100644 --- a/tests/testthat/test-merge_camtrapdp.R +++ b/tests/testthat/test-merge_camtrapdp.R @@ -5,7 +5,11 @@ test_that("merge_camtrapdp() returns a valid camtrapdp object", { x2 <- example_dataset() %>% filter_deployments(deploymentID %in% c("577b543a", "62c200a9")) expect_no_error( - check_camtrapdp(merge_camtrapdp(x1, x2, "new package name", "new title")) + suppressMessages( + check_camtrapdp( + merge_camtrapdp(x1, x2, "new_package_name", "new title") + ) + ) ) }) @@ -19,9 +23,34 @@ test_that("merge_camtrapdp() returns no duplicated deploymentID's", { purrr::pluck(deployments(x1), "deploymentID"), purrr::pluck(deployments(x2), "deploymentID") ) - x_merged <- merge_camtrapdp(x1, x1, "new package name", "new title") + x_merged <- suppressMessages( + merge_camtrapdp(x1, x2, "new_package_name", "new title") + ) new_deploymentIDs <- purrr::pluck(deployments(x_merged), "deploymentID") expect_true(any(duplicated(original_deploymentIDs))) expect_false(any(duplicated(new_deploymentIDs))) }) + +test_that("merge_camtrapdp() returns message when ID's are replaced", { + skip_if_offline() + x1 <- example_dataset() + x2 <- example_dataset() %>% + filter_deployments(deploymentID %in% c("62c200a9")) %>% + filter_media(mediaID %in% c("fb58a2b9", "0bb2566e", "a6a7a04c")) + + expect_message( + merge_camtrapdp(x1, x2, "new_package_name", "new title") + # regexp = paste( + # "! `x1` and `x2` must have unique deploymentID's.", + # "`x1` and `x2` have duplicated deploymentID's: \"62c200a9\".", + # "Duplicated deploymentID's of `x2` are now replaced by \"07ce6950\" respectively.", + # "! `x1` and `x2` must have unique mediaID's.", + # "`x1` and `x2` have duplicated mediaID's: \"fb58a2b9\", \"0bb2566e\", and \"a6a7a04c\".", + # "Duplicated mediaID's of `x2` are now replaced by \"ba426f00\", \"8d5c0009\", and \"1689e0db\" respectively.", + # "! `x1` and `x2` must have unique observationID's.", + # "`x1` and `x2` have duplicated observationID's: \"a0431321\", \"fb58a2b9_1\", \"0bb2566e_1\", and \"a6a7a04c_1\".", + # "Duplicated observationID's of `x2` are now replaced by \"c6eeccc0\", \"a8452c14\", \"a48adc8a\", and \"b78e02ba\" respectively." + # ) + ) +}) From 61bb20db373d3dcb89c92c8061c830acd40e94a8 Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Fri, 26 Jul 2024 09:43:14 +0200 Subject: [PATCH 016/142] update test "merge_camtrapdp() returns unique deplpymentID's, mediaID's and observationID's" --- tests/testthat/test-merge_camtrapdp.R | 49 ++++++++++++++++++++++++--- 1 file changed, 45 insertions(+), 4 deletions(-) diff --git a/tests/testthat/test-merge_camtrapdp.R b/tests/testthat/test-merge_camtrapdp.R index c04602e2..af29ebce 100644 --- a/tests/testthat/test-merge_camtrapdp.R +++ b/tests/testthat/test-merge_camtrapdp.R @@ -13,23 +13,64 @@ test_that("merge_camtrapdp() returns a valid camtrapdp object", { ) }) -test_that("merge_camtrapdp() returns no duplicated deploymentID's", { +test_that("merge_camtrapdp() returns unique deplpymentID's, mediaID's and + observationID's", { skip_if_offline() + duplicated_deploymentID <- "00a2c20d" + duplicated_mediaID <- "ca3ff293" x1 <- example_dataset() %>% - filter_deployments(deploymentID %in% c("00a2c20d", "29b7d356", "577b543a")) + filter_deployments(deploymentID %in% c(duplicated_deploymentID, "29b7d356")) x2 <- example_dataset() %>% - filter_deployments(deploymentID %in% c("00a2c20d", "577b543a", "62c200a9")) + filter_deployments(deploymentID %in% c(duplicated_deploymentID, "62c200a9")) %>% + filter_media(mediaID %in% c(duplicated_mediaID, "bf610120")) + + # get original IDs original_deploymentIDs <- c( purrr::pluck(deployments(x1), "deploymentID"), purrr::pluck(deployments(x2), "deploymentID") ) + original_mediaIDs <- c( + purrr::pluck(media(x1), "mediaID"), + purrr::pluck(media(x2), "mediaID") + ) + original_observationIDs <- c( + purrr::pluck(observations(x1), "observationID"), + purrr::pluck(observations(x2), "observationID") + ) + duplicated_observationID <- + original_observationIDs[duplicated(original_observationIDs)] + + # merge x_merged <- suppressMessages( merge_camtrapdp(x1, x2, "new_package_name", "new title") ) + + # get new IDs new_deploymentIDs <- purrr::pluck(deployments(x_merged), "deploymentID") + new_mediaIDs <- purrr::pluck(media(x_merged), "mediaID") + new_observationIDs <- purrr::pluck(observations(x_merged), "observationID") + + # set a vectorised function for creating hash function digests + vdigest_algo_crc32 <- digest::getVDigest(algo = "crc32") + # tests expect_true(any(duplicated(original_deploymentIDs))) expect_false(any(duplicated(new_deploymentIDs))) + expect_true(vdigest_algo_crc32(duplicated_deploymentID) %in% new_deploymentIDs) + expect_identical( + c(duplicated_deploymentID, "29b7d356", "77b0e58b", "62c200a9"), + new_deploymentIDs + ) + + expect_true(any(duplicated(original_mediaIDs))) + expect_false(any(duplicated(new_mediaIDs))) + expect_true(vdigest_algo_crc32(duplicated_mediaID) %in% new_mediaIDs) + + expect_true(any(duplicated(original_observationIDs))) + expect_false(any(duplicated(new_observationIDs))) + expect_true( + all(vdigest_algo_crc32(duplicated_observationID) %in% new_observationIDs) + ) }) test_that("merge_camtrapdp() returns message when ID's are replaced", { @@ -40,7 +81,7 @@ test_that("merge_camtrapdp() returns message when ID's are replaced", { filter_media(mediaID %in% c("fb58a2b9", "0bb2566e", "a6a7a04c")) expect_message( - merge_camtrapdp(x1, x2, "new_package_name", "new title") + merge_camtrapdp(x1, x2, "new_package_name", "new title") #, # regexp = paste( # "! `x1` and `x2` must have unique deploymentID's.", # "`x1` and `x2` have duplicated deploymentID's: \"62c200a9\".", From 53eadedf2adf77a2c915e6035cecee470f18ad97 Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Fri, 26 Jul 2024 09:46:33 +0200 Subject: [PATCH 017/142] character limit --- tests/testthat/test-merge_camtrapdp.R | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-merge_camtrapdp.R b/tests/testthat/test-merge_camtrapdp.R index af29ebce..096608e4 100644 --- a/tests/testthat/test-merge_camtrapdp.R +++ b/tests/testthat/test-merge_camtrapdp.R @@ -21,7 +21,9 @@ test_that("merge_camtrapdp() returns unique deplpymentID's, mediaID's and x1 <- example_dataset() %>% filter_deployments(deploymentID %in% c(duplicated_deploymentID, "29b7d356")) x2 <- example_dataset() %>% - filter_deployments(deploymentID %in% c(duplicated_deploymentID, "62c200a9")) %>% + filter_deployments( + deploymentID %in% c(duplicated_deploymentID, "62c200a9") + ) %>% filter_media(mediaID %in% c(duplicated_mediaID, "bf610120")) # get original IDs @@ -56,7 +58,9 @@ test_that("merge_camtrapdp() returns unique deplpymentID's, mediaID's and # tests expect_true(any(duplicated(original_deploymentIDs))) expect_false(any(duplicated(new_deploymentIDs))) - expect_true(vdigest_algo_crc32(duplicated_deploymentID) %in% new_deploymentIDs) + expect_true( + vdigest_algo_crc32(duplicated_deploymentID) %in% new_deploymentIDs + ) expect_identical( c(duplicated_deploymentID, "29b7d356", "77b0e58b", "62c200a9"), new_deploymentIDs From 03051d9c5db1e7dfee7207bc7875806a06d0fff9 Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Fri, 26 Jul 2024 11:17:24 +0200 Subject: [PATCH 018/142] create helper function `replace_duplicated_deploymentID()` --- R/merge_camtrapdp.R | 39 ++----------------------------- R/utils.R | 56 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+), 37 deletions(-) diff --git a/R/merge_camtrapdp.R b/R/merge_camtrapdp.R index 45fedc5f..16edab88 100644 --- a/R/merge_camtrapdp.R +++ b/R/merge_camtrapdp.R @@ -33,45 +33,10 @@ merge_camtrapdp <- function(x1, x2, name, title) { # set a vectorised function for creating hash function digests vdigest_algo_crc32 <- digest::getVDigest(algo = "crc32") - # set unique deploymentID's + # replace duplicated deploymentID's in x2 if (any(duplicated(deploymentIDs))) { duplicated_deploymentID <- deploymentIDs[duplicated(deploymentIDs)] - - # give unique deploymentIDs to deployments - deployments(x2) <- - deployments(x2) %>% - dplyr::mutate( - deploymentID = - dplyr::if_else( - .data$deploymentID %in% duplicated_deploymentID, - vdigest_algo_crc32(.data$deploymentID), - .data$deploymentID - ) - ) - - # give unique deploymentIDs to observations - observations(x2) <- - observations(x2) %>% - dplyr::mutate( - deploymentID = - dplyr::if_else( - .data$deploymentID %in% duplicated_deploymentID, - vdigest_algo_crc32(.data$deploymentID), - .data$deploymentID - ) - ) - - # give unique deploymentIDs to media - media(x2) <- - media(x2) %>% - dplyr::mutate( - deploymentID = - dplyr::if_else( - .data$deploymentID %in% duplicated_deploymentID, - vdigest_algo_crc32(.data$deploymentID), - .data$deploymentID - ) - ) + x2 <- replace_duplicated_deploymentID(x2, duplicated_deploymentID) # new merge with unique deploymentID's deployments(x) <- dplyr::bind_rows(deployments(x1), deployments(x2)) diff --git a/R/utils.R b/R/utils.R index 1bee2923..a83edee1 100644 --- a/R/utils.R +++ b/R/utils.R @@ -20,3 +20,59 @@ mutate_when_missing <- function(.data, ...) { } return(.data) } + +#' Replace duplicated deploymentID +#' +#' Replaces duplicated deploymentIDs with unique values in all resources, using +#' `vdigest_algo_crc32` to generate hashes. +#' +#' @inheritParams print.camtrapdp +#' @param duplicated_deploymentID DeploymentID's to be replaced. +#' @return `x` with unique deploymenID's +#' @family helper functions +#' @noRd +#' @examples +#' replace_duplicated_deploymentID(example_dataset()) +replace_duplicated_deploymentID <- function(x, duplicated_deploymentID) { + + # set a vectorised function for creating hash function digests + vdigest_algo_crc32 <- digest::getVDigest(algo = "crc32") + + # unique deploymentIDs to deployments + deployments(x) <- + deployments(x) %>% + dplyr::mutate( + deploymentID = + dplyr::if_else( + .data$deploymentID %in% duplicated_deploymentID, + vdigest_algo_crc32(.data$deploymentID), + .data$deploymentID + ) + ) + + # unique deploymentIDs in observations + observations(x) <- + observations(x) %>% + dplyr::mutate( + deploymentID = + dplyr::if_else( + .data$deploymentID %in% duplicated_deploymentID, + vdigest_algo_crc32(.data$deploymentID), + .data$deploymentID + ) + ) + + # unique deploymentIDs in media + media(x) <- + media(x) %>% + dplyr::mutate( + deploymentID = + dplyr::if_else( + .data$deploymentID %in% duplicated_deploymentID, + vdigest_algo_crc32(.data$deploymentID), + .data$deploymentID + ) + ) + + return(x) +} From af034cfc135485ba0a0608fbe8e56d930650445f Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Fri, 26 Jul 2024 11:22:06 +0200 Subject: [PATCH 019/142] create helper function `vdigest_crc32()` --- R/merge_camtrapdp.R | 15 ++++++--------- R/utils.R | 26 +++++++++++++++++++++----- tests/testthat/test-merge_camtrapdp.R | 9 +++------ 3 files changed, 30 insertions(+), 20 deletions(-) diff --git a/R/merge_camtrapdp.R b/R/merge_camtrapdp.R index 16edab88..a7eac81b 100644 --- a/R/merge_camtrapdp.R +++ b/R/merge_camtrapdp.R @@ -30,9 +30,6 @@ merge_camtrapdp <- function(x1, x2, name, title) { mediaIDs <- purrr::pluck(media(x), "mediaID") observationIDs <- purrr::pluck(observations(x), "observationID") - # set a vectorised function for creating hash function digests - vdigest_algo_crc32 <- digest::getVDigest(algo = "crc32") - # replace duplicated deploymentID's in x2 if (any(duplicated(deploymentIDs))) { duplicated_deploymentID <- deploymentIDs[duplicated(deploymentIDs)] @@ -44,7 +41,7 @@ merge_camtrapdp <- function(x1, x2, name, title) { observations(x) <- dplyr::bind_rows(observations(x1), observations(x2)) # inform user - new_deploymentIDs <- vdigest_algo_crc32(duplicated_deploymentID) + new_deploymentIDs <- vdigest_crc32(duplicated_deploymentID) cli::cli_alert_warning( c( paste( @@ -70,7 +67,7 @@ merge_camtrapdp <- function(x1, x2, name, title) { mediaID = dplyr::if_else( .data$mediaID %in% duplicated_mediaID, - vdigest_algo_crc32(.data$mediaID), + vdigest_crc32(.data$mediaID), .data$mediaID ) ) @@ -82,7 +79,7 @@ merge_camtrapdp <- function(x1, x2, name, title) { mediaID = dplyr::if_else( .data$mediaID %in% duplicated_mediaID, - vdigest_algo_crc32(.data$mediaID), + vdigest_crc32(.data$mediaID), .data$mediaID ) ) @@ -92,7 +89,7 @@ merge_camtrapdp <- function(x1, x2, name, title) { observations(x) <- dplyr::bind_rows(observations(x1), observations(x2)) # inform user - new_mediaIDs <- vdigest_algo_crc32(duplicated_mediaID) + new_mediaIDs <- vdigest_crc32(duplicated_mediaID) cli::cli_alert_warning( c( paste( @@ -118,7 +115,7 @@ merge_camtrapdp <- function(x1, x2, name, title) { observationID = dplyr::if_else( .data$observationID %in% duplicated_observationID, - vdigest_algo_crc32(.data$observationID), + vdigest_crc32(.data$observationID), .data$observationID ) ) @@ -127,7 +124,7 @@ merge_camtrapdp <- function(x1, x2, name, title) { observations(x) <- dplyr::bind_rows(observations(x1), observations(x2)) # inform user - new_observationIDs <- vdigest_algo_crc32(duplicated_observationID) + new_observationIDs <- vdigest_crc32(duplicated_observationID) cli::cli_alert_warning( c( paste( diff --git a/R/utils.R b/R/utils.R index a83edee1..8129a9e5 100644 --- a/R/utils.R +++ b/R/utils.R @@ -21,10 +21,26 @@ mutate_when_missing <- function(.data, ...) { return(.data) } +#' Create hashes +#' +#' Set a vectorised function for creating hash function digests, using algorithm +#' "crc32". +#' +#' @param object vector or character string +#' @return hash summary as a character vector of the same length as the input +#' @family helper functions +#' @noRd +#' @examples +#' vdigest_crc32(c("00a2c20d", "29b7d356")) +vdigest_crc32 <- function(object) { + vdigest_crc32 <- digest::getVDigest(algo = "crc32") + return(vdigest_crc32(object)) +} + #' Replace duplicated deploymentID #' #' Replaces duplicated deploymentIDs with unique values in all resources, using -#' `vdigest_algo_crc32` to generate hashes. +#' `vdigest_crc32` to generate hashes. #' #' @inheritParams print.camtrapdp #' @param duplicated_deploymentID DeploymentID's to be replaced. @@ -36,7 +52,7 @@ mutate_when_missing <- function(.data, ...) { replace_duplicated_deploymentID <- function(x, duplicated_deploymentID) { # set a vectorised function for creating hash function digests - vdigest_algo_crc32 <- digest::getVDigest(algo = "crc32") + vdigest_crc32 <- digest::getVDigest(algo = "crc32") # unique deploymentIDs to deployments deployments(x) <- @@ -45,7 +61,7 @@ replace_duplicated_deploymentID <- function(x, duplicated_deploymentID) { deploymentID = dplyr::if_else( .data$deploymentID %in% duplicated_deploymentID, - vdigest_algo_crc32(.data$deploymentID), + vdigest_crc32(.data$deploymentID), .data$deploymentID ) ) @@ -57,7 +73,7 @@ replace_duplicated_deploymentID <- function(x, duplicated_deploymentID) { deploymentID = dplyr::if_else( .data$deploymentID %in% duplicated_deploymentID, - vdigest_algo_crc32(.data$deploymentID), + vdigest_crc32(.data$deploymentID), .data$deploymentID ) ) @@ -69,7 +85,7 @@ replace_duplicated_deploymentID <- function(x, duplicated_deploymentID) { deploymentID = dplyr::if_else( .data$deploymentID %in% duplicated_deploymentID, - vdigest_algo_crc32(.data$deploymentID), + vdigest_crc32(.data$deploymentID), .data$deploymentID ) ) diff --git a/tests/testthat/test-merge_camtrapdp.R b/tests/testthat/test-merge_camtrapdp.R index 096608e4..cd0480ed 100644 --- a/tests/testthat/test-merge_camtrapdp.R +++ b/tests/testthat/test-merge_camtrapdp.R @@ -52,14 +52,11 @@ test_that("merge_camtrapdp() returns unique deplpymentID's, mediaID's and new_mediaIDs <- purrr::pluck(media(x_merged), "mediaID") new_observationIDs <- purrr::pluck(observations(x_merged), "observationID") - # set a vectorised function for creating hash function digests - vdigest_algo_crc32 <- digest::getVDigest(algo = "crc32") - # tests expect_true(any(duplicated(original_deploymentIDs))) expect_false(any(duplicated(new_deploymentIDs))) expect_true( - vdigest_algo_crc32(duplicated_deploymentID) %in% new_deploymentIDs + vdigest_crc32(duplicated_deploymentID) %in% new_deploymentIDs ) expect_identical( c(duplicated_deploymentID, "29b7d356", "77b0e58b", "62c200a9"), @@ -68,12 +65,12 @@ test_that("merge_camtrapdp() returns unique deplpymentID's, mediaID's and expect_true(any(duplicated(original_mediaIDs))) expect_false(any(duplicated(new_mediaIDs))) - expect_true(vdigest_algo_crc32(duplicated_mediaID) %in% new_mediaIDs) + expect_true(vdigest_crc32(duplicated_mediaID) %in% new_mediaIDs) expect_true(any(duplicated(original_observationIDs))) expect_false(any(duplicated(new_observationIDs))) expect_true( - all(vdigest_algo_crc32(duplicated_observationID) %in% new_observationIDs) + all(vdigest_crc32(duplicated_observationID) %in% new_observationIDs) ) }) From 279faaa141b9bdf0b9de988e271ee37a3220de67 Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Fri, 26 Jul 2024 11:43:00 +0200 Subject: [PATCH 020/142] rename and update function `replace_duplicated_deploymentID()` to `generate_deploymentID()` --- R/merge_camtrapdp.R | 2 +- R/utils.R | 27 ++++++++++++++------------- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/R/merge_camtrapdp.R b/R/merge_camtrapdp.R index a7eac81b..286b20f8 100644 --- a/R/merge_camtrapdp.R +++ b/R/merge_camtrapdp.R @@ -33,7 +33,7 @@ merge_camtrapdp <- function(x1, x2, name, title) { # replace duplicated deploymentID's in x2 if (any(duplicated(deploymentIDs))) { duplicated_deploymentID <- deploymentIDs[duplicated(deploymentIDs)] - x2 <- replace_duplicated_deploymentID(x2, duplicated_deploymentID) + x2 <- generate_deploymentID(x2, duplicated_deploymentID) # new merge with unique deploymentID's deployments(x) <- dplyr::bind_rows(deployments(x1), deployments(x2)) diff --git a/R/utils.R b/R/utils.R index 8129a9e5..ea0c4cf4 100644 --- a/R/utils.R +++ b/R/utils.R @@ -37,22 +37,23 @@ vdigest_crc32 <- function(object) { return(vdigest_crc32(object)) } -#' Replace duplicated deploymentID +#' Generate and replace deploymentID's #' -#' Replaces duplicated deploymentIDs with unique values in all resources, using -#' `vdigest_crc32` to generate hashes. +#' Replaces deploymentIDs in deployments, media and observations, with hashes +#' generated by `vdigest_crc32`. #' #' @inheritParams print.camtrapdp -#' @param duplicated_deploymentID DeploymentID's to be replaced. -#' @return `x` with unique deploymenID's +#' @param deploymentID deploymentID's to be replaced. Either a single ID or a +#' vector of ID's. +#' @return `x` with replaced deploymentID's #' @family helper functions #' @noRd #' @examples -#' replace_duplicated_deploymentID(example_dataset()) -replace_duplicated_deploymentID <- function(x, duplicated_deploymentID) { - - # set a vectorised function for creating hash function digests - vdigest_crc32 <- digest::getVDigest(algo = "crc32") +#' x_replaced <- +#' generate_deploymentID(example_dataset(), c("00a2c20d", "29b7d356")) +#' # Inspect results +#' deployments(x_replaced)$deploymentID +generate_deploymentID <- function(x, deploymentID) { # unique deploymentIDs to deployments deployments(x) <- @@ -60,7 +61,7 @@ replace_duplicated_deploymentID <- function(x, duplicated_deploymentID) { dplyr::mutate( deploymentID = dplyr::if_else( - .data$deploymentID %in% duplicated_deploymentID, + .data$deploymentID %in% {{ deploymentID }}, vdigest_crc32(.data$deploymentID), .data$deploymentID ) @@ -72,7 +73,7 @@ replace_duplicated_deploymentID <- function(x, duplicated_deploymentID) { dplyr::mutate( deploymentID = dplyr::if_else( - .data$deploymentID %in% duplicated_deploymentID, + .data$deploymentID %in% {{ deploymentID }}, vdigest_crc32(.data$deploymentID), .data$deploymentID ) @@ -84,7 +85,7 @@ replace_duplicated_deploymentID <- function(x, duplicated_deploymentID) { dplyr::mutate( deploymentID = dplyr::if_else( - .data$deploymentID %in% duplicated_deploymentID, + .data$deploymentID %in% {{ deploymentID }}, vdigest_crc32(.data$deploymentID), .data$deploymentID ) From 2c6c71fe3b6929121a7067aa4684c420cd4d1199 Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Fri, 26 Jul 2024 11:45:43 +0200 Subject: [PATCH 021/142] update comments --- R/utils.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/utils.R b/R/utils.R index ea0c4cf4..3b3d9b70 100644 --- a/R/utils.R +++ b/R/utils.R @@ -55,7 +55,7 @@ vdigest_crc32 <- function(object) { #' deployments(x_replaced)$deploymentID generate_deploymentID <- function(x, deploymentID) { - # unique deploymentIDs to deployments + # replace deploymentIDs in deployments deployments(x) <- deployments(x) %>% dplyr::mutate( @@ -67,7 +67,7 @@ generate_deploymentID <- function(x, deploymentID) { ) ) - # unique deploymentIDs in observations + # replace deploymentIDs in observations observations(x) <- observations(x) %>% dplyr::mutate( @@ -79,7 +79,7 @@ generate_deploymentID <- function(x, deploymentID) { ) ) - # unique deploymentIDs in media + # replace deploymentIDs in media media(x) <- media(x) %>% dplyr::mutate( From 585d14eb16b954808d88fdd2b06eda56f10a04c2 Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Fri, 26 Jul 2024 11:50:18 +0200 Subject: [PATCH 022/142] new helper function `generate_mediaID()` --- R/merge_camtrapdp.R | 29 +++-------------------------- R/utils.R | 42 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 26 deletions(-) diff --git a/R/merge_camtrapdp.R b/R/merge_camtrapdp.R index 286b20f8..1a8ba832 100644 --- a/R/merge_camtrapdp.R +++ b/R/merge_camtrapdp.R @@ -30,7 +30,7 @@ merge_camtrapdp <- function(x1, x2, name, title) { mediaIDs <- purrr::pluck(media(x), "mediaID") observationIDs <- purrr::pluck(observations(x), "observationID") - # replace duplicated deploymentID's in x2 + # replace duplicated deploymentID's in `x2` if (any(duplicated(deploymentIDs))) { duplicated_deploymentID <- deploymentIDs[duplicated(deploymentIDs)] x2 <- generate_deploymentID(x2, duplicated_deploymentID) @@ -56,33 +56,10 @@ merge_camtrapdp <- function(x1, x2, name, title) { ) } - # set unique mediaID's + # replace duplicated mediaID's in `x2` if (any(duplicated(mediaIDs))) { duplicated_mediaID <- mediaIDs[duplicated(mediaIDs)] - - # give unique mediaDs to media - media(x2) <- - media(x2) %>% - dplyr::mutate( - mediaID = - dplyr::if_else( - .data$mediaID %in% duplicated_mediaID, - vdigest_crc32(.data$mediaID), - .data$mediaID - ) - ) - - # give unique mediaDs to observations - observations(x2) <- - observations(x2) %>% - dplyr::mutate( - mediaID = - dplyr::if_else( - .data$mediaID %in% duplicated_mediaID, - vdigest_crc32(.data$mediaID), - .data$mediaID - ) - ) + x2 <- generate_mediaID(x2, duplicated_mediaID) # new merge with unique mediaID's media(x) <- dplyr::bind_rows(media(x1), media(x2)) diff --git a/R/utils.R b/R/utils.R index 3b3d9b70..76bb1dd2 100644 --- a/R/utils.R +++ b/R/utils.R @@ -93,3 +93,45 @@ generate_deploymentID <- function(x, deploymentID) { return(x) } + +#' Generate and replace mediaID's +#' +#' Replaces mediaIDs in media and observations, with hashes generated by +#' `vdigest_crc32`. +#' +#' @inheritParams print.camtrapdp +#' @param mediaID mediaID's to be replaced. Either a single ID or a vector of +#' ID's. +#' @return `x` with replaced mediaID's +#' @family helper functions +#' @noRd +#' @examples +#' generate_mediaID(example_dataset(), c("07840dcc", "401386c7")) +generate_mediaID <- function(x, mediaID) { + + # replace mediaIDs in media + media(x) <- + media(x) %>% + dplyr::mutate( + mediaID = + dplyr::if_else( + .data$mediaID %in% {{ mediaID }}, + vdigest_crc32(.data$mediaID), + .data$mediaID + ) + ) + + # replaced mediaIDs in observations + observations(x) <- + observations(x) %>% + dplyr::mutate( + mediaID = + dplyr::if_else( + .data$mediaID %in% {{ mediaID }}, + vdigest_crc32(.data$mediaID), + .data$mediaID + ) + ) + + return(x) +} From 5f007fb70691674bda51c580d2da84c6ebaaa040 Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Fri, 26 Jul 2024 11:54:34 +0200 Subject: [PATCH 023/142] add helper function `generate_observationID()` --- R/merge_camtrapdp.R | 19 ++++--------------- R/utils.R | 30 ++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 15 deletions(-) diff --git a/R/merge_camtrapdp.R b/R/merge_camtrapdp.R index 1a8ba832..23b5b29a 100644 --- a/R/merge_camtrapdp.R +++ b/R/merge_camtrapdp.R @@ -30,8 +30,8 @@ merge_camtrapdp <- function(x1, x2, name, title) { mediaIDs <- purrr::pluck(media(x), "mediaID") observationIDs <- purrr::pluck(observations(x), "observationID") - # replace duplicated deploymentID's in `x2` if (any(duplicated(deploymentIDs))) { + # replace duplicated deploymentID's in `x2` duplicated_deploymentID <- deploymentIDs[duplicated(deploymentIDs)] x2 <- generate_deploymentID(x2, duplicated_deploymentID) @@ -56,8 +56,8 @@ merge_camtrapdp <- function(x1, x2, name, title) { ) } - # replace duplicated mediaID's in `x2` if (any(duplicated(mediaIDs))) { + # replace duplicated mediaID's in `x2` duplicated_mediaID <- mediaIDs[duplicated(mediaIDs)] x2 <- generate_mediaID(x2, duplicated_mediaID) @@ -81,21 +81,10 @@ merge_camtrapdp <- function(x1, x2, name, title) { ) } - # set unique observationID's if (any(duplicated(observationIDs))) { + # replace duplicated deploymentID's in `x2` duplicated_observationID <- observationIDs[duplicated(observationIDs)] - - # give unique observationIDs to observations - observations(x2) <- - observations(x2) %>% - dplyr::mutate( - observationID = - dplyr::if_else( - .data$observationID %in% duplicated_observationID, - vdigest_crc32(.data$observationID), - .data$observationID - ) - ) + x2 <- generate_observationID(x2, duplicated_observationID) # new merge with unique observationID's observations(x) <- dplyr::bind_rows(observations(x1), observations(x2)) diff --git a/R/utils.R b/R/utils.R index 76bb1dd2..c8578f3e 100644 --- a/R/utils.R +++ b/R/utils.R @@ -135,3 +135,33 @@ generate_mediaID <- function(x, mediaID) { return(x) } + +#' Generate and replace observationID's +#' +#' Replaces observationIDs in observations, with hashes generated by +#' `vdigest_crc32`. +#' +#' @inheritParams print.camtrapdp +#' @param observationID observationID's to be replaced. Either a single ID or a +#' vector of ID's. +#' @return `x` with replaced observationID's +#' @family helper functions +#' @noRd +#' @examples +#' generate_observationID(example_dataset(), c("705e6036", "07840dcc_1")) +generate_observationID <- function(x, observationID) { + + # replaced observationIDs in observations + observations(x) <- + observations(x) %>% + dplyr::mutate( + observationID = + dplyr::if_else( + .data$observationID %in% {{ observationID }}, + vdigest_crc32(.data$observationID), + .data$observationID + ) + ) + + return(x) +} From 94f3c36e4245b23b9849013e4efc5146af3091f0 Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Fri, 26 Jul 2024 13:06:18 +0200 Subject: [PATCH 024/142] update `generate_observationID()` to `replace_observationID()` --- R/merge_camtrapdp.R | 12 ++++++++---- R/utils.R | 31 +++++++++++++++++++++---------- 2 files changed, 29 insertions(+), 14 deletions(-) diff --git a/R/merge_camtrapdp.R b/R/merge_camtrapdp.R index 23b5b29a..70ffc615 100644 --- a/R/merge_camtrapdp.R +++ b/R/merge_camtrapdp.R @@ -84,13 +84,17 @@ merge_camtrapdp <- function(x1, x2, name, title) { if (any(duplicated(observationIDs))) { # replace duplicated deploymentID's in `x2` duplicated_observationID <- observationIDs[duplicated(observationIDs)] - x2 <- generate_observationID(x2, duplicated_observationID) + replacement_observationID <- vdigest_crc32(duplicated_observationID) + x2 <- replace_observationID( + x2, + old_observationID = duplicated_observationID, + new_observationID = replacement_observationID + ) # new merge with unique observationID's observations(x) <- dplyr::bind_rows(observations(x1), observations(x2)) - # inform user - new_observationIDs <- vdigest_crc32(duplicated_observationID) + cli::cli_alert_warning( c( paste( @@ -98,7 +102,7 @@ merge_camtrapdp <- function(x1, x2, name, title) { "{.arg x1} and {.arg x2} have duplicated observationID's:", "{.val {duplicated_observationID}}.\n", "Duplicated observationID's of {.arg x2} are now replaced by", - "{.val {new_observationIDs}} respectively." + "{.val {replacement_observationID}} respectively." ) ), class = "camtrapdp_warning_unique_observationID" diff --git a/R/utils.R b/R/utils.R index c8578f3e..c5fd2738 100644 --- a/R/utils.R +++ b/R/utils.R @@ -136,29 +136,40 @@ generate_mediaID <- function(x, mediaID) { return(x) } -#' Generate and replace observationID's +#' Replace observationID's #' -#' Replaces observationIDs in observations, with hashes generated by -#' `vdigest_crc32`. +#' Replaces observationIDs in observations #' #' @inheritParams print.camtrapdp -#' @param observationID observationID's to be replaced. Either a single ID or a -#' vector of ID's. +#' @param old_observationID observationID's to be replaced. Either a single ID +#' or a vector of ID's. +#' @param new_observationID replacement observationID's. Must be of the same +#' length as `old_observationID` #' @return `x` with replaced observationID's #' @family helper functions #' @noRd #' @examples -#' generate_observationID(example_dataset(), c("705e6036", "07840dcc_1")) -generate_observationID <- function(x, observationID) { +#' x <- example_dataset() %>% filter_observations( +#' observationID %in% c("705e6036", "07840dcc_1", "401386c7_1") +#' ) +#' x_replaced <- replace_observationID( +#' x, c("705e6036", "07840dcc_1"), c("newID1", "newID2") +#' ) +#' # Inspect values +#' observations(x_replaced)$observationID +replace_observationID <- function(x, old_observationID, new_observationID) { + + # Create a named vector for replacements + replacement_map <- setNames(new_observationID, old_observationID) - # replaced observationIDs in observations + # replace observationIDs in observations observations(x) <- observations(x) %>% dplyr::mutate( observationID = dplyr::if_else( - .data$observationID %in% {{ observationID }}, - vdigest_crc32(.data$observationID), + .data$observationID %in% old_observationID, + unname(replacement_map[.data$observationID]), .data$observationID ) ) From 0924691425bfc408c56cefe517784abfae2fcee0 Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Mon, 29 Jul 2024 10:37:12 +0200 Subject: [PATCH 025/142] update `generate_mediaID()` to `replace_mediaID()` --- R/merge_camtrapdp.R | 9 ++++----- R/utils.R | 31 +++++++++++++++++++------------ 2 files changed, 23 insertions(+), 17 deletions(-) diff --git a/R/merge_camtrapdp.R b/R/merge_camtrapdp.R index 70ffc615..4d4c4a50 100644 --- a/R/merge_camtrapdp.R +++ b/R/merge_camtrapdp.R @@ -59,7 +59,8 @@ merge_camtrapdp <- function(x1, x2, name, title) { if (any(duplicated(mediaIDs))) { # replace duplicated mediaID's in `x2` duplicated_mediaID <- mediaIDs[duplicated(mediaIDs)] - x2 <- generate_mediaID(x2, duplicated_mediaID) + replacement_mediaID <- vdigest_crc32(duplicated_mediaID) + x2 <- replace_mediaID(x2, duplicated_mediaID, replacement_mediaID) # new merge with unique mediaID's media(x) <- dplyr::bind_rows(media(x1), media(x2)) @@ -74,7 +75,7 @@ merge_camtrapdp <- function(x1, x2, name, title) { "{.arg x1} and {.arg x2} have duplicated mediaID's:", "{.val {duplicated_mediaID}}.\n", "Duplicated mediaID's of {.arg x2} are now replaced by", - "{.val {new_mediaIDs}} respectively." + "{.val {replacement_mediaID}} respectively." ) ), class = "camtrapdp_warning_unique_mediaID" @@ -86,9 +87,7 @@ merge_camtrapdp <- function(x1, x2, name, title) { duplicated_observationID <- observationIDs[duplicated(observationIDs)] replacement_observationID <- vdigest_crc32(duplicated_observationID) x2 <- replace_observationID( - x2, - old_observationID = duplicated_observationID, - new_observationID = replacement_observationID + x2, duplicated_observationID, replacement_observationID ) # new merge with unique observationID's diff --git a/R/utils.R b/R/utils.R index c5fd2738..1b04fbbd 100644 --- a/R/utils.R +++ b/R/utils.R @@ -94,20 +94,27 @@ generate_deploymentID <- function(x, deploymentID) { return(x) } -#' Generate and replace mediaID's +#' Replace mediaID's #' -#' Replaces mediaIDs in media and observations, with hashes generated by -#' `vdigest_crc32`. +#' Replaces mediaIDs in media and observations #' #' @inheritParams print.camtrapdp -#' @param mediaID mediaID's to be replaced. Either a single ID or a vector of -#' ID's. +#' @param old_mediaID mediaID's to be replaced. Either a single ID or a vector +#' of ID's. +#' @param new_mediaID replacement mediaID's. Must be of the same +#' length as `old_observationID` #' @return `x` with replaced mediaID's #' @family helper functions #' @noRd #' @examples -#' generate_mediaID(example_dataset(), c("07840dcc", "401386c7")) -generate_mediaID <- function(x, mediaID) { +#' replace_mediaID( +#' example_dataset(), +#' c("07840dcc", "401386c7"), +#' c("new_mediaID1", "new_mediaID2")) +replace_mediaID <- function(x, old_mediaID, new_mediaID) { + + # Create a named vector for replacements + replacement_map <- setNames(new_mediaID, old_mediaID) # replace mediaIDs in media media(x) <- @@ -115,20 +122,20 @@ generate_mediaID <- function(x, mediaID) { dplyr::mutate( mediaID = dplyr::if_else( - .data$mediaID %in% {{ mediaID }}, - vdigest_crc32(.data$mediaID), + .data$mediaID %in% old_mediaID, + unname(replacement_map[.data$mediaID]), .data$mediaID ) ) - # replaced mediaIDs in observations + # replace mediaIDs in observations observations(x) <- observations(x) %>% dplyr::mutate( mediaID = dplyr::if_else( - .data$mediaID %in% {{ mediaID }}, - vdigest_crc32(.data$mediaID), + .data$mediaID %in% old_mediaID, + unname(replacement_map[.data$mediaID]), .data$mediaID ) ) From 40e0a14e8e5121f4b66966a584dffeb433f4012a Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Mon, 29 Jul 2024 11:47:06 +0200 Subject: [PATCH 026/142] update `generate_deploymentID()` to `replace_deploymentID()` --- R/merge_camtrapdp.R | 7 +++++-- R/utils.R | 35 +++++++++++++++++++++-------------- 2 files changed, 26 insertions(+), 16 deletions(-) diff --git a/R/merge_camtrapdp.R b/R/merge_camtrapdp.R index 4d4c4a50..4bf7e132 100644 --- a/R/merge_camtrapdp.R +++ b/R/merge_camtrapdp.R @@ -33,7 +33,10 @@ merge_camtrapdp <- function(x1, x2, name, title) { if (any(duplicated(deploymentIDs))) { # replace duplicated deploymentID's in `x2` duplicated_deploymentID <- deploymentIDs[duplicated(deploymentIDs)] - x2 <- generate_deploymentID(x2, duplicated_deploymentID) + replacement_deploymentID <- vdigest_crc32(duplicated_deploymentID) + x2 <- replace_deploymentID( + x2, duplicated_deploymentID, replacement_deploymentID + ) # new merge with unique deploymentID's deployments(x) <- dplyr::bind_rows(deployments(x1), deployments(x2)) @@ -49,7 +52,7 @@ merge_camtrapdp <- function(x1, x2, name, title) { "{.arg x1} and {.arg x2} have duplicated deploymentID's:", "{.val {duplicated_deploymentID}}.\n", "Duplicated deploymentID's of {.arg x2} are now replaced by", - "{.val {new_deploymentIDs}} respectively." + "{.val {replacement_deploymentID}} respectively." ) ), class = "camtrapdp_warning_unique_deploymentID" diff --git a/R/utils.R b/R/utils.R index 1b04fbbd..6127394a 100644 --- a/R/utils.R +++ b/R/utils.R @@ -37,23 +37,30 @@ vdigest_crc32 <- function(object) { return(vdigest_crc32(object)) } -#' Generate and replace deploymentID's +#' Replace deploymentID's #' -#' Replaces deploymentIDs in deployments, media and observations, with hashes -#' generated by `vdigest_crc32`. +#' Replaces deploymentIDs in deployments, media and observations.. #' #' @inheritParams print.camtrapdp -#' @param deploymentID deploymentID's to be replaced. Either a single ID or a -#' vector of ID's. +#' @param old_deploymentID deploymentID's to be replaced. Either a single ID or +#' a vector of ID's. +#' @param new_deploymentID replacement deploymentID's. Must be of the same +#' length as `old_deploymentID` #' @return `x` with replaced deploymentID's #' @family helper functions #' @noRd #' @examples +#' x <- example_dataset() #' x_replaced <- -#' generate_deploymentID(example_dataset(), c("00a2c20d", "29b7d356")) +#' replace_deploymentID( +#' x, c("00a2c20d", "29b7d356"), c("new_deploymentID1", "new_deploymentID2") #' # Inspect results #' deployments(x_replaced)$deploymentID -generate_deploymentID <- function(x, deploymentID) { +replace_deploymentID <- function(x, old_deploymentID, new_deploymentID) { + + + # Create a named vector for replacements + replacement_map <- setNames(new_deploymentID, old_deploymentID) # replace deploymentIDs in deployments deployments(x) <- @@ -61,8 +68,8 @@ generate_deploymentID <- function(x, deploymentID) { dplyr::mutate( deploymentID = dplyr::if_else( - .data$deploymentID %in% {{ deploymentID }}, - vdigest_crc32(.data$deploymentID), + .data$deploymentID %in% old_deploymentID, + unname(replacement_map[.data$deploymentID]), .data$deploymentID ) ) @@ -73,8 +80,8 @@ generate_deploymentID <- function(x, deploymentID) { dplyr::mutate( deploymentID = dplyr::if_else( - .data$deploymentID %in% {{ deploymentID }}, - vdigest_crc32(.data$deploymentID), + .data$deploymentID %in% old_deploymentID, + unname(replacement_map[.data$deploymentID]), .data$deploymentID ) ) @@ -85,8 +92,8 @@ generate_deploymentID <- function(x, deploymentID) { dplyr::mutate( deploymentID = dplyr::if_else( - .data$deploymentID %in% {{ deploymentID }}, - vdigest_crc32(.data$deploymentID), + .data$deploymentID %in% old_deploymentID, + unname(replacement_map[.data$deploymentID]), .data$deploymentID ) ) @@ -102,7 +109,7 @@ generate_deploymentID <- function(x, deploymentID) { #' @param old_mediaID mediaID's to be replaced. Either a single ID or a vector #' of ID's. #' @param new_mediaID replacement mediaID's. Must be of the same -#' length as `old_observationID` +#' length as `old_mediaID` #' @return `x` with replaced mediaID's #' @family helper functions #' @noRd From f6caca2d6546c7ba06b35def9ab3959d449d5bc9 Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Mon, 29 Jul 2024 12:25:24 +0200 Subject: [PATCH 027/142] new helper function `replace_duplicatedIDs()` --- R/merge_camtrapdp.R | 91 ++-------------------------- R/utils.R | 141 +++++++++++++++++++++++++++++++++++++++----- 2 files changed, 129 insertions(+), 103 deletions(-) diff --git a/R/merge_camtrapdp.R b/R/merge_camtrapdp.R index 4bf7e132..8f1cf046 100644 --- a/R/merge_camtrapdp.R +++ b/R/merge_camtrapdp.R @@ -18,99 +18,16 @@ merge_camtrapdp <- function(x1, x2, name, title) { check_camtrapdp(x1) check_camtrapdp(x2) - x <- x1 + + # replace duplicated ID's between `x1` and `x2` in `x2` + x2 <- replace_duplicatedIDs(x1, x2) # merge resources + x <- x1 deployments(x) <- dplyr::bind_rows(deployments(x1), deployments(x2)) media(x) <- dplyr::bind_rows(media(x1), media(x2)) observations(x) <- dplyr::bind_rows(observations(x1), observations(x2)) - # get ID's - deploymentIDs <- purrr::pluck(deployments(x), "deploymentID") - mediaIDs <- purrr::pluck(media(x), "mediaID") - observationIDs <- purrr::pluck(observations(x), "observationID") - - if (any(duplicated(deploymentIDs))) { - # replace duplicated deploymentID's in `x2` - duplicated_deploymentID <- deploymentIDs[duplicated(deploymentIDs)] - replacement_deploymentID <- vdigest_crc32(duplicated_deploymentID) - x2 <- replace_deploymentID( - x2, duplicated_deploymentID, replacement_deploymentID - ) - - # new merge with unique deploymentID's - deployments(x) <- dplyr::bind_rows(deployments(x1), deployments(x2)) - media(x) <- dplyr::bind_rows(media(x1), media(x2)) - observations(x) <- dplyr::bind_rows(observations(x1), observations(x2)) - - # inform user - new_deploymentIDs <- vdigest_crc32(duplicated_deploymentID) - cli::cli_alert_warning( - c( - paste( - "{.arg x1} and {.arg x2} must have unique deploymentID's.\n", - "{.arg x1} and {.arg x2} have duplicated deploymentID's:", - "{.val {duplicated_deploymentID}}.\n", - "Duplicated deploymentID's of {.arg x2} are now replaced by", - "{.val {replacement_deploymentID}} respectively." - ) - ), - class = "camtrapdp_warning_unique_deploymentID" - ) - } - - if (any(duplicated(mediaIDs))) { - # replace duplicated mediaID's in `x2` - duplicated_mediaID <- mediaIDs[duplicated(mediaIDs)] - replacement_mediaID <- vdigest_crc32(duplicated_mediaID) - x2 <- replace_mediaID(x2, duplicated_mediaID, replacement_mediaID) - - # new merge with unique mediaID's - media(x) <- dplyr::bind_rows(media(x1), media(x2)) - observations(x) <- dplyr::bind_rows(observations(x1), observations(x2)) - - # inform user - new_mediaIDs <- vdigest_crc32(duplicated_mediaID) - cli::cli_alert_warning( - c( - paste( - "{.arg x1} and {.arg x2} must have unique mediaID's.\n", - "{.arg x1} and {.arg x2} have duplicated mediaID's:", - "{.val {duplicated_mediaID}}.\n", - "Duplicated mediaID's of {.arg x2} are now replaced by", - "{.val {replacement_mediaID}} respectively." - ) - ), - class = "camtrapdp_warning_unique_mediaID" - ) - } - - if (any(duplicated(observationIDs))) { - # replace duplicated deploymentID's in `x2` - duplicated_observationID <- observationIDs[duplicated(observationIDs)] - replacement_observationID <- vdigest_crc32(duplicated_observationID) - x2 <- replace_observationID( - x2, duplicated_observationID, replacement_observationID - ) - - # new merge with unique observationID's - observations(x) <- dplyr::bind_rows(observations(x1), observations(x2)) - # inform user - - cli::cli_alert_warning( - c( - paste( - "{.arg x1} and {.arg x2} must have unique observationID's.\n", - "{.arg x1} and {.arg x2} have duplicated observationID's:", - "{.val {duplicated_observationID}}.\n", - "Duplicated observationID's of {.arg x2} are now replaced by", - "{.val {replacement_observationID}} respectively." - ) - ), - class = "camtrapdp_warning_unique_observationID" - ) - } - # merge/update metadata x$name <- name x$id <- digest::digest(paste(x$title, x2$title), algo = "md5") diff --git a/R/utils.R b/R/utils.R index 6127394a..ee073d42 100644 --- a/R/utils.R +++ b/R/utils.R @@ -21,22 +21,6 @@ mutate_when_missing <- function(.data, ...) { return(.data) } -#' Create hashes -#' -#' Set a vectorised function for creating hash function digests, using algorithm -#' "crc32". -#' -#' @param object vector or character string -#' @return hash summary as a character vector of the same length as the input -#' @family helper functions -#' @noRd -#' @examples -#' vdigest_crc32(c("00a2c20d", "29b7d356")) -vdigest_crc32 <- function(object) { - vdigest_crc32 <- digest::getVDigest(algo = "crc32") - return(vdigest_crc32(object)) -} - #' Replace deploymentID's #' #' Replaces deploymentIDs in deployments, media and observations.. @@ -190,3 +174,128 @@ replace_observationID <- function(x, old_observationID, new_observationID) { return(x) } + +#' Create hashes +#' +#' Set a vectorised function for creating hash function digests, using algorithm +#' "crc32". +#' +#' @param object vector or character string +#' @return hash summary as a character vector of the same length as the input +#' @family helper functions +#' @noRd +#' @examples +#' vdigest_crc32(c("00a2c20d", "29b7d356")) +vdigest_crc32 <- function(object) { + vdigest_crc32 <- digest::getVDigest(algo = "crc32") + return(vdigest_crc32(object)) +} + +#' Replace duplicated ID's when merging Camera Trap Data packages +#' +#' Replaces duplicated deploymentID's, mediaID's and observationID's between +#' two Camera Trap Data Packages with hashes generated by `vdigest_crc32`. +#' Used in `merge_camtrapdp()`. +#' +#' +#' @param x1,x2 Camera Trap Data Package objects, as returned by +#' `read_camtrapdp()`). +#' @return `x2` with duplicated ID's (compared to `x1`) replaced with hashes. +#' @family helper functions +#' @noRd +replace_duplicatedIDs <- function(x1, x2) { + x <- x1 + + # merge resources + deployments(x) <- dplyr::bind_rows(deployments(x1), deployments(x2)) + media(x) <- dplyr::bind_rows(media(x1), media(x2)) + observations(x) <- dplyr::bind_rows(observations(x1), observations(x2)) + + # get ID's + deploymentIDs <- purrr::pluck(deployments(x), "deploymentID") + mediaIDs <- purrr::pluck(media(x), "mediaID") + observationIDs <- purrr::pluck(observations(x), "observationID") + + if (any(duplicated(deploymentIDs))) { + # replace duplicated deploymentID's in `x2` + duplicated_deploymentID <- deploymentIDs[duplicated(deploymentIDs)] + replacement_deploymentID <- vdigest_crc32(duplicated_deploymentID) + x2 <- replace_deploymentID( + x2, duplicated_deploymentID, replacement_deploymentID + ) + + # new merge with unique deploymentID's + deployments(x) <- dplyr::bind_rows(deployments(x1), deployments(x2)) + media(x) <- dplyr::bind_rows(media(x1), media(x2)) + observations(x) <- dplyr::bind_rows(observations(x1), observations(x2)) + + # inform user + new_deploymentIDs <- vdigest_crc32(duplicated_deploymentID) + cli::cli_alert_warning( + c( + paste( + "{.arg x1} and {.arg x2} must have unique deploymentID's.\n", + "{.arg x1} and {.arg x2} have duplicated deploymentID's:", + "{.val {duplicated_deploymentID}}.\n", + "Duplicated deploymentID's of {.arg x2} are now replaced by", + "{.val {replacement_deploymentID}} respectively." + ) + ), + class = "camtrapdp_warning_unique_deploymentID" + ) + } + + if (any(duplicated(mediaIDs))) { + # replace duplicated mediaID's in `x2` + duplicated_mediaID <- mediaIDs[duplicated(mediaIDs)] + replacement_mediaID <- vdigest_crc32(duplicated_mediaID) + x2 <- replace_mediaID(x2, duplicated_mediaID, replacement_mediaID) + + # new merge with unique mediaID's + media(x) <- dplyr::bind_rows(media(x1), media(x2)) + observations(x) <- dplyr::bind_rows(observations(x1), observations(x2)) + + # inform user + new_mediaIDs <- vdigest_crc32(duplicated_mediaID) + cli::cli_alert_warning( + c( + paste( + "{.arg x1} and {.arg x2} must have unique mediaID's.\n", + "{.arg x1} and {.arg x2} have duplicated mediaID's:", + "{.val {duplicated_mediaID}}.\n", + "Duplicated mediaID's of {.arg x2} are now replaced by", + "{.val {replacement_mediaID}} respectively." + ) + ), + class = "camtrapdp_warning_unique_mediaID" + ) + } + + if (any(duplicated(observationIDs))) { + # replace duplicated deploymentID's in `x2` + duplicated_observationID <- observationIDs[duplicated(observationIDs)] + replacement_observationID <- vdigest_crc32(duplicated_observationID) + x2 <- replace_observationID( + x2, duplicated_observationID, replacement_observationID + ) + + # new merge with unique observationID's + observations(x) <- dplyr::bind_rows(observations(x1), observations(x2)) + # inform user + + cli::cli_alert_warning( + c( + paste( + "{.arg x1} and {.arg x2} must have unique observationID's.\n", + "{.arg x1} and {.arg x2} have duplicated observationID's:", + "{.val {duplicated_observationID}}.\n", + "Duplicated observationID's of {.arg x2} are now replaced by", + "{.val {replacement_observationID}} respectively." + ) + ), + class = "camtrapdp_warning_unique_observationID" + ) + } + + return(x2) +} From b533e4e4d9fd71c8d80b994eee184e4609256e9a Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Mon, 29 Jul 2024 12:27:22 +0200 Subject: [PATCH 028/142] update comment --- R/merge_camtrapdp.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/merge_camtrapdp.R b/R/merge_camtrapdp.R index 8f1cf046..9c0ba3b4 100644 --- a/R/merge_camtrapdp.R +++ b/R/merge_camtrapdp.R @@ -19,7 +19,7 @@ merge_camtrapdp <- function(x1, x2, name, title) { check_camtrapdp(x1) check_camtrapdp(x2) - # replace duplicated ID's between `x1` and `x2` in `x2` + # replace duplicated ID's between `x1` and `x2` in `x2` with hashes x2 <- replace_duplicatedIDs(x1, x2) # merge resources From 63bb0914d089f55aafd14379a7c3b5ea615edd7c Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Mon, 29 Jul 2024 12:48:34 +0200 Subject: [PATCH 029/142] check for length ID --- R/utils.R | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/R/utils.R b/R/utils.R index ee073d42..5a59ba9e 100644 --- a/R/utils.R +++ b/R/utils.R @@ -42,6 +42,20 @@ mutate_when_missing <- function(.data, ...) { #' deployments(x_replaced)$deploymentID replace_deploymentID <- function(x, old_deploymentID, new_deploymentID) { + # Check length + length_old <- length(old_deploymentID) + length_new <- length(new_deploymentID) + if (length_old != length_new) { + cli::cli_abort( + c( + "{.arg old_deploymentID} and {.arg new_deploymentID} must have the same + length.", + "x" = "Length of {.arg old_deploymentID}({.val {length_old}}) is not + equal to length of {.arg new_deploymentID}({.val {length_new}})." + ), + class = "camtrapdp_error_length_deploymentID" + ) + } # Create a named vector for replacements replacement_map <- setNames(new_deploymentID, old_deploymentID) @@ -104,6 +118,20 @@ replace_deploymentID <- function(x, old_deploymentID, new_deploymentID) { #' c("new_mediaID1", "new_mediaID2")) replace_mediaID <- function(x, old_mediaID, new_mediaID) { + # Check length + length_old <- length(old_mediaID) + length_new <- length(new_mediaID) + if (length_old != length_new) { + cli::cli_abort( + c( + "{.arg old_mediaID} and {.arg new_mediaID} must have the same length.", + "x" = "Length of {.arg old_mediaID}({.val {length_old}}) is not equal to + length of {.arg new_mediaID}({.val {length_new}})." + ), + class = "camtrapdp_error_length_mediaID" + ) + } + # Create a named vector for replacements replacement_map <- setNames(new_mediaID, old_mediaID) @@ -157,6 +185,21 @@ replace_mediaID <- function(x, old_mediaID, new_mediaID) { #' observations(x_replaced)$observationID replace_observationID <- function(x, old_observationID, new_observationID) { + # Check length + length_old <- length(old_observationID) + length_new <- length(new_observationID) + if (length_old != length_new) { + cli::cli_abort( + c( + "{.arg old_observationID} and {.arg new_observationID} must have the + same length.", + "x" = "Length of {.arg old_observationID}({.val {length_old}}) is not + equal to length of {.arg new_observationID}({.val {length_new}})." + ), + class = "camtrapdp_error_length_observationID" + ) + } + # Create a named vector for replacements replacement_map <- setNames(new_observationID, old_observationID) From a740d25099582626c2fc3490735dfdafe80b1d28 Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Mon, 29 Jul 2024 15:35:48 +0200 Subject: [PATCH 030/142] test for valid name and title --- R/merge_camtrapdp.R | 33 ++++++++++++++++++-- tests/testthat/test-merge_camtrapdp.R | 44 +++++++++++++++++++++++++-- 2 files changed, 71 insertions(+), 6 deletions(-) diff --git a/R/merge_camtrapdp.R b/R/merge_camtrapdp.R index 9c0ba3b4..116a37f2 100644 --- a/R/merge_camtrapdp.R +++ b/R/merge_camtrapdp.R @@ -2,10 +2,12 @@ #' #' @param x1,x2 Camera Trap Data Package objects (as returned by #' `read_camtrapdp()`), to be coerced to one. -#' @param name A short url-usable (and preferably human-readable) name for this -#' merged package. -#' @param title A string providing a title or one sentence description for this +#' @param name A short url-usable (and preferably human-readable) +#' [name](https://specs.frictionlessdata.io/data-package/#name) for the #' merged package. +#' @param title A string providing a +#' [title](https://specs.frictionlessdata.io/data-package/#title) or one +#' sentence description for the merged package. #' @return `x` #' @family transformation functions #' @export @@ -19,6 +21,31 @@ merge_camtrapdp <- function(x1, x2, name, title) { check_camtrapdp(x1) check_camtrapdp(x2) + # valid name + regex_name <- "^[a-z0-9._-]+$" + if (!grepl(regex_name, name)) { + cli::cli_abort( + c( + "{.arg name} must be lower-case and contain only alphanumeric characters + along with “.”, “_” or “-” characters." + ), + class = "camtrapdp_error_invalid_name" + ) + } + + # valid title + regex_title <- "^[A-Z][a-zA-Z0-9 :\\-]*[.!?]?$" + + if (!(grepl(regex_title, title))) { + cli::cli_abort( + c( + "{.arg title} must be a string providing a title or one sentence + description for this package." + ), + class = "camtrapdp_error_invalid_title" + ) + } + # replace duplicated ID's between `x1` and `x2` in `x2` with hashes x2 <- replace_duplicatedIDs(x1, x2) diff --git a/tests/testthat/test-merge_camtrapdp.R b/tests/testthat/test-merge_camtrapdp.R index cd0480ed..b41933ce 100644 --- a/tests/testthat/test-merge_camtrapdp.R +++ b/tests/testthat/test-merge_camtrapdp.R @@ -1,3 +1,41 @@ +test_that("merge_camtrapdp() returns error on invalid name", { + skip_if_offline() + x1 <- example_dataset() %>% + filter_deployments(deploymentID %in% c("00a2c20d", "29b7d356")) + x2 <- example_dataset() %>% + filter_deployments(deploymentID %in% c("577b543a", "62c200a9")) + + expect_error(merge_camtrapdp(x1, x2, "Name", "new title")) + expect_error(merge_camtrapdp(x1, x2, "package name", "new title")) + expect_error(merge_camtrapdp(x1, x2, "name?", "new title")) + expect_error(merge_camtrapdp(x1, x2, "new/name", "new title")) +}) + +test_that("merge_camtrapdp() returns error on invalid title", { + skip_if_offline() + x1 <- example_dataset() %>% + filter_deployments(deploymentID %in% c("00a2c20d", "29b7d356")) + x2 <- example_dataset() %>% + filter_deployments(deploymentID %in% c("577b543a", "62c200a9")) + + #invalid + expect_error(merge_camtrapdp(x1, x2, "new_name", "Period.in the middle")) + expect_error(merge_camtrapdp(x1, x2, "new_name", "start with lowercase")) + expect_error(merge_camtrapdp(x1, x2, "new_name", "Invalid_character")) + expect_error( + merge_camtrapdp(x1, x2, "new_name", "Hello! Second sentence.") + ) + + # valid + expect_no_error(merge_camtrapdp(x1, x2, "new_name", "This is a title")) + expect_no_error(merge_camtrapdp(x1, x2, "new_name", "Title with punctuation.")) + expect_no_error(merge_camtrapdp(x1, x2, "new_name", "Title: with a colon")) + expect_no_error(merge_camtrapdp(x1, x2, "new_name", "Title - with a hyphen.")) + expect_no_error( + merge_camtrapdp(x1, x2, "new_name", "A bit of a longer sentence is ok!") + ) +}) + test_that("merge_camtrapdp() returns a valid camtrapdp object", { skip_if_offline() x1 <- example_dataset() %>% @@ -7,7 +45,7 @@ test_that("merge_camtrapdp() returns a valid camtrapdp object", { expect_no_error( suppressMessages( check_camtrapdp( - merge_camtrapdp(x1, x2, "new_package_name", "new title") + merge_camtrapdp(x1, x2, "new_package_name", "New title") ) ) ) @@ -44,7 +82,7 @@ test_that("merge_camtrapdp() returns unique deplpymentID's, mediaID's and # merge x_merged <- suppressMessages( - merge_camtrapdp(x1, x2, "new_package_name", "new title") + merge_camtrapdp(x1, x2, "new_package_name", "New title") ) # get new IDs @@ -82,7 +120,7 @@ test_that("merge_camtrapdp() returns message when ID's are replaced", { filter_media(mediaID %in% c("fb58a2b9", "0bb2566e", "a6a7a04c")) expect_message( - merge_camtrapdp(x1, x2, "new_package_name", "new title") #, + merge_camtrapdp(x1, x2, "new_package_name", "New title") #, # regexp = paste( # "! `x1` and `x2` must have unique deploymentID's.", # "`x1` and `x2` have duplicated deploymentID's: \"62c200a9\".", From 13442c196937f959d2bd5f135fc719c3f7fb57ec Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Tue, 30 Jul 2024 10:23:41 +0200 Subject: [PATCH 031/142] capitilize comments --- R/merge_camtrapdp.R | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/R/merge_camtrapdp.R b/R/merge_camtrapdp.R index 116a37f2..2a9dd26e 100644 --- a/R/merge_camtrapdp.R +++ b/R/merge_camtrapdp.R @@ -21,7 +21,7 @@ merge_camtrapdp <- function(x1, x2, name, title) { check_camtrapdp(x1) check_camtrapdp(x2) - # valid name + # Valid name regex_name <- "^[a-z0-9._-]+$" if (!grepl(regex_name, name)) { cli::cli_abort( @@ -33,7 +33,7 @@ merge_camtrapdp <- function(x1, x2, name, title) { ) } - # valid title + # Valid title regex_title <- "^[A-Z][a-zA-Z0-9 :\\-]*[.!?]?$" if (!(grepl(regex_title, title))) { @@ -46,17 +46,18 @@ merge_camtrapdp <- function(x1, x2, name, title) { ) } - # replace duplicated ID's between `x1` and `x2` in `x2` with hashes + # Replace duplicated ID's between `x1` and `x2` in `x2` with hashes x2 <- replace_duplicatedIDs(x1, x2) - # merge resources + # Merge resources x <- x1 deployments(x) <- dplyr::bind_rows(deployments(x1), deployments(x2)) media(x) <- dplyr::bind_rows(media(x1), media(x2)) observations(x) <- dplyr::bind_rows(observations(x1), observations(x2)) - # merge/update metadata + # Merge/update metadata x$name <- name + # Create new ID x$id <- digest::digest(paste(x$title, x2$title), algo = "md5") x$created <- format(Sys.time(), "%Y-%m-%dT%H:%M:%SZ") x$title <- title From adf5fa134d35f5ecb138a4b75404f64be041c1de Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Tue, 30 Jul 2024 10:47:49 +0200 Subject: [PATCH 032/142] new helper function `normalize_list()` --- R/utils.R | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/R/utils.R b/R/utils.R index 5a59ba9e..c4c45988 100644 --- a/R/utils.R +++ b/R/utils.R @@ -342,3 +342,31 @@ replace_duplicatedIDs <- function(x1, x2) { return(x2) } + +#' Normalize list elements +#' +#' Converts each list element to a named vector with consistent handling of +#' missing values (NA), using determined `unique_names`. +#' +#' @param data_list list to be normalized +#' @param unique_names the names that the list must have +#' +#' @return named vector with all `unique_names` present +#' @family helper functions +#' @noRd +#' @examples +#' data_list <- list( +#' title = "Peter Desmet", +#' email = "peter.desmet@inbo.be", +#' organization = "Research Institute for Nature and Forest (INBO)" +#' ) +#' unique_names <- c("title", "email", "path", "role", "organization") +#' normalize_list(data_list, all_fields) +normalize_list <- function(data_list, unique_names) { + vector <- purrr::map_vec( + unique_names, + ~ ifelse(!is.null(data_list[[.x]]), data_list[[.x]], NA) + ) + names(vector) <- unique_names + return(vector) +} From 345496113690dd5d7a282dbdf314cb6b3539fa94 Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Tue, 30 Jul 2024 10:58:31 +0200 Subject: [PATCH 033/142] new helper function `is_subset()` --- R/utils.R | 36 +++++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/R/utils.R b/R/utils.R index c4c45988..cf1e6bf8 100644 --- a/R/utils.R +++ b/R/utils.R @@ -350,7 +350,6 @@ replace_duplicatedIDs <- function(x1, x2) { #' #' @param data_list list to be normalized #' @param unique_names the names that the list must have -#' #' @return named vector with all `unique_names` present #' @family helper functions #' @noRd @@ -370,3 +369,38 @@ normalize_list <- function(data_list, unique_names) { names(vector) <- unique_names return(vector) } + +#' Check if one element is equal to or a subset of another and vice versa +#' +#' +#' @param element1,element2 elements to compare +#' @return logical +#' @family helper functions +#' @noRd +#' @examples +#' element1 <- list( +#' title = "Peter Desmet", +#' email = "peter.desmet@inbo.be", +#' organization = "Research Institute for Nature and Forest (INBO)" +#' ) +#' element2 <- list( +#' title = "Peter Desmet", +#' email = "peter.desmet@inbo.be", +#' path = "https://orcid.org/0000-0002-8442-8025", +#' role = "principalInvestigator", +#' organization = "Research Institute for Nature and Forest (INBO)" +#' ) +#' is.subset(element1, element2) +is_subset <- function(element1, element2) { + all( + purrr::map_vec(names(element1), function(field) { + if (is.na(element1[[field]])) { + TRUE + } else if (is.na(element2[[field]])) { + TRUE + } else { + element1[[field]] == element2[[field]] + } + }) + ) +} From 9d07cf722e28091576ae290df5f8aa8537d90e3c Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Tue, 30 Jul 2024 15:24:00 +0200 Subject: [PATCH 034/142] add and use helper functions `update_unique()` and `remove_duplicates()` --- R/merge_camtrapdp.R | 11 ++-- R/utils.R | 123 ++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 119 insertions(+), 15 deletions(-) diff --git a/R/merge_camtrapdp.R b/R/merge_camtrapdp.R index 2a9dd26e..f5101d94 100644 --- a/R/merge_camtrapdp.R +++ b/R/merge_camtrapdp.R @@ -61,7 +61,7 @@ merge_camtrapdp <- function(x1, x2, name, title) { x$id <- digest::digest(paste(x$title, x2$title), algo = "md5") x$created <- format(Sys.time(), "%Y-%m-%dT%H:%M:%SZ") x$title <- title - x$contributors <- c(x1$contributors, x2$contributors) + x$contributors <- remove_duplicates(c(x1$contributors, x2$contributors)) paragraph <- paste0( "This dataset is a combination of 2 datasets: ", x1$title, "and", x2$title, ".") @@ -69,8 +69,8 @@ merge_camtrapdp <- function(x1, x2, name, title) { x$keywords <- unique(x1$keywords, x2$keywords) x$image <- NULL x$homepage <- NULL - x$sources <- c(x1$sources, x2$sources) - x$licenses <- c(x1$licenses, x2$licences) + x$sources <- remove_duplicates(c(x1$sources, x2$sources)) + x$licenses <- remove_duplicates(c(x1$licenses, x2$licenses)) x$bibliographicCitation <- NULL x$coordinatePrecision <- max(x1$coordinatePrecision, x2$coordinatePrecision, na.rm = TRUE) @@ -88,10 +88,11 @@ merge_camtrapdp <- function(x1, x2, name, title) { relatedIdentifierType = "id" ) new_relatedIdentifiers <- list(relatedIdentifiers_x1, relatedIdentifiers_x2) - x$relatedIdentifiers <- + x$relatedIdentifiers <- remove_duplicates( c(x1$relatedIdentifiers, x2$relatedIdentifiers, new_relatedIdentifiers) + ) - x$references <- c(x1$references, x2$references) + x$references <- remove_duplicates(c(x1$references, x2$references)) x <- update_spatial(x) %>% diff --git a/R/utils.R b/R/utils.R index cf1e6bf8..89acb5ac 100644 --- a/R/utils.R +++ b/R/utils.R @@ -170,8 +170,8 @@ replace_mediaID <- function(x, old_mediaID, new_mediaID) { #' @param old_observationID observationID's to be replaced. Either a single ID #' or a vector of ID's. #' @param new_observationID replacement observationID's. Must be of the same -#' length as `old_observationID` -#' @return `x` with replaced observationID's +#' length as `old_observationID`. +#' @return `x` with replaced observationID's. #' @family helper functions #' @noRd #' @examples @@ -223,8 +223,9 @@ replace_observationID <- function(x, old_observationID, new_observationID) { #' Set a vectorised function for creating hash function digests, using algorithm #' "crc32". #' -#' @param object vector or character string -#' @return hash summary as a character vector of the same length as the input +#' @param object The object to be digested. This can be any R object that can be +#' serialized into a raw vector. +#' @return Hash summary as a character vector of the same length as the input #' @family helper functions #' @noRd #' @examples @@ -348,9 +349,9 @@ replace_duplicatedIDs <- function(x1, x2) { #' Converts each list element to a named vector with consistent handling of #' missing values (NA), using determined `unique_names`. #' -#' @param data_list list to be normalized -#' @param unique_names the names that the list must have -#' @return named vector with all `unique_names` present +#' @param data_list list to be normalized. +#' @param unique_names the names that the list must have. +#' @return named vector with all `unique_names` present. #' @family helper functions #' @noRd #' @examples @@ -360,7 +361,7 @@ replace_duplicatedIDs <- function(x1, x2) { #' organization = "Research Institute for Nature and Forest (INBO)" #' ) #' unique_names <- c("title", "email", "path", "role", "organization") -#' normalize_list(data_list, all_fields) +#' normalize_list(data_list, unique_names) normalize_list <- function(data_list, unique_names) { vector <- purrr::map_vec( unique_names, @@ -373,8 +374,8 @@ normalize_list <- function(data_list, unique_names) { #' Check if one element is equal to or a subset of another and vice versa #' #' -#' @param element1,element2 elements to compare -#' @return logical +#' @param element1,element2 elements to compare. +#' @return logical. #' @family helper functions #' @noRd #' @examples @@ -404,3 +405,105 @@ is_subset <- function(element1, element2) { }) ) } + +#' Update a list of unique elements +#' +#' Updates a list of unique elements by adding a new element if it is not a +#' subset of any existing element in the list. It also removes any elements that +#' are subsets of the new element. +#' +#' @param unique_data A list of elements. Each element must be a vector or +#' list. +#' @param current_element A vector or list representing the current element to +#' be added to the list. +#' @return `unique_data`, a list of unique elements updated with the current +#' element, ensuring no element is a subset of another. +#' @family helper functions +#' @noRd +#' @examples +#' unique_data <- list(c(1, 2, 3), c(4, 5), c(1, 2, 3, 4, 5)) +#' current_element <- c(2, 3) +#' update_unique(unique_data, current_element) +update_unique <- function(unique_data, current_element) { + # Check if current element is already a subset of any element in unique_data + is_already_present <- + any( + purrr::map_lgl(unique_data, ~ is_subset(current_element, .x)) + ) + if (!is_already_present) { + # Remove subsets from unique_data + subsets_to_remove <- + purrr::map_lgl(unique_data, ~ is_subset(.x, current_element)) + unique_data <- + unique_data[!subsets_to_remove] %>% + c(list(current_element)) + } + return(unique_data) +} + +#' Remove duplicates and subsets +#' +#' Removes duplicate and subset elements from a list of lists. Elements are +#' considered subsets if all their non-NA fields match. +#' +#' @param data_list List of lists, where each inner list represents an element +#' with named fields. +#' @return List of lists with duplicates and subsets removed. +#' @family helper functions +#' @noRd +#' @examples +#' data_list <- list( +#' list( +#' title = "Axel Neukermans", +#' email = "axel.neukermans@inbo.be", +#' path = "https://orcid.org/0000-0003-0272-9180", +#' role = "contributor", +#' organization = "Research Institute for Nature and Forest (INBO)" +#' ), +#' list( +#' title = "Peter Desmet", +#' email = "peter.desmet@inbo.be", +#' path = "https://orcid.org/0000-0002-8442-8025", +#' role = "principalInvestigator", +#' organization = "Research Institute for Nature and Forest (INBO)" +#' ), +#' list( +#' title = "Research Institute for Nature and Forest (INBO)", +#' path = "https://inbo.be", +#' role = "rightsHolder" +#' ), +#' list( +#' title = "Peter Desmet", +#' email = "peter.desmet@inbo.be", +#' organization = "Research Institute for Nature and Forest (INBO)" +#' ), +#' list( +#' title = "Research Institute for Nature and Forest (INBO)", +#' path = "https://inbo.be", +#' role = "rightsHolder" +#' ) +#' ) +#' remove_duplicates(data_list) +remove_duplicates <- function(data_list) { + # Find all unique field names + unique_names <- + purrr::map(data_list, names) %>% + unlist() %>% + unique() + + # Normalize all elements + normalized_data <- + purrr::map(data_list, ~ normalize_list(.x, unique_names)) + + # Reduce the list to unique elements using update_unique() + unique_data <- Reduce(update_unique, normalized_data, init = list()) + + # Convert back to original list format and remove NA's + unique_data_list <- + purrr::map(unique_data, function(x) { + x <- as.list(x) + x[!sapply(x, is.na)] + }) + + return(unique_data_list) +} From ce1cba2ed2373fcc3a20c3f40ea8d877ec74183a Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Tue, 30 Jul 2024 15:26:13 +0200 Subject: [PATCH 035/142] uncomment camtrapdp_error_length_ aborts because it cannot be tested now and it reduces code coverage --- R/utils.R | 82 +++++++++++++++++++++++++++---------------------------- 1 file changed, 41 insertions(+), 41 deletions(-) diff --git a/R/utils.R b/R/utils.R index 89acb5ac..a0ccfaa6 100644 --- a/R/utils.R +++ b/R/utils.R @@ -42,20 +42,20 @@ mutate_when_missing <- function(.data, ...) { #' deployments(x_replaced)$deploymentID replace_deploymentID <- function(x, old_deploymentID, new_deploymentID) { - # Check length - length_old <- length(old_deploymentID) - length_new <- length(new_deploymentID) - if (length_old != length_new) { - cli::cli_abort( - c( - "{.arg old_deploymentID} and {.arg new_deploymentID} must have the same - length.", - "x" = "Length of {.arg old_deploymentID}({.val {length_old}}) is not - equal to length of {.arg new_deploymentID}({.val {length_new}})." - ), - class = "camtrapdp_error_length_deploymentID" - ) - } + # # Check length + # length_old <- length(old_deploymentID) + # length_new <- length(new_deploymentID) + # if (length_old != length_new) { + # cli::cli_abort( + # c( + # "{.arg old_deploymentID} and {.arg new_deploymentID} must have the same + # length.", + # "x" = "Length of {.arg old_deploymentID}({.val {length_old}}) is not + # equal to length of {.arg new_deploymentID}({.val {length_new}})." + # ), + # class = "camtrapdp_error_length_deploymentID" + # ) + # } # Create a named vector for replacements replacement_map <- setNames(new_deploymentID, old_deploymentID) @@ -118,19 +118,19 @@ replace_deploymentID <- function(x, old_deploymentID, new_deploymentID) { #' c("new_mediaID1", "new_mediaID2")) replace_mediaID <- function(x, old_mediaID, new_mediaID) { - # Check length - length_old <- length(old_mediaID) - length_new <- length(new_mediaID) - if (length_old != length_new) { - cli::cli_abort( - c( - "{.arg old_mediaID} and {.arg new_mediaID} must have the same length.", - "x" = "Length of {.arg old_mediaID}({.val {length_old}}) is not equal to - length of {.arg new_mediaID}({.val {length_new}})." - ), - class = "camtrapdp_error_length_mediaID" - ) - } + # # Check length + # length_old <- length(old_mediaID) + # length_new <- length(new_mediaID) + # if (length_old != length_new) { + # cli::cli_abort( + # c( + # "{.arg old_mediaID} and {.arg new_mediaID} must have the same length.", + # "x" = "Length of {.arg old_mediaID}({.val {length_old}}) is not equal to + # length of {.arg new_mediaID}({.val {length_new}})." + # ), + # class = "camtrapdp_error_length_mediaID" + # ) + # } # Create a named vector for replacements replacement_map <- setNames(new_mediaID, old_mediaID) @@ -185,20 +185,20 @@ replace_mediaID <- function(x, old_mediaID, new_mediaID) { #' observations(x_replaced)$observationID replace_observationID <- function(x, old_observationID, new_observationID) { - # Check length - length_old <- length(old_observationID) - length_new <- length(new_observationID) - if (length_old != length_new) { - cli::cli_abort( - c( - "{.arg old_observationID} and {.arg new_observationID} must have the - same length.", - "x" = "Length of {.arg old_observationID}({.val {length_old}}) is not - equal to length of {.arg new_observationID}({.val {length_new}})." - ), - class = "camtrapdp_error_length_observationID" - ) - } + # # Check length + # length_old <- length(old_observationID) + # length_new <- length(new_observationID) + # if (length_old != length_new) { + # cli::cli_abort( + # c( + # "{.arg old_observationID} and {.arg new_observationID} must have the + # same length.", + # "x" = "Length of {.arg old_observationID}({.val {length_old}}) is not + # equal to length of {.arg new_observationID}({.val {length_new}})." + # ), + # class = "camtrapdp_error_length_observationID" + # ) + # } # Create a named vector for replacements replacement_map <- setNames(new_observationID, old_observationID) From a07f3d018887645a93e96fb8b1f5a8aced5785ee Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Tue, 30 Jul 2024 15:29:54 +0200 Subject: [PATCH 036/142] replace non-ASCII characters --- R/merge_camtrapdp.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/merge_camtrapdp.R b/R/merge_camtrapdp.R index f5101d94..ed7c205a 100644 --- a/R/merge_camtrapdp.R +++ b/R/merge_camtrapdp.R @@ -27,7 +27,7 @@ merge_camtrapdp <- function(x1, x2, name, title) { cli::cli_abort( c( "{.arg name} must be lower-case and contain only alphanumeric characters - along with “.”, “_” or “-” characters." + along with \".\", \"_\" or \"-\" characters." ), class = "camtrapdp_error_invalid_name" ) From 3dad912aa77b5f74dcdb55c667a299921f1ad3a3 Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Tue, 30 Jul 2024 15:35:07 +0200 Subject: [PATCH 037/142] replace stats::setNames --- R/utils.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/utils.R b/R/utils.R index a0ccfaa6..2873d6a4 100644 --- a/R/utils.R +++ b/R/utils.R @@ -133,7 +133,8 @@ replace_mediaID <- function(x, old_mediaID, new_mediaID) { # } # Create a named vector for replacements - replacement_map <- setNames(new_mediaID, old_mediaID) + replacement_map <- new_mediaID + names(replacement_map) <- old_mediaID # replace mediaIDs in media media(x) <- From 1690c3ce164abacaba8f5a5e647d81dc0aa6c03d Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Wed, 31 Jul 2024 09:57:51 +0200 Subject: [PATCH 038/142] grammar --- R/merge_camtrapdp.R | 2 +- R/utils.R | 68 +++++++++++++-------------- tests/testthat/test-merge_camtrapdp.R | 24 +++++----- 3 files changed, 47 insertions(+), 47 deletions(-) diff --git a/R/merge_camtrapdp.R b/R/merge_camtrapdp.R index ed7c205a..733fce2c 100644 --- a/R/merge_camtrapdp.R +++ b/R/merge_camtrapdp.R @@ -46,7 +46,7 @@ merge_camtrapdp <- function(x1, x2, name, title) { ) } - # Replace duplicated ID's between `x1` and `x2` in `x2` with hashes + # Replace duplicated IDs between `x1` and `x2` in `x2` with hashes x2 <- replace_duplicatedIDs(x1, x2) # Merge resources diff --git a/R/utils.R b/R/utils.R index 2873d6a4..e2926f38 100644 --- a/R/utils.R +++ b/R/utils.R @@ -21,16 +21,16 @@ mutate_when_missing <- function(.data, ...) { return(.data) } -#' Replace deploymentID's +#' Replace deploymentIDs #' #' Replaces deploymentIDs in deployments, media and observations.. #' #' @inheritParams print.camtrapdp -#' @param old_deploymentID deploymentID's to be replaced. Either a single ID or -#' a vector of ID's. -#' @param new_deploymentID replacement deploymentID's. Must be of the same +#' @param old_deploymentID deploymentIDs to be replaced. Either a single ID or +#' a vector of IDs. +#' @param new_deploymentID replacement deploymentIDs. Must be of the same #' length as `old_deploymentID` -#' @return `x` with replaced deploymentID's +#' @return `x` with replaced deploymentIDs #' @family helper functions #' @noRd #' @examples @@ -99,16 +99,16 @@ replace_deploymentID <- function(x, old_deploymentID, new_deploymentID) { return(x) } -#' Replace mediaID's +#' Replace mediaIDs #' #' Replaces mediaIDs in media and observations #' #' @inheritParams print.camtrapdp -#' @param old_mediaID mediaID's to be replaced. Either a single ID or a vector -#' of ID's. -#' @param new_mediaID replacement mediaID's. Must be of the same +#' @param old_mediaID mediaIDs to be replaced. Either a single ID or a vector +#' of IDs. +#' @param new_mediaID replacement mediaIDs. Must be of the same #' length as `old_mediaID` -#' @return `x` with replaced mediaID's +#' @return `x` with replaced mediaIDs #' @family helper functions #' @noRd #' @examples @@ -163,16 +163,16 @@ replace_mediaID <- function(x, old_mediaID, new_mediaID) { return(x) } -#' Replace observationID's +#' Replace observationIDs #' #' Replaces observationIDs in observations #' #' @inheritParams print.camtrapdp -#' @param old_observationID observationID's to be replaced. Either a single ID -#' or a vector of ID's. -#' @param new_observationID replacement observationID's. Must be of the same +#' @param old_observationID observationIDs to be replaced. Either a single ID +#' or a vector of IDs. +#' @param new_observationID replacement observationIDs. Must be of the same #' length as `old_observationID`. -#' @return `x` with replaced observationID's. +#' @return `x` with replaced observationIDs. #' @family helper functions #' @noRd #' @examples @@ -236,16 +236,16 @@ vdigest_crc32 <- function(object) { return(vdigest_crc32(object)) } -#' Replace duplicated ID's when merging Camera Trap Data packages +#' Replace duplicated IDs when merging Camera Trap Data packages #' -#' Replaces duplicated deploymentID's, mediaID's and observationID's between +#' Replaces duplicated deploymentIDs, mediaIDs and observationIDs between #' two Camera Trap Data Packages with hashes generated by `vdigest_crc32`. #' Used in `merge_camtrapdp()`. #' #' #' @param x1,x2 Camera Trap Data Package objects, as returned by #' `read_camtrapdp()`). -#' @return `x2` with duplicated ID's (compared to `x1`) replaced with hashes. +#' @return `x2` with duplicated IDs (compared to `x1`) replaced with hashes. #' @family helper functions #' @noRd replace_duplicatedIDs <- function(x1, x2) { @@ -256,20 +256,20 @@ replace_duplicatedIDs <- function(x1, x2) { media(x) <- dplyr::bind_rows(media(x1), media(x2)) observations(x) <- dplyr::bind_rows(observations(x1), observations(x2)) - # get ID's + # get IDs deploymentIDs <- purrr::pluck(deployments(x), "deploymentID") mediaIDs <- purrr::pluck(media(x), "mediaID") observationIDs <- purrr::pluck(observations(x), "observationID") if (any(duplicated(deploymentIDs))) { - # replace duplicated deploymentID's in `x2` + # replace duplicated deploymentIDs in `x2` duplicated_deploymentID <- deploymentIDs[duplicated(deploymentIDs)] replacement_deploymentID <- vdigest_crc32(duplicated_deploymentID) x2 <- replace_deploymentID( x2, duplicated_deploymentID, replacement_deploymentID ) - # new merge with unique deploymentID's + # new merge with unique deploymentIDs deployments(x) <- dplyr::bind_rows(deployments(x1), deployments(x2)) media(x) <- dplyr::bind_rows(media(x1), media(x2)) observations(x) <- dplyr::bind_rows(observations(x1), observations(x2)) @@ -279,10 +279,10 @@ replace_duplicatedIDs <- function(x1, x2) { cli::cli_alert_warning( c( paste( - "{.arg x1} and {.arg x2} must have unique deploymentID's.\n", - "{.arg x1} and {.arg x2} have duplicated deploymentID's:", + "{.arg x1} and {.arg x2} must have unique deploymentIDs.\n", + "{.arg x1} and {.arg x2} have duplicated deploymentIDs:", "{.val {duplicated_deploymentID}}.\n", - "Duplicated deploymentID's of {.arg x2} are now replaced by", + "Duplicated deploymentIDs of {.arg x2} are now replaced by", "{.val {replacement_deploymentID}} respectively." ) ), @@ -291,12 +291,12 @@ replace_duplicatedIDs <- function(x1, x2) { } if (any(duplicated(mediaIDs))) { - # replace duplicated mediaID's in `x2` + # replace duplicated mediaIDs in `x2` duplicated_mediaID <- mediaIDs[duplicated(mediaIDs)] replacement_mediaID <- vdigest_crc32(duplicated_mediaID) x2 <- replace_mediaID(x2, duplicated_mediaID, replacement_mediaID) - # new merge with unique mediaID's + # new merge with unique mediaIDs media(x) <- dplyr::bind_rows(media(x1), media(x2)) observations(x) <- dplyr::bind_rows(observations(x1), observations(x2)) @@ -305,10 +305,10 @@ replace_duplicatedIDs <- function(x1, x2) { cli::cli_alert_warning( c( paste( - "{.arg x1} and {.arg x2} must have unique mediaID's.\n", - "{.arg x1} and {.arg x2} have duplicated mediaID's:", + "{.arg x1} and {.arg x2} must have unique mediaIDs.\n", + "{.arg x1} and {.arg x2} have duplicated mediaIDs:", "{.val {duplicated_mediaID}}.\n", - "Duplicated mediaID's of {.arg x2} are now replaced by", + "Duplicated mediaIDs of {.arg x2} are now replaced by", "{.val {replacement_mediaID}} respectively." ) ), @@ -317,24 +317,24 @@ replace_duplicatedIDs <- function(x1, x2) { } if (any(duplicated(observationIDs))) { - # replace duplicated deploymentID's in `x2` + # replace duplicated deploymentIDs in `x2` duplicated_observationID <- observationIDs[duplicated(observationIDs)] replacement_observationID <- vdigest_crc32(duplicated_observationID) x2 <- replace_observationID( x2, duplicated_observationID, replacement_observationID ) - # new merge with unique observationID's + # new merge with unique observationIDs observations(x) <- dplyr::bind_rows(observations(x1), observations(x2)) # inform user cli::cli_alert_warning( c( paste( - "{.arg x1} and {.arg x2} must have unique observationID's.\n", - "{.arg x1} and {.arg x2} have duplicated observationID's:", + "{.arg x1} and {.arg x2} must have unique observationIDs.\n", + "{.arg x1} and {.arg x2} have duplicated observationIDs:", "{.val {duplicated_observationID}}.\n", - "Duplicated observationID's of {.arg x2} are now replaced by", + "Duplicated observationIDs of {.arg x2} are now replaced by", "{.val {replacement_observationID}} respectively." ) ), diff --git a/tests/testthat/test-merge_camtrapdp.R b/tests/testthat/test-merge_camtrapdp.R index b41933ce..3b1aab02 100644 --- a/tests/testthat/test-merge_camtrapdp.R +++ b/tests/testthat/test-merge_camtrapdp.R @@ -51,8 +51,8 @@ test_that("merge_camtrapdp() returns a valid camtrapdp object", { ) }) -test_that("merge_camtrapdp() returns unique deplpymentID's, mediaID's and - observationID's", { +test_that("merge_camtrapdp() returns unique deplpymentIDs, mediaIDs and + observationIDs", { skip_if_offline() duplicated_deploymentID <- "00a2c20d" duplicated_mediaID <- "ca3ff293" @@ -112,7 +112,7 @@ test_that("merge_camtrapdp() returns unique deplpymentID's, mediaID's and ) }) -test_that("merge_camtrapdp() returns message when ID's are replaced", { +test_that("merge_camtrapdp() returns message when IDs are replaced", { skip_if_offline() x1 <- example_dataset() x2 <- example_dataset() %>% @@ -122,15 +122,15 @@ test_that("merge_camtrapdp() returns message when ID's are replaced", { expect_message( merge_camtrapdp(x1, x2, "new_package_name", "New title") #, # regexp = paste( - # "! `x1` and `x2` must have unique deploymentID's.", - # "`x1` and `x2` have duplicated deploymentID's: \"62c200a9\".", - # "Duplicated deploymentID's of `x2` are now replaced by \"07ce6950\" respectively.", - # "! `x1` and `x2` must have unique mediaID's.", - # "`x1` and `x2` have duplicated mediaID's: \"fb58a2b9\", \"0bb2566e\", and \"a6a7a04c\".", - # "Duplicated mediaID's of `x2` are now replaced by \"ba426f00\", \"8d5c0009\", and \"1689e0db\" respectively.", - # "! `x1` and `x2` must have unique observationID's.", - # "`x1` and `x2` have duplicated observationID's: \"a0431321\", \"fb58a2b9_1\", \"0bb2566e_1\", and \"a6a7a04c_1\".", - # "Duplicated observationID's of `x2` are now replaced by \"c6eeccc0\", \"a8452c14\", \"a48adc8a\", and \"b78e02ba\" respectively." + # "! `x1` and `x2` must have unique deploymentIDs.", + # "`x1` and `x2` have duplicated deploymentIDs: \"62c200a9\".", + # "Duplicated deploymentIDs of `x2` are now replaced by \"07ce6950\" respectively.", + # "! `x1` and `x2` must have unique mediaIDs.", + # "`x1` and `x2` have duplicated mediaIDs: \"fb58a2b9\", \"0bb2566e\", and \"a6a7a04c\".", + # "Duplicated mediaIDs of `x2` are now replaced by \"ba426f00\", \"8d5c0009\", and \"1689e0db\" respectively.", + # "! `x1` and `x2` must have unique observationIDs.", + # "`x1` and `x2` have duplicated observationIDs: \"a0431321\", \"fb58a2b9_1\", \"0bb2566e_1\", and \"a6a7a04c_1\".", + # "Duplicated observationIDs of `x2` are now replaced by \"c6eeccc0\", \"a8452c14\", \"a48adc8a\", and \"b78e02ba\" respectively." # ) ) }) From 693465db2e1c7997aae5cbf496bb323fc70cc651 Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Thu, 26 Sep 2024 17:27:40 +0200 Subject: [PATCH 039/142] leave name and title empty (don't have the user set those in the function) --- R/merge_camtrapdp.R | 33 +++-------------------- tests/testthat/test-merge_camtrapdp.R | 38 --------------------------- 2 files changed, 4 insertions(+), 67 deletions(-) diff --git a/R/merge_camtrapdp.R b/R/merge_camtrapdp.R index 733fce2c..90190e9d 100644 --- a/R/merge_camtrapdp.R +++ b/R/merge_camtrapdp.R @@ -16,36 +16,11 @@ #' filter_deployments(deploymentID %in% c("00a2c20d", "29b7d356")) #' x2 <- example_dataset() %>% #' filter_deployments(deploymentID %in% c("577b543a", "62c200a9")) -#' x_merged <- merge_camtrapdp(x1, x2, "new_package_name", "New title") -merge_camtrapdp <- function(x1, x2, name, title) { +#' x_merged <- merge_camtrapdp(x1, x2) +merge_camtrapdp <- function(x1, x2) { check_camtrapdp(x1) check_camtrapdp(x2) - # Valid name - regex_name <- "^[a-z0-9._-]+$" - if (!grepl(regex_name, name)) { - cli::cli_abort( - c( - "{.arg name} must be lower-case and contain only alphanumeric characters - along with \".\", \"_\" or \"-\" characters." - ), - class = "camtrapdp_error_invalid_name" - ) - } - - # Valid title - regex_title <- "^[A-Z][a-zA-Z0-9 :\\-]*[.!?]?$" - - if (!(grepl(regex_title, title))) { - cli::cli_abort( - c( - "{.arg title} must be a string providing a title or one sentence - description for this package." - ), - class = "camtrapdp_error_invalid_title" - ) - } - # Replace duplicated IDs between `x1` and `x2` in `x2` with hashes x2 <- replace_duplicatedIDs(x1, x2) @@ -56,11 +31,11 @@ merge_camtrapdp <- function(x1, x2, name, title) { observations(x) <- dplyr::bind_rows(observations(x1), observations(x2)) # Merge/update metadata - x$name <- name + x$name <- NA # Create new ID x$id <- digest::digest(paste(x$title, x2$title), algo = "md5") x$created <- format(Sys.time(), "%Y-%m-%dT%H:%M:%SZ") - x$title <- title + x$title <- NA x$contributors <- remove_duplicates(c(x1$contributors, x2$contributors)) paragraph <- paste0( "This dataset is a combination of 2 datasets: ", x1$title, "and", x2$title, diff --git a/tests/testthat/test-merge_camtrapdp.R b/tests/testthat/test-merge_camtrapdp.R index 3b1aab02..e84728d7 100644 --- a/tests/testthat/test-merge_camtrapdp.R +++ b/tests/testthat/test-merge_camtrapdp.R @@ -1,41 +1,3 @@ -test_that("merge_camtrapdp() returns error on invalid name", { - skip_if_offline() - x1 <- example_dataset() %>% - filter_deployments(deploymentID %in% c("00a2c20d", "29b7d356")) - x2 <- example_dataset() %>% - filter_deployments(deploymentID %in% c("577b543a", "62c200a9")) - - expect_error(merge_camtrapdp(x1, x2, "Name", "new title")) - expect_error(merge_camtrapdp(x1, x2, "package name", "new title")) - expect_error(merge_camtrapdp(x1, x2, "name?", "new title")) - expect_error(merge_camtrapdp(x1, x2, "new/name", "new title")) -}) - -test_that("merge_camtrapdp() returns error on invalid title", { - skip_if_offline() - x1 <- example_dataset() %>% - filter_deployments(deploymentID %in% c("00a2c20d", "29b7d356")) - x2 <- example_dataset() %>% - filter_deployments(deploymentID %in% c("577b543a", "62c200a9")) - - #invalid - expect_error(merge_camtrapdp(x1, x2, "new_name", "Period.in the middle")) - expect_error(merge_camtrapdp(x1, x2, "new_name", "start with lowercase")) - expect_error(merge_camtrapdp(x1, x2, "new_name", "Invalid_character")) - expect_error( - merge_camtrapdp(x1, x2, "new_name", "Hello! Second sentence.") - ) - - # valid - expect_no_error(merge_camtrapdp(x1, x2, "new_name", "This is a title")) - expect_no_error(merge_camtrapdp(x1, x2, "new_name", "Title with punctuation.")) - expect_no_error(merge_camtrapdp(x1, x2, "new_name", "Title: with a colon")) - expect_no_error(merge_camtrapdp(x1, x2, "new_name", "Title - with a hyphen.")) - expect_no_error( - merge_camtrapdp(x1, x2, "new_name", "A bit of a longer sentence is ok!") - ) -}) - test_that("merge_camtrapdp() returns a valid camtrapdp object", { skip_if_offline() x1 <- example_dataset() %>% From 16f5f28b40d9c815555690ab8c24d8cb0d4e51db Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Thu, 26 Sep 2024 17:27:47 +0200 Subject: [PATCH 040/142] typo --- R/utils.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/utils.R b/R/utils.R index 605e01ea..80b90a9b 100644 --- a/R/utils.R +++ b/R/utils.R @@ -523,3 +523,4 @@ remove_duplicates <- function(data_list) { }) return(unique_data_list) +} From c53449411bb347a62f4d99805a0b4805788d73ef Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Thu, 26 Sep 2024 17:35:19 +0200 Subject: [PATCH 041/142] remove title and name arguments from tests --- tests/testthat/test-merge_camtrapdp.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/testthat/test-merge_camtrapdp.R b/tests/testthat/test-merge_camtrapdp.R index e84728d7..35860ca8 100644 --- a/tests/testthat/test-merge_camtrapdp.R +++ b/tests/testthat/test-merge_camtrapdp.R @@ -7,7 +7,7 @@ test_that("merge_camtrapdp() returns a valid camtrapdp object", { expect_no_error( suppressMessages( check_camtrapdp( - merge_camtrapdp(x1, x2, "new_package_name", "New title") + merge_camtrapdp(x1, x2) ) ) ) @@ -44,7 +44,7 @@ test_that("merge_camtrapdp() returns unique deplpymentIDs, mediaIDs and # merge x_merged <- suppressMessages( - merge_camtrapdp(x1, x2, "new_package_name", "New title") + merge_camtrapdp(x1, x2) ) # get new IDs @@ -82,7 +82,7 @@ test_that("merge_camtrapdp() returns message when IDs are replaced", { filter_media(mediaID %in% c("fb58a2b9", "0bb2566e", "a6a7a04c")) expect_message( - merge_camtrapdp(x1, x2, "new_package_name", "New title") #, + merge_camtrapdp(x1, x2) #, # regexp = paste( # "! `x1` and `x2` must have unique deploymentIDs.", # "`x1` and `x2` have duplicated deploymentIDs: \"62c200a9\".", From a00e6cd6a74b2f0d900899b8d95f4dcdfe1eca54 Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Thu, 26 Sep 2024 17:35:32 +0200 Subject: [PATCH 042/142] Do not generate an id. That also solves the problem of having meaningless relatedIdentifiers --- R/merge_camtrapdp.R | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/R/merge_camtrapdp.R b/R/merge_camtrapdp.R index 90190e9d..702a66d2 100644 --- a/R/merge_camtrapdp.R +++ b/R/merge_camtrapdp.R @@ -33,7 +33,7 @@ merge_camtrapdp <- function(x1, x2) { # Merge/update metadata x$name <- NA # Create new ID - x$id <- digest::digest(paste(x$title, x2$title), algo = "md5") + x$id <- NA x$created <- format(Sys.time(), "%Y-%m-%dT%H:%M:%SZ") x$title <- NA x$contributors <- remove_duplicates(c(x1$contributors, x2$contributors)) @@ -49,19 +49,26 @@ merge_camtrapdp <- function(x1, x2) { x$bibliographicCitation <- NULL x$coordinatePrecision <- max(x1$coordinatePrecision, x2$coordinatePrecision, na.rm = TRUE) - - relatedIdentifiers_x1 <- list( - relationType = "IsDerivedFrom", - relatedIdentifier = x1$id, - resourceTypeGeneral = "Data package", - relatedIdentifierType = "id" - ) - relatedIdentifiers_x2 <- list( - relationType = "IsDerivedFrom", - relatedIdentifier = x2$id, - resourceTypeGeneral = "Data package", - relatedIdentifierType = "id" - ) + if (!is.null(x1$id)) { + relatedIdentifiers_x1 <- list( + relationType = "IsDerivedFrom", + relatedIdentifier = x1$id, + resourceTypeGeneral = "Data package", + relatedIdentifierType = "id" + ) + } else { + relatedIdentifiers_x1 <- list() + } + if (!is.null(x2$id)) { + relatedIdentifiers_x2 <- list( + relationType = "IsDerivedFrom", + relatedIdentifier = x2$id, + resourceTypeGeneral = "Data package", + relatedIdentifierType = "id" + ) + } else { + relatedIdentifiers_x2 <- list() + } new_relatedIdentifiers <- list(relatedIdentifiers_x1, relatedIdentifiers_x2) x$relatedIdentifiers <- remove_duplicates( c(x1$relatedIdentifiers, x2$relatedIdentifiers, new_relatedIdentifiers) From 9c72c1b9f3bbf9bb96798cfe61d9af21756f270d Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Thu, 26 Sep 2024 17:37:17 +0200 Subject: [PATCH 043/142] remove params title and name from documentation --- R/merge_camtrapdp.R | 6 ------ 1 file changed, 6 deletions(-) diff --git a/R/merge_camtrapdp.R b/R/merge_camtrapdp.R index 702a66d2..c7b073b6 100644 --- a/R/merge_camtrapdp.R +++ b/R/merge_camtrapdp.R @@ -2,12 +2,6 @@ #' #' @param x1,x2 Camera Trap Data Package objects (as returned by #' `read_camtrapdp()`), to be coerced to one. -#' @param name A short url-usable (and preferably human-readable) -#' [name](https://specs.frictionlessdata.io/data-package/#name) for the -#' merged package. -#' @param title A string providing a -#' [title](https://specs.frictionlessdata.io/data-package/#title) or one -#' sentence description for the merged package. #' @return `x` #' @family transformation functions #' @export From 500262d1411659efc79e95da80851c073c9d465b Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Thu, 26 Sep 2024 17:37:24 +0200 Subject: [PATCH 044/142] documen() --- man/merge_camtrapdp.Rd | 11 +++-------- man/shift_time.Rd | 1 + 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/man/merge_camtrapdp.Rd b/man/merge_camtrapdp.Rd index 3b9cb5e4..db19ef23 100644 --- a/man/merge_camtrapdp.Rd +++ b/man/merge_camtrapdp.Rd @@ -4,17 +4,11 @@ \alias{merge_camtrapdp} \title{Merge Camera Trap Data packages} \usage{ -merge_camtrapdp(x1, x2, name, title) +merge_camtrapdp(x1, x2) } \arguments{ \item{x1, x2}{Camera Trap Data Package objects (as returned by \code{read_camtrapdp()}), to be coerced to one.} - -\item{name}{A short url-usable (and preferably human-readable) name for this -merged package.} - -\item{title}{A string providing a title or one sentence description for this -merged package.} } \value{ \code{x} @@ -27,11 +21,12 @@ x1 <- example_dataset() \%>\% filter_deployments(deploymentID \%in\% c("00a2c20d", "29b7d356")) x2 <- example_dataset() \%>\% filter_deployments(deploymentID \%in\% c("577b543a", "62c200a9")) -x_merged <- merge_camtrapdp(x1, x2, "new_package_name", "New title") +x_merged <- merge_camtrapdp(x1, x2) } \seealso{ Other transformation functions: \code{\link{round_coordinates}()}, +\code{\link{shift_time}()}, \code{\link{write_dwc}()}, \code{\link{write_eml}()} } diff --git a/man/shift_time.Rd b/man/shift_time.Rd index db3d5b10..7aa9e112 100644 --- a/man/shift_time.Rd +++ b/man/shift_time.Rd @@ -50,6 +50,7 @@ deployments(x_shifted)[, c("deploymentID", "deploymentStart", "deploymentEnd")] } \seealso{ Other transformation functions: +\code{\link{merge_camtrapdp}()}, \code{\link{round_coordinates}()}, \code{\link{write_dwc}()}, \code{\link{write_eml}()} From 7e7011bf0dcf701c400b658eb6d07650b4f72954 Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Thu, 26 Sep 2024 18:17:49 +0200 Subject: [PATCH 045/142] replace project with projects --- R/merge_camtrapdp.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/R/merge_camtrapdp.R b/R/merge_camtrapdp.R index c7b073b6..c35eafd3 100644 --- a/R/merge_camtrapdp.R +++ b/R/merge_camtrapdp.R @@ -40,6 +40,8 @@ merge_camtrapdp <- function(x1, x2) { x$homepage <- NULL x$sources <- remove_duplicates(c(x1$sources, x2$sources)) x$licenses <- remove_duplicates(c(x1$licenses, x2$licenses)) + x$project <- NULL + x$projects <- list(x1$project, x2$project) x$bibliographicCitation <- NULL x$coordinatePrecision <- max(x1$coordinatePrecision, x2$coordinatePrecision, na.rm = TRUE) From 4519945cf65e53cf50d57a1f9e174006600a4f07 Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Fri, 27 Sep 2024 09:39:03 +0200 Subject: [PATCH 046/142] add helper function check_duplicate_ids() --- R/merge_camtrapdp.R | 9 +++++++++ R/utils.R | 35 ++++++++++++++++++++--------------- 2 files changed, 29 insertions(+), 15 deletions(-) diff --git a/R/merge_camtrapdp.R b/R/merge_camtrapdp.R index c35eafd3..7401ad11 100644 --- a/R/merge_camtrapdp.R +++ b/R/merge_camtrapdp.R @@ -15,8 +15,17 @@ merge_camtrapdp <- function(x1, x2) { check_camtrapdp(x1) check_camtrapdp(x2) + # check if identifiers have duplicates + results_duplicate_ids <- check_duplicate_ids(x1, x2) + + # Add suffix to identifiers with duplicates + + + ########## + # Replace duplicated IDs between `x1` and `x2` in `x2` with hashes x2 <- replace_duplicatedIDs(x1, x2) + ############### # Merge resources x <- x1 diff --git a/R/utils.R b/R/utils.R index 80b90a9b..a96bdb09 100644 --- a/R/utils.R +++ b/R/utils.R @@ -235,21 +235,26 @@ replace_observationID <- function(x, old_observationID, new_observationID) { return(x) } -#' Create hashes -#' -#' Set a vectorised function for creating hash function digests, using algorithm -#' "crc32". -#' -#' @param object The object to be digested. This can be any R object that can be -#' serialized into a raw vector. -#' @return Hash summary as a character vector of the same length as the input -#' @family helper functions -#' @noRd -#' @examples -#' vdigest_crc32(c("00a2c20d", "29b7d356")) -vdigest_crc32 <- function(object) { - vdigest_crc32 <- digest::getVDigest(algo = "crc32") - return(vdigest_crc32(object)) +check_duplicate_ids <- function(x1, x2) { + x <- x1 + result = list(deploymentID = FALSE, mediaID = FALSE, observationID = FALSE) + + # Merge resources + deployments(x) <- dplyr::bind_rows(deployments(x1), deployments(x2)) + media(x) <- dplyr::bind_rows(media(x1), media(x2)) + observations(x) <- dplyr::bind_rows(observations(x1), observations(x2)) + + # Get IDs + deploymentIDs <- purrr::pluck(deployments(x), "deploymentID") + mediaIDs <- purrr::pluck(media(x), "mediaID") + observationIDs <- purrr::pluck(observations(x), "observationID") + + # Check for duplicates + if (any(duplicated(deploymentIDs))) {result$deploymentID <- TRUE} + if (any(duplicated(mediaIDs))) {result$mediaID <- TRUE} + if (any(duplicated(observationIDs))) {result$observationID <- TRUE} + + return(result) } #' Replace duplicated IDs when merging Camera Trap Data packages From 7c3f9c692c26a29bedf8425d8a1e30269394e073 Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Fri, 27 Sep 2024 10:26:36 +0200 Subject: [PATCH 047/142] remove `replace_ ()` helper functions --- R/utils.R | 198 ------------------------------------------------------ 1 file changed, 198 deletions(-) diff --git a/R/utils.R b/R/utils.R index a96bdb09..e12fd392 100644 --- a/R/utils.R +++ b/R/utils.R @@ -37,204 +37,6 @@ expand_cols <- function(df, colnames) { return(df) } -#' Replace deploymentIDs -#' -#' Replaces deploymentIDs in deployments, media and observations.. -#' -#' @inheritParams print.camtrapdp -#' @param old_deploymentID deploymentIDs to be replaced. Either a single ID or -#' a vector of IDs. -#' @param new_deploymentID replacement deploymentIDs. Must be of the same -#' length as `old_deploymentID` -#' @return `x` with replaced deploymentIDs -#' @family helper functions -#' @noRd -#' @examples -#' x <- example_dataset() -#' x_replaced <- -#' replace_deploymentID( -#' x, c("00a2c20d", "29b7d356"), c("new_deploymentID1", "new_deploymentID2") -#' # Inspect results -#' deployments(x_replaced)$deploymentID -replace_deploymentID <- function(x, old_deploymentID, new_deploymentID) { - - # # Check length - # length_old <- length(old_deploymentID) - # length_new <- length(new_deploymentID) - # if (length_old != length_new) { - # cli::cli_abort( - # c( - # "{.arg old_deploymentID} and {.arg new_deploymentID} must have the same - # length.", - # "x" = "Length of {.arg old_deploymentID}({.val {length_old}}) is not - # equal to length of {.arg new_deploymentID}({.val {length_new}})." - # ), - # class = "camtrapdp_error_length_deploymentID" - # ) - # } - - # Create a named vector for replacements - replacement_map <- setNames(new_deploymentID, old_deploymentID) - - # replace deploymentIDs in deployments - deployments(x) <- - deployments(x) %>% - dplyr::mutate( - deploymentID = - dplyr::if_else( - .data$deploymentID %in% old_deploymentID, - unname(replacement_map[.data$deploymentID]), - .data$deploymentID - ) - ) - - # replace deploymentIDs in observations - observations(x) <- - observations(x) %>% - dplyr::mutate( - deploymentID = - dplyr::if_else( - .data$deploymentID %in% old_deploymentID, - unname(replacement_map[.data$deploymentID]), - .data$deploymentID - ) - ) - - # replace deploymentIDs in media - media(x) <- - media(x) %>% - dplyr::mutate( - deploymentID = - dplyr::if_else( - .data$deploymentID %in% old_deploymentID, - unname(replacement_map[.data$deploymentID]), - .data$deploymentID - ) - ) - - return(x) -} - -#' Replace mediaIDs -#' -#' Replaces mediaIDs in media and observations -#' -#' @inheritParams print.camtrapdp -#' @param old_mediaID mediaIDs to be replaced. Either a single ID or a vector -#' of IDs. -#' @param new_mediaID replacement mediaIDs. Must be of the same -#' length as `old_mediaID` -#' @return `x` with replaced mediaIDs -#' @family helper functions -#' @noRd -#' @examples -#' replace_mediaID( -#' example_dataset(), -#' c("07840dcc", "401386c7"), -#' c("new_mediaID1", "new_mediaID2")) -replace_mediaID <- function(x, old_mediaID, new_mediaID) { - - # # Check length - # length_old <- length(old_mediaID) - # length_new <- length(new_mediaID) - # if (length_old != length_new) { - # cli::cli_abort( - # c( - # "{.arg old_mediaID} and {.arg new_mediaID} must have the same length.", - # "x" = "Length of {.arg old_mediaID}({.val {length_old}}) is not equal to - # length of {.arg new_mediaID}({.val {length_new}})." - # ), - # class = "camtrapdp_error_length_mediaID" - # ) - # } - - # Create a named vector for replacements - replacement_map <- new_mediaID - names(replacement_map) <- old_mediaID - - # replace mediaIDs in media - media(x) <- - media(x) %>% - dplyr::mutate( - mediaID = - dplyr::if_else( - .data$mediaID %in% old_mediaID, - unname(replacement_map[.data$mediaID]), - .data$mediaID - ) - ) - - # replace mediaIDs in observations - observations(x) <- - observations(x) %>% - dplyr::mutate( - mediaID = - dplyr::if_else( - .data$mediaID %in% old_mediaID, - unname(replacement_map[.data$mediaID]), - .data$mediaID - ) - ) - - return(x) -} - -#' Replace observationIDs -#' -#' Replaces observationIDs in observations -#' -#' @inheritParams print.camtrapdp -#' @param old_observationID observationIDs to be replaced. Either a single ID -#' or a vector of IDs. -#' @param new_observationID replacement observationIDs. Must be of the same -#' length as `old_observationID`. -#' @return `x` with replaced observationIDs. -#' @family helper functions -#' @noRd -#' @examples -#' x <- example_dataset() %>% filter_observations( -#' observationID %in% c("705e6036", "07840dcc_1", "401386c7_1") -#' ) -#' x_replaced <- replace_observationID( -#' x, c("705e6036", "07840dcc_1"), c("newID1", "newID2") -#' ) -#' # Inspect values -#' observations(x_replaced)$observationID -replace_observationID <- function(x, old_observationID, new_observationID) { - - # # Check length - # length_old <- length(old_observationID) - # length_new <- length(new_observationID) - # if (length_old != length_new) { - # cli::cli_abort( - # c( - # "{.arg old_observationID} and {.arg new_observationID} must have the - # same length.", - # "x" = "Length of {.arg old_observationID}({.val {length_old}}) is not - # equal to length of {.arg new_observationID}({.val {length_new}})." - # ), - # class = "camtrapdp_error_length_observationID" - # ) - # } - - # Create a named vector for replacements - replacement_map <- setNames(new_observationID, old_observationID) - - # replace observationIDs in observations - observations(x) <- - observations(x) %>% - dplyr::mutate( - observationID = - dplyr::if_else( - .data$observationID %in% old_observationID, - unname(replacement_map[.data$observationID]), - .data$observationID - ) - ) - - return(x) -} - check_duplicate_ids <- function(x1, x2) { x <- x1 result = list(deploymentID = FALSE, mediaID = FALSE, observationID = FALSE) From 3f145eada0685983ee8a7f92521eebbb4bc81db9 Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Fri, 27 Sep 2024 10:32:23 +0200 Subject: [PATCH 048/142] add documentation --- R/utils.R | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/R/utils.R b/R/utils.R index e12fd392..5f88d496 100644 --- a/R/utils.R +++ b/R/utils.R @@ -37,6 +37,16 @@ expand_cols <- function(df, colnames) { return(df) } +#' Check for duplicated IDs +#' +#' Checks for duplicated IDs in two Camera Trap Data Package objects combined. +#' +#' @param x1,x2 Camera Trap Data Package objects (as returned by +#' `read_camtrapdp()`), to be coerced to one. +#' @return List with logical for each type of ID, that indicates whether that +#' ID type has duplicates. +#' @family helper functions +#' @noRd check_duplicate_ids <- function(x1, x2) { x <- x1 result = list(deploymentID = FALSE, mediaID = FALSE, observationID = FALSE) From 5e52a7bc754cbefb64f00f812026138c72858845 Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Fri, 27 Sep 2024 10:43:06 +0200 Subject: [PATCH 049/142] new helper function 'add_suffx()` --- R/utils.R | 58 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/R/utils.R b/R/utils.R index 5f88d496..9527ca46 100644 --- a/R/utils.R +++ b/R/utils.R @@ -69,6 +69,64 @@ check_duplicate_ids <- function(x1, x2) { return(result) } +#' Add suffix to identifiers with duplicates +#' +#' Adds suffix to all values of each identifier that has duplicates. +#' +#' @inheritParams print.camtrapdp +#' @param suffix The suffix to add to the IDs. +#' @param results_duplicate_ids Output generated with `check_duplicate_ids()`. +#' List with logical for each type of ID, that indicates whether that ID type +#' has duplicates. +#' @return `x` +#' @family helper functions +#' @noRd +#' @examples +#' results_duplicate_ids <- list( +#' deploymentID = TRUE, mediaID = TRUE, observationID = TRUE +#' ) +#' x <- add_suffix(example_dataset(), results_duplicate_ids, suffix = ".x") +add_suffix <- function(x, results_duplicate_ids, suffix) { + + if (results_duplicate_ids$deploymentID) { + # Add suffix to deploymentIDs in deployments + deployments(x) <- + deployments(x) %>% + dplyr::mutate(deploymentID = paste0(.data$deploymentID, suffix)) + + # Add suffix to deploymentIDs in observations + observations(x) <- + observations(x) %>% + dplyr::mutate(deploymentID = paste0(.data$deploymentID, suffix)) + + # Add suffix to deploymentIDs in media + media(x) <- + media(x) %>% + dplyr::mutate(deploymentID = paste0(.data$deploymentID, suffix)) + } + + if (results_duplicate_ids$mediaID) { + # Add suffix to mediaIDs in media + media(x) <- + media(x) %>% + dplyr::mutate(mediaID = paste0(.data$mediaID, suffix)) + + # Add suffix to mediaIDs in observations + observations(x) <- + observations(x) %>% + dplyr::mutate(mediaID = paste0(.data$mediaID, suffix)) + } + + if (results_duplicate_ids$observationID) { + # Add suffix to observationIDs in observations + observations(x) <- + observations(x) %>% + dplyr::mutate(observationID = paste0(.data$observationID, suffix)) + } + + return(x) +} + #' Replace duplicated IDs when merging Camera Trap Data packages #' #' Replaces duplicated deploymentIDs, mediaIDs and observationIDs between From 4f1146d2f2f2c8af07584aa3c13d2def3611a4a4 Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Fri, 27 Sep 2024 10:50:37 +0200 Subject: [PATCH 050/142] remove helper function `replace_duplicatedIDs()` --- R/merge_camtrapdp.R | 7 --- R/utils.R | 109 -------------------------------------------- 2 files changed, 116 deletions(-) diff --git a/R/merge_camtrapdp.R b/R/merge_camtrapdp.R index 7401ad11..9a869f52 100644 --- a/R/merge_camtrapdp.R +++ b/R/merge_camtrapdp.R @@ -20,13 +20,6 @@ merge_camtrapdp <- function(x1, x2) { # Add suffix to identifiers with duplicates - - ########## - - # Replace duplicated IDs between `x1` and `x2` in `x2` with hashes - x2 <- replace_duplicatedIDs(x1, x2) - ############### - # Merge resources x <- x1 deployments(x) <- dplyr::bind_rows(deployments(x1), deployments(x2)) diff --git a/R/utils.R b/R/utils.R index 9527ca46..7ec9658e 100644 --- a/R/utils.R +++ b/R/utils.R @@ -127,115 +127,6 @@ add_suffix <- function(x, results_duplicate_ids, suffix) { return(x) } -#' Replace duplicated IDs when merging Camera Trap Data packages -#' -#' Replaces duplicated deploymentIDs, mediaIDs and observationIDs between -#' two Camera Trap Data Packages with hashes generated by `vdigest_crc32`. -#' Used in `merge_camtrapdp()`. -#' -#' -#' @param x1,x2 Camera Trap Data Package objects, as returned by -#' `read_camtrapdp()`). -#' @return `x2` with duplicated IDs (compared to `x1`) replaced with hashes. -#' @family helper functions -#' @noRd -replace_duplicatedIDs <- function(x1, x2) { - x <- x1 - - # merge resources - deployments(x) <- dplyr::bind_rows(deployments(x1), deployments(x2)) - media(x) <- dplyr::bind_rows(media(x1), media(x2)) - observations(x) <- dplyr::bind_rows(observations(x1), observations(x2)) - - # get IDs - deploymentIDs <- purrr::pluck(deployments(x), "deploymentID") - mediaIDs <- purrr::pluck(media(x), "mediaID") - observationIDs <- purrr::pluck(observations(x), "observationID") - - if (any(duplicated(deploymentIDs))) { - # replace duplicated deploymentIDs in `x2` - duplicated_deploymentID <- deploymentIDs[duplicated(deploymentIDs)] - replacement_deploymentID <- vdigest_crc32(duplicated_deploymentID) - x2 <- replace_deploymentID( - x2, duplicated_deploymentID, replacement_deploymentID - ) - - # new merge with unique deploymentIDs - deployments(x) <- dplyr::bind_rows(deployments(x1), deployments(x2)) - media(x) <- dplyr::bind_rows(media(x1), media(x2)) - observations(x) <- dplyr::bind_rows(observations(x1), observations(x2)) - - # inform user - new_deploymentIDs <- vdigest_crc32(duplicated_deploymentID) - cli::cli_alert_warning( - c( - paste( - "{.arg x1} and {.arg x2} must have unique deploymentIDs.\n", - "{.arg x1} and {.arg x2} have duplicated deploymentIDs:", - "{.val {duplicated_deploymentID}}.\n", - "Duplicated deploymentIDs of {.arg x2} are now replaced by", - "{.val {replacement_deploymentID}} respectively." - ) - ), - class = "camtrapdp_warning_unique_deploymentID" - ) - } - - if (any(duplicated(mediaIDs))) { - # replace duplicated mediaIDs in `x2` - duplicated_mediaID <- mediaIDs[duplicated(mediaIDs)] - replacement_mediaID <- vdigest_crc32(duplicated_mediaID) - x2 <- replace_mediaID(x2, duplicated_mediaID, replacement_mediaID) - - # new merge with unique mediaIDs - media(x) <- dplyr::bind_rows(media(x1), media(x2)) - observations(x) <- dplyr::bind_rows(observations(x1), observations(x2)) - - # inform user - new_mediaIDs <- vdigest_crc32(duplicated_mediaID) - cli::cli_alert_warning( - c( - paste( - "{.arg x1} and {.arg x2} must have unique mediaIDs.\n", - "{.arg x1} and {.arg x2} have duplicated mediaIDs:", - "{.val {duplicated_mediaID}}.\n", - "Duplicated mediaIDs of {.arg x2} are now replaced by", - "{.val {replacement_mediaID}} respectively." - ) - ), - class = "camtrapdp_warning_unique_mediaID" - ) - } - - if (any(duplicated(observationIDs))) { - # replace duplicated deploymentIDs in `x2` - duplicated_observationID <- observationIDs[duplicated(observationIDs)] - replacement_observationID <- vdigest_crc32(duplicated_observationID) - x2 <- replace_observationID( - x2, duplicated_observationID, replacement_observationID - ) - - # new merge with unique observationIDs - observations(x) <- dplyr::bind_rows(observations(x1), observations(x2)) - # inform user - - cli::cli_alert_warning( - c( - paste( - "{.arg x1} and {.arg x2} must have unique observationIDs.\n", - "{.arg x1} and {.arg x2} have duplicated observationIDs:", - "{.val {duplicated_observationID}}.\n", - "Duplicated observationIDs of {.arg x2} are now replaced by", - "{.val {replacement_observationID}} respectively." - ) - ), - class = "camtrapdp_warning_unique_observationID" - ) - } - - return(x2) -} - #' Normalize list elements #' #' Converts each list element to a named vector with consistent handling of From af0876e069a840e0aa0552479bffad82d965890e Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Fri, 27 Sep 2024 10:51:17 +0200 Subject: [PATCH 051/142] use `add_suffx()` --- R/merge_camtrapdp.R | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/R/merge_camtrapdp.R b/R/merge_camtrapdp.R index 9a869f52..8117eef1 100644 --- a/R/merge_camtrapdp.R +++ b/R/merge_camtrapdp.R @@ -19,6 +19,10 @@ merge_camtrapdp <- function(x1, x2) { results_duplicate_ids <- check_duplicate_ids(x1, x2) # Add suffix to identifiers with duplicates + if (any(results_duplicate_ids)) { + x1 <- add_suffix(x1, results_duplicate_ids, suffix[1]) + x2 <- add_suffix(x2, results_duplicate_ids, suffix[2]) + } # Merge resources x <- x1 From 4f2ea1e9f1a7cfcfafc4f144084c543b78e78061 Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Fri, 27 Sep 2024 10:53:56 +0200 Subject: [PATCH 052/142] add param suffix --- R/merge_camtrapdp.R | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/R/merge_camtrapdp.R b/R/merge_camtrapdp.R index 8117eef1..28ebd32e 100644 --- a/R/merge_camtrapdp.R +++ b/R/merge_camtrapdp.R @@ -2,6 +2,9 @@ #' #' @param x1,x2 Camera Trap Data Package objects (as returned by #' `read_camtrapdp()`), to be coerced to one. +#' @param suffix If there are duplicate IDs in x1 an x2, these suffixes will be +#' added to all the values of each identifier with duplicates, to disambiguate +#' them. Should be a character vector of length 2. #' @return `x` #' @family transformation functions #' @export @@ -11,7 +14,7 @@ #' x2 <- example_dataset() %>% #' filter_deployments(deploymentID %in% c("577b543a", "62c200a9")) #' x_merged <- merge_camtrapdp(x1, x2) -merge_camtrapdp <- function(x1, x2) { +merge_camtrapdp <- function(x1, x2, suffix = c(".x", ".y")) { check_camtrapdp(x1) check_camtrapdp(x2) From d198a683ed7ecd7164a7c661d8eafd719637d418 Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Fri, 27 Sep 2024 11:09:49 +0200 Subject: [PATCH 053/142] keep NAs in mediaID when adding suffix --- R/utils.R | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/R/utils.R b/R/utils.R index 7ec9658e..0351aa58 100644 --- a/R/utils.R +++ b/R/utils.R @@ -109,12 +109,20 @@ add_suffix <- function(x, results_duplicate_ids, suffix) { # Add suffix to mediaIDs in media media(x) <- media(x) %>% - dplyr::mutate(mediaID = paste0(.data$mediaID, suffix)) + dplyr::mutate( + mediaID = ifelse( + !is.na(.data$mediaID), paste0(.data$mediaID, suffix), NA + ) + ) # Add suffix to mediaIDs in observations observations(x) <- observations(x) %>% - dplyr::mutate(mediaID = paste0(.data$mediaID, suffix)) + dplyr::mutate( + mediaID = ifelse( + !is.na(.data$mediaID), paste0(.data$mediaID, suffix), NA + ) + ) } if (results_duplicate_ids$observationID) { From 6cf7e0e807c9a3b6a75b1948372dcb368399f249 Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Fri, 27 Sep 2024 11:18:16 +0200 Subject: [PATCH 054/142] do not merge objects in helper function --- R/utils.R | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/R/utils.R b/R/utils.R index 0351aa58..63c2540f 100644 --- a/R/utils.R +++ b/R/utils.R @@ -48,18 +48,20 @@ expand_cols <- function(df, colnames) { #' @family helper functions #' @noRd check_duplicate_ids <- function(x1, x2) { - x <- x1 result = list(deploymentID = FALSE, mediaID = FALSE, observationID = FALSE) - # Merge resources - deployments(x) <- dplyr::bind_rows(deployments(x1), deployments(x2)) - media(x) <- dplyr::bind_rows(media(x1), media(x2)) - observations(x) <- dplyr::bind_rows(observations(x1), observations(x2)) - - # Get IDs - deploymentIDs <- purrr::pluck(deployments(x), "deploymentID") - mediaIDs <- purrr::pluck(media(x), "mediaID") - observationIDs <- purrr::pluck(observations(x), "observationID") + deploymentIDs <- c( + unique(purrr::pluck(deployments(x1), "deploymentID")), + unique(purrr::pluck(deployments(x2), "deploymentID")) + ) + mediaIDs <- c( + unique(purrr::pluck(media(x1), "mediaID")), + unique(purrr::pluck(media(x2), "mediaID")) + ) + observationIDs <- c( + unique(purrr::pluck(observations(x1), "observationID")), + unique(purrr::pluck(observations(x2), "observationID")) + ) # Check for duplicates if (any(duplicated(deploymentIDs))) {result$deploymentID <- TRUE} From 83280655422deaba41f5f29d2fc001562d6362a1 Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Fri, 27 Sep 2024 11:35:32 +0200 Subject: [PATCH 055/142] also add suffix to eventIDs and individualDs --- R/utils.R | 57 ++++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 52 insertions(+), 5 deletions(-) diff --git a/R/utils.R b/R/utils.R index 63c2540f..82e0a59b 100644 --- a/R/utils.R +++ b/R/utils.R @@ -44,11 +44,13 @@ expand_cols <- function(df, colnames) { #' @param x1,x2 Camera Trap Data Package objects (as returned by #' `read_camtrapdp()`), to be coerced to one. #' @return List with logical for each type of ID, that indicates whether that -#' ID type has duplicates. +#' ID type has duplicates between x1 and x2. #' @family helper functions #' @noRd check_duplicate_ids <- function(x1, x2) { - result = list(deploymentID = FALSE, mediaID = FALSE, observationID = FALSE) + result = list( + deploymentID = FALSE, mediaID = FALSE, observationID = FALSE, + eventID = FALSE, individualID = FALSE) deploymentIDs <- c( unique(purrr::pluck(deployments(x1), "deploymentID")), @@ -62,11 +64,21 @@ check_duplicate_ids <- function(x1, x2) { unique(purrr::pluck(observations(x1), "observationID")), unique(purrr::pluck(observations(x2), "observationID")) ) + eventIDs <- c( + unique(purrr::pluck(media(x1), "eventID")), + unique(purrr::pluck(media(x2), "eventID")) + ) + individualIDs <- c( + unique(purrr::pluck(observations(x1), "individualID")), + unique(purrr::pluck(observations(x2), "individualID")) + ) # Check for duplicates if (any(duplicated(deploymentIDs))) {result$deploymentID <- TRUE} if (any(duplicated(mediaIDs))) {result$mediaID <- TRUE} if (any(duplicated(observationIDs))) {result$observationID <- TRUE} + if (any(duplicated(eventIDs))) {result$eventID <- TRUE} + if (any(duplicated(individualIDs))) {result$individualID <- TRUE} return(result) } @@ -84,12 +96,12 @@ check_duplicate_ids <- function(x1, x2) { #' @family helper functions #' @noRd #' @examples -#' results_duplicate_ids <- list( -#' deploymentID = TRUE, mediaID = TRUE, observationID = TRUE -#' ) +#' results_duplicate_ids <- list(deploymentID = TRUE, mediaID = TRUE, +#' observationID = TRUE, eventID = TRUE, individualID = TRUE) #' x <- add_suffix(example_dataset(), results_duplicate_ids, suffix = ".x") add_suffix <- function(x, results_duplicate_ids, suffix) { + # deploymentID if (results_duplicate_ids$deploymentID) { # Add suffix to deploymentIDs in deployments deployments(x) <- @@ -107,6 +119,7 @@ add_suffix <- function(x, results_duplicate_ids, suffix) { dplyr::mutate(deploymentID = paste0(.data$deploymentID, suffix)) } + # mediaID if (results_duplicate_ids$mediaID) { # Add suffix to mediaIDs in media media(x) <- @@ -127,6 +140,7 @@ add_suffix <- function(x, results_duplicate_ids, suffix) { ) } + # observationID if (results_duplicate_ids$observationID) { # Add suffix to observationIDs in observations observations(x) <- @@ -134,6 +148,39 @@ add_suffix <- function(x, results_duplicate_ids, suffix) { dplyr::mutate(observationID = paste0(.data$observationID, suffix)) } + # eventID + if (results_duplicate_ids$eventID) { + # Add suffix to eventIDs in media + media(x) <- + media(x) %>% + dplyr::mutate( + eventID = ifelse( + !is.na(.data$eventID), paste0(.data$eventID, suffix), NA + ) + ) + + # Add suffix to eventIDs in observations + observations(x) <- + observations(x) %>% + dplyr::mutate( + eventID = ifelse( + !is.na(.data$eventID), paste0(.data$eventID, suffix), NA + ) + ) + } + + # individualID + if (results_duplicate_ids$individualID) { + # Add suffix to individualIDs in observations + observations(x) <- + observations(x) %>% + dplyr::mutate( + individualID = ifelse( + !is.na(.data$individualID), paste0(.data$individualID, suffix), NA + ) + ) + } + return(x) } From 7ac0413e9514def6516e72b5ddf73e9b19d5994c Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Fri, 27 Sep 2024 11:56:17 +0200 Subject: [PATCH 056/142] add warnings --- R/merge_camtrapdp.R | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/R/merge_camtrapdp.R b/R/merge_camtrapdp.R index 28ebd32e..0b40e72a 100644 --- a/R/merge_camtrapdp.R +++ b/R/merge_camtrapdp.R @@ -23,6 +23,26 @@ merge_camtrapdp <- function(x1, x2, suffix = c(".x", ".y")) { # Add suffix to identifiers with duplicates if (any(results_duplicate_ids)) { + + if (!is.character(suffix) || length(suffix) != 2) { + cli::cli_alert_warning( + c( + paste( + "{.arg suffix} must be a character vector of length 2, not", + "a {class(suffix)} object of length {length(suffix)}." + ) + ), + class = "camtrapdp_warning_suffix_invalid" + ) + } + + if (any(is.na(suffix))) { + cli::cli_alert_warning( + "{.arg suffix} can't be 'NA'.", + class = "camtrapdp_warning_suffix_NA" + ) + } + x1 <- add_suffix(x1, results_duplicate_ids, suffix[1]) x2 <- add_suffix(x2, results_duplicate_ids, suffix[2]) } From 42bdbdecc4aba8724685d76be724cdc6bf79152d Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Fri, 27 Sep 2024 12:14:28 +0200 Subject: [PATCH 057/142] avoid warning message of `any()` --- R/merge_camtrapdp.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/merge_camtrapdp.R b/R/merge_camtrapdp.R index 0b40e72a..76a6acab 100644 --- a/R/merge_camtrapdp.R +++ b/R/merge_camtrapdp.R @@ -22,7 +22,7 @@ merge_camtrapdp <- function(x1, x2, suffix = c(".x", ".y")) { results_duplicate_ids <- check_duplicate_ids(x1, x2) # Add suffix to identifiers with duplicates - if (any(results_duplicate_ids)) { + if (TRUE %in% results_duplicate_ids) { if (!is.character(suffix) || length(suffix) != 2) { cli::cli_alert_warning( From db1266cdecc24fb5561b96b6d33bfefde0475843 Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Fri, 27 Sep 2024 12:23:57 +0200 Subject: [PATCH 058/142] update tests --- tests/testthat/test-merge_camtrapdp.R | 89 +++++---------------------- 1 file changed, 15 insertions(+), 74 deletions(-) diff --git a/tests/testthat/test-merge_camtrapdp.R b/tests/testthat/test-merge_camtrapdp.R index 35860ca8..6b9e3b66 100644 --- a/tests/testthat/test-merge_camtrapdp.R +++ b/tests/testthat/test-merge_camtrapdp.R @@ -13,86 +13,27 @@ test_that("merge_camtrapdp() returns a valid camtrapdp object", { ) }) -test_that("merge_camtrapdp() returns unique deplpymentIDs, mediaIDs and - observationIDs", { +test_that("merge_camtrapdp() returns no duplicated deploymentIDs, mediaIDs + and observationIDs", { skip_if_offline() - duplicated_deploymentID <- "00a2c20d" - duplicated_mediaID <- "ca3ff293" - x1 <- example_dataset() %>% - filter_deployments(deploymentID %in% c(duplicated_deploymentID, "29b7d356")) - x2 <- example_dataset() %>% - filter_deployments( - deploymentID %in% c(duplicated_deploymentID, "62c200a9") - ) %>% - filter_media(mediaID %in% c(duplicated_mediaID, "bf610120")) - - # get original IDs - original_deploymentIDs <- c( - purrr::pluck(deployments(x1), "deploymentID"), - purrr::pluck(deployments(x2), "deploymentID") - ) - original_mediaIDs <- c( - purrr::pluck(media(x1), "mediaID"), - purrr::pluck(media(x2), "mediaID") - ) - original_observationIDs <- c( - purrr::pluck(observations(x1), "observationID"), - purrr::pluck(observations(x2), "observationID") - ) - duplicated_observationID <- - original_observationIDs[duplicated(original_observationIDs)] + x1 <- example_dataset() - # merge - x_merged <- suppressMessages( - merge_camtrapdp(x1, x2) - ) + # Merge + x_merged <- merge_camtrapdp(x1, x1) - # get new IDs - new_deploymentIDs <- purrr::pluck(deployments(x_merged), "deploymentID") - new_mediaIDs <- purrr::pluck(media(x_merged), "mediaID") - new_observationIDs <- purrr::pluck(observations(x_merged), "observationID") + # Check for duplicates + deploymentIDs <- purrr::pluck(deployments(x1), "deploymentID") + mediaIDs <- purrr::pluck(media(x1), "mediaID") + observationIDs <- purrr::pluck(observations(x1), "observationID") - # tests - expect_true(any(duplicated(original_deploymentIDs))) - expect_false(any(duplicated(new_deploymentIDs))) - expect_true( - vdigest_crc32(duplicated_deploymentID) %in% new_deploymentIDs - ) - expect_identical( - c(duplicated_deploymentID, "29b7d356", "77b0e58b", "62c200a9"), - new_deploymentIDs - ) + # Tests + expect_false(any(duplicated(deploymentIDs))) + expect_false(any(duplicated(mediaIDs))) + expect_false(any(duplicated(observationIDs))) +}) - expect_true(any(duplicated(original_mediaIDs))) - expect_false(any(duplicated(new_mediaIDs))) - expect_true(vdigest_crc32(duplicated_mediaID) %in% new_mediaIDs) +test_that("merge_camtrapdp() adds suffix to duplicated IDs but not if mediaID = NA", { - expect_true(any(duplicated(original_observationIDs))) - expect_false(any(duplicated(new_observationIDs))) - expect_true( - all(vdigest_crc32(duplicated_observationID) %in% new_observationIDs) - ) }) -test_that("merge_camtrapdp() returns message when IDs are replaced", { - skip_if_offline() - x1 <- example_dataset() - x2 <- example_dataset() %>% - filter_deployments(deploymentID %in% c("62c200a9")) %>% - filter_media(mediaID %in% c("fb58a2b9", "0bb2566e", "a6a7a04c")) - expect_message( - merge_camtrapdp(x1, x2) #, - # regexp = paste( - # "! `x1` and `x2` must have unique deploymentIDs.", - # "`x1` and `x2` have duplicated deploymentIDs: \"62c200a9\".", - # "Duplicated deploymentIDs of `x2` are now replaced by \"07ce6950\" respectively.", - # "! `x1` and `x2` must have unique mediaIDs.", - # "`x1` and `x2` have duplicated mediaIDs: \"fb58a2b9\", \"0bb2566e\", and \"a6a7a04c\".", - # "Duplicated mediaIDs of `x2` are now replaced by \"ba426f00\", \"8d5c0009\", and \"1689e0db\" respectively.", - # "! `x1` and `x2` must have unique observationIDs.", - # "`x1` and `x2` have duplicated observationIDs: \"a0431321\", \"fb58a2b9_1\", \"0bb2566e_1\", and \"a6a7a04c_1\".", - # "Duplicated observationIDs of `x2` are now replaced by \"c6eeccc0\", \"a8452c14\", \"a48adc8a\", and \"b78e02ba\" respectively." - # ) - ) -}) From 57c0bb33a2ae2d032953844e94746a4cae39eaed Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Fri, 27 Sep 2024 13:46:21 +0200 Subject: [PATCH 059/142] individualIDs are allowed to be duplicated between data packages --- R/utils.R | 21 ++------------------- 1 file changed, 2 insertions(+), 19 deletions(-) diff --git a/R/utils.R b/R/utils.R index 82e0a59b..bdb27378 100644 --- a/R/utils.R +++ b/R/utils.R @@ -50,7 +50,7 @@ expand_cols <- function(df, colnames) { check_duplicate_ids <- function(x1, x2) { result = list( deploymentID = FALSE, mediaID = FALSE, observationID = FALSE, - eventID = FALSE, individualID = FALSE) + eventID = FALSE) deploymentIDs <- c( unique(purrr::pluck(deployments(x1), "deploymentID")), @@ -68,17 +68,12 @@ check_duplicate_ids <- function(x1, x2) { unique(purrr::pluck(media(x1), "eventID")), unique(purrr::pluck(media(x2), "eventID")) ) - individualIDs <- c( - unique(purrr::pluck(observations(x1), "individualID")), - unique(purrr::pluck(observations(x2), "individualID")) - ) # Check for duplicates if (any(duplicated(deploymentIDs))) {result$deploymentID <- TRUE} if (any(duplicated(mediaIDs))) {result$mediaID <- TRUE} if (any(duplicated(observationIDs))) {result$observationID <- TRUE} if (any(duplicated(eventIDs))) {result$eventID <- TRUE} - if (any(duplicated(individualIDs))) {result$individualID <- TRUE} return(result) } @@ -97,7 +92,7 @@ check_duplicate_ids <- function(x1, x2) { #' @noRd #' @examples #' results_duplicate_ids <- list(deploymentID = TRUE, mediaID = TRUE, -#' observationID = TRUE, eventID = TRUE, individualID = TRUE) +#' observationID = TRUE, eventID = TRUE) #' x <- add_suffix(example_dataset(), results_duplicate_ids, suffix = ".x") add_suffix <- function(x, results_duplicate_ids, suffix) { @@ -169,18 +164,6 @@ add_suffix <- function(x, results_duplicate_ids, suffix) { ) } - # individualID - if (results_duplicate_ids$individualID) { - # Add suffix to individualIDs in observations - observations(x) <- - observations(x) %>% - dplyr::mutate( - individualID = ifelse( - !is.na(.data$individualID), paste0(.data$individualID, suffix), NA - ) - ) - } - return(x) } From 08f668d5b2d42a3b1308802843e46020d7b80066 Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Fri, 27 Sep 2024 13:56:09 +0200 Subject: [PATCH 060/142] replace suffix with prefix --- R/merge_camtrapdp.R | 24 +++++++-------- R/utils.R | 42 +++++++++++++-------------- tests/testthat/test-merge_camtrapdp.R | 2 +- 3 files changed, 34 insertions(+), 34 deletions(-) diff --git a/R/merge_camtrapdp.R b/R/merge_camtrapdp.R index 76a6acab..4183a1d0 100644 --- a/R/merge_camtrapdp.R +++ b/R/merge_camtrapdp.R @@ -2,7 +2,7 @@ #' #' @param x1,x2 Camera Trap Data Package objects (as returned by #' `read_camtrapdp()`), to be coerced to one. -#' @param suffix If there are duplicate IDs in x1 an x2, these suffixes will be +#' @param prefix If there are duplicate IDs in x1 an x2, these prefixes will be #' added to all the values of each identifier with duplicates, to disambiguate #' them. Should be a character vector of length 2. #' @return `x` @@ -14,37 +14,37 @@ #' x2 <- example_dataset() %>% #' filter_deployments(deploymentID %in% c("577b543a", "62c200a9")) #' x_merged <- merge_camtrapdp(x1, x2) -merge_camtrapdp <- function(x1, x2, suffix = c(".x", ".y")) { +merge_camtrapdp <- function(x1, x2, prefix = c("x.", "y.")) { check_camtrapdp(x1) check_camtrapdp(x2) # check if identifiers have duplicates results_duplicate_ids <- check_duplicate_ids(x1, x2) - # Add suffix to identifiers with duplicates + # Add prefix to identifiers with duplicates if (TRUE %in% results_duplicate_ids) { - if (!is.character(suffix) || length(suffix) != 2) { + if (!is.character(prefix) || length(prefix) != 2) { cli::cli_alert_warning( c( paste( - "{.arg suffix} must be a character vector of length 2, not", - "a {class(suffix)} object of length {length(suffix)}." + "{.arg prefix} must be a character vector of length 2, not", + "a {class(prefix)} object of length {length(prefix)}." ) ), - class = "camtrapdp_warning_suffix_invalid" + class = "camtrapdp_warning_prefix_invalid" ) } - if (any(is.na(suffix))) { + if (any(is.na(prefix))) { cli::cli_alert_warning( - "{.arg suffix} can't be 'NA'.", - class = "camtrapdp_warning_suffix_NA" + "{.arg prefix} can't be 'NA'.", + class = "camtrapdp_warning_prefix_NA" ) } - x1 <- add_suffix(x1, results_duplicate_ids, suffix[1]) - x2 <- add_suffix(x2, results_duplicate_ids, suffix[2]) + x1 <- add_prefix(x1, results_duplicate_ids, prefix[1]) + x2 <- add_prefix(x2, results_duplicate_ids, prefix[2]) } # Merge resources diff --git a/R/utils.R b/R/utils.R index bdb27378..25ac9781 100644 --- a/R/utils.R +++ b/R/utils.R @@ -78,12 +78,12 @@ check_duplicate_ids <- function(x1, x2) { return(result) } -#' Add suffix to identifiers with duplicates +#' Add prefix to identifiers with duplicates #' -#' Adds suffix to all values of each identifier that has duplicates. +#' Adds prefix to all values of each identifier that has duplicates. #' #' @inheritParams print.camtrapdp -#' @param suffix The suffix to add to the IDs. +#' @param prefix The prefix to add to the IDs. #' @param results_duplicate_ids Output generated with `check_duplicate_ids()`. #' List with logical for each type of ID, that indicates whether that ID type #' has duplicates. @@ -93,73 +93,73 @@ check_duplicate_ids <- function(x1, x2) { #' @examples #' results_duplicate_ids <- list(deploymentID = TRUE, mediaID = TRUE, #' observationID = TRUE, eventID = TRUE) -#' x <- add_suffix(example_dataset(), results_duplicate_ids, suffix = ".x") -add_suffix <- function(x, results_duplicate_ids, suffix) { +#' x <- add_prefix(example_dataset(), results_duplicate_ids, prefix = ".x") +add_prefix <- function(x, results_duplicate_ids) { # deploymentID if (results_duplicate_ids$deploymentID) { - # Add suffix to deploymentIDs in deployments + # Add prefix to deploymentIDs in deployments deployments(x) <- deployments(x) %>% - dplyr::mutate(deploymentID = paste0(.data$deploymentID, suffix)) + dplyr::mutate(deploymentID = paste0(prefix, .data$deploymentID)) - # Add suffix to deploymentIDs in observations + # Add prefix to deploymentIDs in observations observations(x) <- observations(x) %>% - dplyr::mutate(deploymentID = paste0(.data$deploymentID, suffix)) + dplyr::mutate(deploymentID = paste0(prefix, .data$deploymentID)) - # Add suffix to deploymentIDs in media + # Add prefix to deploymentIDs in media media(x) <- media(x) %>% - dplyr::mutate(deploymentID = paste0(.data$deploymentID, suffix)) + dplyr::mutate(deploymentID = paste0(prefix, .data$deploymentID)) } # mediaID if (results_duplicate_ids$mediaID) { - # Add suffix to mediaIDs in media + # Add prefix to mediaIDs in media media(x) <- media(x) %>% dplyr::mutate( mediaID = ifelse( - !is.na(.data$mediaID), paste0(.data$mediaID, suffix), NA + !is.na(.data$mediaID), paste0(prefix, .data$mediaID), NA ) ) - # Add suffix to mediaIDs in observations + # Add prefix to mediaIDs in observations observations(x) <- observations(x) %>% dplyr::mutate( mediaID = ifelse( - !is.na(.data$mediaID), paste0(.data$mediaID, suffix), NA + !is.na(.data$mediaID), paste0(prefix, .data$mediaID), NA ) ) } # observationID if (results_duplicate_ids$observationID) { - # Add suffix to observationIDs in observations + # Add prefix to observationIDs in observations observations(x) <- observations(x) %>% - dplyr::mutate(observationID = paste0(.data$observationID, suffix)) + dplyr::mutate(observationID = paste0(prefix, .data$observationID)) } # eventID if (results_duplicate_ids$eventID) { - # Add suffix to eventIDs in media + # Add prefix to eventIDs in media media(x) <- media(x) %>% dplyr::mutate( eventID = ifelse( - !is.na(.data$eventID), paste0(.data$eventID, suffix), NA + !is.na(.data$eventID), paste0(prefix, .data$eventID), NA ) ) - # Add suffix to eventIDs in observations + # Add prefix to eventIDs in observations observations(x) <- observations(x) %>% dplyr::mutate( eventID = ifelse( - !is.na(.data$eventID), paste0(.data$eventID, suffix), NA + !is.na(.data$eventID), paste0(prefix, .data$eventID), NA ) ) } diff --git a/tests/testthat/test-merge_camtrapdp.R b/tests/testthat/test-merge_camtrapdp.R index 6b9e3b66..cc397290 100644 --- a/tests/testthat/test-merge_camtrapdp.R +++ b/tests/testthat/test-merge_camtrapdp.R @@ -32,7 +32,7 @@ test_that("merge_camtrapdp() returns no duplicated deploymentIDs, mediaIDs expect_false(any(duplicated(observationIDs))) }) -test_that("merge_camtrapdp() adds suffix to duplicated IDs but not if mediaID = NA", { +test_that("merge_camtrapdp() adds prefix to duplicated IDs but not if mediaID = NA", { }) From f7ca5924293e6105b0b0a58d698188278cb71a86 Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Fri, 27 Sep 2024 14:01:54 +0200 Subject: [PATCH 061/142] typo --- R/utils.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/utils.R b/R/utils.R index 25ac9781..648dd6a6 100644 --- a/R/utils.R +++ b/R/utils.R @@ -94,7 +94,7 @@ check_duplicate_ids <- function(x1, x2) { #' results_duplicate_ids <- list(deploymentID = TRUE, mediaID = TRUE, #' observationID = TRUE, eventID = TRUE) #' x <- add_prefix(example_dataset(), results_duplicate_ids, prefix = ".x") -add_prefix <- function(x, results_duplicate_ids) { +add_prefix <- function(x, results_duplicate_ids, prefix) { # deploymentID if (results_duplicate_ids$deploymentID) { From ee90618dd518d15c02f6a3007cd09622ef794e57 Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Fri, 27 Sep 2024 14:23:27 +0200 Subject: [PATCH 062/142] merge_camtrapdp() adds prefixes to all values of identifiers --- tests/testthat/test-merge_camtrapdp.R | 29 +++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/tests/testthat/test-merge_camtrapdp.R b/tests/testthat/test-merge_camtrapdp.R index cc397290..a5c86807 100644 --- a/tests/testthat/test-merge_camtrapdp.R +++ b/tests/testthat/test-merge_camtrapdp.R @@ -13,27 +13,40 @@ test_that("merge_camtrapdp() returns a valid camtrapdp object", { ) }) -test_that("merge_camtrapdp() returns no duplicated deploymentIDs, mediaIDs - and observationIDs", { +test_that("merge_camtrapdp() returns unique deploymentIDs, mediaIDs and + observationIDs", { skip_if_offline() x1 <- example_dataset() - - # Merge x_merged <- merge_camtrapdp(x1, x1) - # Check for duplicates deploymentIDs <- purrr::pluck(deployments(x1), "deploymentID") mediaIDs <- purrr::pluck(media(x1), "mediaID") observationIDs <- purrr::pluck(observations(x1), "observationID") - # Tests expect_false(any(duplicated(deploymentIDs))) expect_false(any(duplicated(mediaIDs))) expect_false(any(duplicated(observationIDs))) }) -test_that("merge_camtrapdp() adds prefix to duplicated IDs but not if mediaID = NA", { +test_that("merge_camtrapdp() adds prefixes to all values of identifiers + (deploymentID, mediaID, observationID and eventID) with duplicates + between packages, but not for mediaID = NA", { + skip_if_offline() + x <- example_dataset() + x_merged <- merge_camtrapdp(x, x, prefix = c("project1-", "project2-")) -}) + expect_true("project1-00a2c20d" %in% deployments(x_merged)$deploymentID) + expect_true("project2-00a2c20d" %in% deployments(x_merged)$deploymentID) + expect_true("project1-00a2c20d" %in% media(x_merged)$deploymentID) + expect_true("project1-00a2c20d" %in% observations(x_merged)$deploymentID) + expect_true("project1-07840dcc" %in% media(x_merged)$mediaID) + expect_true("project1-07840dcc" %in% observations(x_merged)$mediaID) + expect_false("project1-NA" %in% observations(x_merged)$mediaID) + expect_true(NA %in% observations(x_merged)$mediaID) + expect_true("project1-705e6036" %in% observations(x_merged)$observationID) + + expect_true("project1-4bb69c45" %in% media(x_merged)$eventID) + expect_true("project1-4bb69c45" %in% observations(x_merged)$eventID) +}) From c0119f0d9b4bb43668a9b019f1b4ab0806ef6c95 Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Fri, 27 Sep 2024 14:51:00 +0200 Subject: [PATCH 063/142] devtools::document() --- man/merge_camtrapdp.Rd | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/man/merge_camtrapdp.Rd b/man/merge_camtrapdp.Rd index db19ef23..537a9392 100644 --- a/man/merge_camtrapdp.Rd +++ b/man/merge_camtrapdp.Rd @@ -4,11 +4,15 @@ \alias{merge_camtrapdp} \title{Merge Camera Trap Data packages} \usage{ -merge_camtrapdp(x1, x2) +merge_camtrapdp(x1, x2, prefix = c("x.", "y.")) } \arguments{ \item{x1, x2}{Camera Trap Data Package objects (as returned by \code{read_camtrapdp()}), to be coerced to one.} + +\item{prefix}{If there are duplicate IDs in x1 an x2, these prefixes will be +added to all the values of each identifier with duplicates, to disambiguate +them. Should be a character vector of length 2.} } \value{ \code{x} From af85ae612e7aae4ff1e19babe0fa104ddabfe963 Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Fri, 27 Sep 2024 14:51:20 +0200 Subject: [PATCH 064/142] test on warning invalid prefix --- tests/testthat/test-merge_camtrapdp.R | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tests/testthat/test-merge_camtrapdp.R b/tests/testthat/test-merge_camtrapdp.R index a5c86807..37fbda24 100644 --- a/tests/testthat/test-merge_camtrapdp.R +++ b/tests/testthat/test-merge_camtrapdp.R @@ -1,3 +1,21 @@ +test_that("merge_camtrapdp() warns on invalid prefix", { + skip_if_offline() + x <- example_dataset() + expect_error( + merge_camtrapdp(x, x, prefix = c(1, 2)), + class = "camtrapdp_warning_prefix_invalid" + ) + expect_error( + merge_camtrapdp(x, x, prefix = c("one", "two", "three")), + class = "camtrapdp_warning_prefix_invalid" + ) + expect_error( + merge_camtrapdp(x, x, prefix = c("one-", NA)), + class = "camtrapdp_warning_prefix_NA" + ) + expect_no_error(merge_camtrapdp(x, x, prefix = c("this_", "works_"))) +}) + test_that("merge_camtrapdp() returns a valid camtrapdp object", { skip_if_offline() x1 <- example_dataset() %>% From 4509b4715874922f03429efd086d74f637cc70b7 Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Fri, 27 Sep 2024 14:58:11 +0200 Subject: [PATCH 065/142] Update merge_camtrapdp.R --- R/merge_camtrapdp.R | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/R/merge_camtrapdp.R b/R/merge_camtrapdp.R index 4183a1d0..44cf035d 100644 --- a/R/merge_camtrapdp.R +++ b/R/merge_camtrapdp.R @@ -55,14 +55,11 @@ merge_camtrapdp <- function(x1, x2, prefix = c("x.", "y.")) { # Merge/update metadata x$name <- NA - # Create new ID x$id <- NA x$created <- format(Sys.time(), "%Y-%m-%dT%H:%M:%SZ") x$title <- NA x$contributors <- remove_duplicates(c(x1$contributors, x2$contributors)) - paragraph <- paste0( - "This dataset is a combination of 2 datasets: ", x1$title, "and", x2$title, - ".") + x$description <- paste(x1$description, x2$description, sep = "/n") x$version <- "1.0" x$keywords <- unique(x1$keywords, x2$keywords) x$image <- NULL @@ -74,6 +71,7 @@ merge_camtrapdp <- function(x1, x2, prefix = c("x.", "y.")) { x$bibliographicCitation <- NULL x$coordinatePrecision <- max(x1$coordinatePrecision, x2$coordinatePrecision, na.rm = TRUE) + if (!is.null(x1$id)) { relatedIdentifiers_x1 <- list( relationType = "IsDerivedFrom", From 8f759b03844ac0562f3c406b88dece5c5ededc6c Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Fri, 27 Sep 2024 15:03:31 +0200 Subject: [PATCH 066/142] raise error, not warning --- R/merge_camtrapdp.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/merge_camtrapdp.R b/R/merge_camtrapdp.R index 44cf035d..fe95c7a3 100644 --- a/R/merge_camtrapdp.R +++ b/R/merge_camtrapdp.R @@ -25,7 +25,7 @@ merge_camtrapdp <- function(x1, x2, prefix = c("x.", "y.")) { if (TRUE %in% results_duplicate_ids) { if (!is.character(prefix) || length(prefix) != 2) { - cli::cli_alert_warning( + cli::cli_abort( c( paste( "{.arg prefix} must be a character vector of length 2, not", @@ -37,7 +37,7 @@ merge_camtrapdp <- function(x1, x2, prefix = c("x.", "y.")) { } if (any(is.na(prefix))) { - cli::cli_alert_warning( + cli::cli_abort( "{.arg prefix} can't be 'NA'.", class = "camtrapdp_warning_prefix_NA" ) From fecb8a446cc06ae416914167f93d76f498a54f71 Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Fri, 27 Sep 2024 16:30:19 +0200 Subject: [PATCH 067/142] merge_camtrapdp() returns error on duplicate Data Package id --- R/merge_camtrapdp.R | 13 +++++++++++++ tests/testthat/test-merge_camtrapdp.R | 9 +++++++++ 2 files changed, 22 insertions(+) diff --git a/R/merge_camtrapdp.R b/R/merge_camtrapdp.R index fe95c7a3..a3cb0fdc 100644 --- a/R/merge_camtrapdp.R +++ b/R/merge_camtrapdp.R @@ -18,6 +18,19 @@ merge_camtrapdp <- function(x1, x2, prefix = c("x.", "y.")) { check_camtrapdp(x1) check_camtrapdp(x2) + if (x1$id == x2$id) { + cli::cli_abort( + c( + paste0( + "{.arg x1} and {.arg x2} should be different Camera Trap Data", + "Package objects with unique identifiers." + ), + x = "{.arg x1} and {.arg x2} have the same id: {.value x1$id}" + ), + class = "camtrapdp_error_camtrapdpid_duplicated" + ) + } + # check if identifiers have duplicates results_duplicate_ids <- check_duplicate_ids(x1, x2) diff --git a/tests/testthat/test-merge_camtrapdp.R b/tests/testthat/test-merge_camtrapdp.R index 37fbda24..770ff8cb 100644 --- a/tests/testthat/test-merge_camtrapdp.R +++ b/tests/testthat/test-merge_camtrapdp.R @@ -1,3 +1,12 @@ +test_that("merge_camtrapdp() returns error on duplicate Data Package id", { + skip_if_offline() + x <- example_dataset() + expect_error( + merge_camtrapdp(x, x), + class = "camtrapdp_error_camtrapdpid_duplicated" + ) +}) + test_that("merge_camtrapdp() warns on invalid prefix", { skip_if_offline() x <- example_dataset() From 99e91aded196fbc39c9276c9f4c8d7170fb9b743 Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Fri, 27 Sep 2024 16:31:21 +0200 Subject: [PATCH 068/142] correction: not a warning but error --- R/merge_camtrapdp.R | 4 ++-- tests/testthat/test-merge_camtrapdp.R | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/R/merge_camtrapdp.R b/R/merge_camtrapdp.R index a3cb0fdc..56811a11 100644 --- a/R/merge_camtrapdp.R +++ b/R/merge_camtrapdp.R @@ -45,14 +45,14 @@ merge_camtrapdp <- function(x1, x2, prefix = c("x.", "y.")) { "a {class(prefix)} object of length {length(prefix)}." ) ), - class = "camtrapdp_warning_prefix_invalid" + class = "camtrapdp_error_prefix_invalid" ) } if (any(is.na(prefix))) { cli::cli_abort( "{.arg prefix} can't be 'NA'.", - class = "camtrapdp_warning_prefix_NA" + class = "camtrapdp_error_prefix_NA" ) } diff --git a/tests/testthat/test-merge_camtrapdp.R b/tests/testthat/test-merge_camtrapdp.R index 770ff8cb..9a391932 100644 --- a/tests/testthat/test-merge_camtrapdp.R +++ b/tests/testthat/test-merge_camtrapdp.R @@ -7,20 +7,20 @@ test_that("merge_camtrapdp() returns error on duplicate Data Package id", { ) }) -test_that("merge_camtrapdp() warns on invalid prefix", { +test_that("merge_camtrapdp() returns error on invalid prefix", { skip_if_offline() x <- example_dataset() expect_error( merge_camtrapdp(x, x, prefix = c(1, 2)), - class = "camtrapdp_warning_prefix_invalid" + class = "camtrapdp_error_prefix_invalid" ) expect_error( merge_camtrapdp(x, x, prefix = c("one", "two", "three")), - class = "camtrapdp_warning_prefix_invalid" + class = "camtrapdp_error_prefix_invalid" ) expect_error( merge_camtrapdp(x, x, prefix = c("one-", NA)), - class = "camtrapdp_warning_prefix_NA" + class = "camtrapdp_error_prefix_NA" ) expect_no_error(merge_camtrapdp(x, x, prefix = c("this_", "works_"))) }) From fe7b09bf17bfdcb9593355dff448a8e2a785489d Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Fri, 27 Sep 2024 16:36:24 +0200 Subject: [PATCH 069/142] give unique ids to example datasets to merge --- tests/testthat/test-merge_camtrapdp.R | 30 +++++++++++++++++++-------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/tests/testthat/test-merge_camtrapdp.R b/tests/testthat/test-merge_camtrapdp.R index 9a391932..794e4aa6 100644 --- a/tests/testthat/test-merge_camtrapdp.R +++ b/tests/testthat/test-merge_camtrapdp.R @@ -9,20 +9,26 @@ test_that("merge_camtrapdp() returns error on duplicate Data Package id", { test_that("merge_camtrapdp() returns error on invalid prefix", { skip_if_offline() - x <- example_dataset() + x1 <- example_dataset() + x2 <- example_dataset() + x1$id <- 1 + x2$id <- 2 + expect_error( - merge_camtrapdp(x, x, prefix = c(1, 2)), + merge_camtrapdp(x1, x2, prefix = c(1, 2)), class = "camtrapdp_error_prefix_invalid" ) expect_error( - merge_camtrapdp(x, x, prefix = c("one", "two", "three")), + merge_camtrapdp(x1, x2, prefix = c("one", "two", "three")), class = "camtrapdp_error_prefix_invalid" ) expect_error( - merge_camtrapdp(x, x, prefix = c("one-", NA)), + merge_camtrapdp(x1, x2, prefix = c("one-", NA)), class = "camtrapdp_error_prefix_NA" ) - expect_no_error(merge_camtrapdp(x, x, prefix = c("this_", "works_"))) + expect_no_error(merge_camtrapdp(x1, x2, prefix = c("this_", "works_"))) + prefix_ids <- c(paste0(x1$id, "-"), paste0(x2$id, "-")) + expect_no_error(merge_camtrapdp(x1, x2, prefix = prefix_ids)) }) test_that("merge_camtrapdp() returns a valid camtrapdp object", { @@ -34,7 +40,7 @@ test_that("merge_camtrapdp() returns a valid camtrapdp object", { expect_no_error( suppressMessages( check_camtrapdp( - merge_camtrapdp(x1, x2) + merge_camtrapdp(x1, x2,) ) ) ) @@ -44,7 +50,10 @@ test_that("merge_camtrapdp() returns unique deploymentIDs, mediaIDs and observationIDs", { skip_if_offline() x1 <- example_dataset() - x_merged <- merge_camtrapdp(x1, x1) + x2 <- example_dataset() + x1$id <- 1 + x2$id <- 2 + x_merged <- merge_camtrapdp(x1, x2) deploymentIDs <- purrr::pluck(deployments(x1), "deploymentID") mediaIDs <- purrr::pluck(media(x1), "mediaID") @@ -59,8 +68,11 @@ test_that("merge_camtrapdp() adds prefixes to all values of identifiers (deploymentID, mediaID, observationID and eventID) with duplicates between packages, but not for mediaID = NA", { skip_if_offline() - x <- example_dataset() - x_merged <- merge_camtrapdp(x, x, prefix = c("project1-", "project2-")) + x1 <- example_dataset() + x2 <- example_dataset() + x1$id <- 1 + x2$id <- 2 + x_merged <- merge_camtrapdp(x1, x2, prefix = c("project1-", "project2-")) expect_true("project1-00a2c20d" %in% deployments(x_merged)$deploymentID) expect_true("project2-00a2c20d" %in% deployments(x_merged)$deploymentID) From da0cb182a39dd12e2ce0303fb1343eb75d0fb268 Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Mon, 30 Sep 2024 10:50:31 +0200 Subject: [PATCH 070/142] reorder tests --- tests/testthat/test-merge_camtrapdp.R | 30 +++++++++++++-------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/tests/testthat/test-merge_camtrapdp.R b/tests/testthat/test-merge_camtrapdp.R index 794e4aa6..770214b2 100644 --- a/tests/testthat/test-merge_camtrapdp.R +++ b/tests/testthat/test-merge_camtrapdp.R @@ -1,3 +1,18 @@ +test_that("merge_camtrapdp() returns a valid camtrapdp object", { + skip_if_offline() + x1 <- example_dataset() %>% + filter_deployments(deploymentID %in% c("00a2c20d", "29b7d356")) + x2 <- example_dataset() %>% + filter_deployments(deploymentID %in% c("577b543a", "62c200a9")) + expect_no_error( + suppressMessages( + check_camtrapdp( + merge_camtrapdp(x1, x2,) + ) + ) + ) +}) + test_that("merge_camtrapdp() returns error on duplicate Data Package id", { skip_if_offline() x <- example_dataset() @@ -31,21 +46,6 @@ test_that("merge_camtrapdp() returns error on invalid prefix", { expect_no_error(merge_camtrapdp(x1, x2, prefix = prefix_ids)) }) -test_that("merge_camtrapdp() returns a valid camtrapdp object", { - skip_if_offline() - x1 <- example_dataset() %>% - filter_deployments(deploymentID %in% c("00a2c20d", "29b7d356")) - x2 <- example_dataset() %>% - filter_deployments(deploymentID %in% c("577b543a", "62c200a9")) - expect_no_error( - suppressMessages( - check_camtrapdp( - merge_camtrapdp(x1, x2,) - ) - ) - ) -}) - test_that("merge_camtrapdp() returns unique deploymentIDs, mediaIDs and observationIDs", { skip_if_offline() From dac7973c05c2c2be452d9f7bd42ef6f6a59cf892 Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Mon, 30 Sep 2024 11:00:18 +0200 Subject: [PATCH 071/142] camtrapdp id must be character --- R/merge_camtrapdp.R | 4 ++-- tests/testthat/test-merge_camtrapdp.R | 29 +++++++++++---------------- 2 files changed, 14 insertions(+), 19 deletions(-) diff --git a/R/merge_camtrapdp.R b/R/merge_camtrapdp.R index 56811a11..90eb1179 100644 --- a/R/merge_camtrapdp.R +++ b/R/merge_camtrapdp.R @@ -88,7 +88,7 @@ merge_camtrapdp <- function(x1, x2, prefix = c("x.", "y.")) { if (!is.null(x1$id)) { relatedIdentifiers_x1 <- list( relationType = "IsDerivedFrom", - relatedIdentifier = x1$id, + relatedIdentifier = as.character(x1$id), resourceTypeGeneral = "Data package", relatedIdentifierType = "id" ) @@ -98,7 +98,7 @@ merge_camtrapdp <- function(x1, x2, prefix = c("x.", "y.")) { if (!is.null(x2$id)) { relatedIdentifiers_x2 <- list( relationType = "IsDerivedFrom", - relatedIdentifier = x2$id, + relatedIdentifier = as.character(x2$id), resourceTypeGeneral = "Data package", relatedIdentifierType = "id" ) diff --git a/tests/testthat/test-merge_camtrapdp.R b/tests/testthat/test-merge_camtrapdp.R index 770214b2..e444d4a2 100644 --- a/tests/testthat/test-merge_camtrapdp.R +++ b/tests/testthat/test-merge_camtrapdp.R @@ -1,16 +1,11 @@ test_that("merge_camtrapdp() returns a valid camtrapdp object", { skip_if_offline() - x1 <- example_dataset() %>% - filter_deployments(deploymentID %in% c("00a2c20d", "29b7d356")) - x2 <- example_dataset() %>% - filter_deployments(deploymentID %in% c("577b543a", "62c200a9")) - expect_no_error( - suppressMessages( - check_camtrapdp( - merge_camtrapdp(x1, x2,) - ) - ) - ) + x1 <- example_dataset() + x2 <- example_dataset() + x1$id <- "1" + x2$id <- "2" + + expect_no_error(check_camtrapdp(merge_camtrapdp(x1, x2))) }) test_that("merge_camtrapdp() returns error on duplicate Data Package id", { @@ -26,8 +21,8 @@ test_that("merge_camtrapdp() returns error on invalid prefix", { skip_if_offline() x1 <- example_dataset() x2 <- example_dataset() - x1$id <- 1 - x2$id <- 2 + x1$id <- "1" + x2$id <- "2" expect_error( merge_camtrapdp(x1, x2, prefix = c(1, 2)), @@ -51,8 +46,8 @@ test_that("merge_camtrapdp() returns unique deploymentIDs, mediaIDs and skip_if_offline() x1 <- example_dataset() x2 <- example_dataset() - x1$id <- 1 - x2$id <- 2 + x1$id <- "1" + x2$id <- "2" x_merged <- merge_camtrapdp(x1, x2) deploymentIDs <- purrr::pluck(deployments(x1), "deploymentID") @@ -70,8 +65,8 @@ test_that("merge_camtrapdp() adds prefixes to all values of identifiers skip_if_offline() x1 <- example_dataset() x2 <- example_dataset() - x1$id <- 1 - x2$id <- 2 + x1$id <- "1" + x2$id <- "2" x_merged <- merge_camtrapdp(x1, x2, prefix = c("project1-", "project2-")) expect_true("project1-00a2c20d" %in% deployments(x_merged)$deploymentID) From abe30ab28cb783f6ace525a2c374c4408adfa16f Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Mon, 30 Sep 2024 11:05:58 +0200 Subject: [PATCH 072/142] change default prefix --- R/merge_camtrapdp.R | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/R/merge_camtrapdp.R b/R/merge_camtrapdp.R index 90eb1179..1f18e168 100644 --- a/R/merge_camtrapdp.R +++ b/R/merge_camtrapdp.R @@ -4,7 +4,8 @@ #' `read_camtrapdp()`), to be coerced to one. #' @param prefix If there are duplicate IDs in x1 an x2, these prefixes will be #' added to all the values of each identifier with duplicates, to disambiguate -#' them. Should be a character vector of length 2. +#' them. Should be a character vector of length 2. By default, the prefixes are +#' the ID's of the Data Package. #' @return `x` #' @family transformation functions #' @export @@ -14,7 +15,7 @@ #' x2 <- example_dataset() %>% #' filter_deployments(deploymentID %in% c("577b543a", "62c200a9")) #' x_merged <- merge_camtrapdp(x1, x2) -merge_camtrapdp <- function(x1, x2, prefix = c("x.", "y.")) { +merge_camtrapdp <- function(x1, x2, prefix) { check_camtrapdp(x1) check_camtrapdp(x2) @@ -56,6 +57,10 @@ merge_camtrapdp <- function(x1, x2, prefix = c("x.", "y.")) { ) } + # Set default prefixes + if (is.null(prefix)) { + prefix <- c(paste0(x1$id, "_"), paste0(x2$id, "_"))} + x1 <- add_prefix(x1, results_duplicate_ids, prefix[1]) x2 <- add_prefix(x2, results_duplicate_ids, prefix[2]) } From b5c09921e11498f51a62355bee3b1f5f3c94c0f6 Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Mon, 30 Sep 2024 11:34:51 +0200 Subject: [PATCH 073/142] set default in function --- R/merge_camtrapdp.R | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/R/merge_camtrapdp.R b/R/merge_camtrapdp.R index 1f18e168..2f6b8cb8 100644 --- a/R/merge_camtrapdp.R +++ b/R/merge_camtrapdp.R @@ -15,7 +15,7 @@ #' x2 <- example_dataset() %>% #' filter_deployments(deploymentID %in% c("577b543a", "62c200a9")) #' x_merged <- merge_camtrapdp(x1, x2) -merge_camtrapdp <- function(x1, x2, prefix) { +merge_camtrapdp <- function(x1, x2, prefix = c(x1$id, x2$id)) { check_camtrapdp(x1) check_camtrapdp(x2) @@ -57,10 +57,6 @@ merge_camtrapdp <- function(x1, x2, prefix) { ) } - # Set default prefixes - if (is.null(prefix)) { - prefix <- c(paste0(x1$id, "_"), paste0(x2$id, "_"))} - x1 <- add_prefix(x1, results_duplicate_ids, prefix[1]) x2 <- add_prefix(x2, results_duplicate_ids, prefix[2]) } From 006f217cab8207f89c3f7f439226e5f897ef0117 Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Mon, 30 Sep 2024 11:39:11 +0200 Subject: [PATCH 074/142] typo --- tests/testthat/test-merge_camtrapdp.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/testthat/test-merge_camtrapdp.R b/tests/testthat/test-merge_camtrapdp.R index e444d4a2..9ccfa5b9 100644 --- a/tests/testthat/test-merge_camtrapdp.R +++ b/tests/testthat/test-merge_camtrapdp.R @@ -50,9 +50,9 @@ test_that("merge_camtrapdp() returns unique deploymentIDs, mediaIDs and x2$id <- "2" x_merged <- merge_camtrapdp(x1, x2) - deploymentIDs <- purrr::pluck(deployments(x1), "deploymentID") - mediaIDs <- purrr::pluck(media(x1), "mediaID") - observationIDs <- purrr::pluck(observations(x1), "observationID") + deploymentIDs <- purrr::pluck(deployments(x_merged), "deploymentID") + mediaIDs <- purrr::pluck(media(x_merged), "mediaID") + observationIDs <- purrr::pluck(observations(x_merged), "observationID") expect_false(any(duplicated(deploymentIDs))) expect_false(any(duplicated(mediaIDs))) From cbda9db1d88086fcd66665666911703dbf93e7d5 Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Mon, 30 Sep 2024 13:49:54 +0200 Subject: [PATCH 075/142] add tests for metadata --- tests/testthat/test-merge_camtrapdp.R | 63 +++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/tests/testthat/test-merge_camtrapdp.R b/tests/testthat/test-merge_camtrapdp.R index 9ccfa5b9..36e40080 100644 --- a/tests/testthat/test-merge_camtrapdp.R +++ b/tests/testthat/test-merge_camtrapdp.R @@ -84,3 +84,66 @@ test_that("merge_camtrapdp() adds prefixes to all values of identifiers expect_true("project1-4bb69c45" %in% media(x_merged)$eventID) expect_true("project1-4bb69c45" %in% observations(x_merged)$eventID) }) + +test_that("merge_camtrapdp() returns the expected result", { + skip_if_offline() + x1 <- example_dataset() + x2 <- example_dataset() + x1$id <- "1" + x2$id <- "2" + x_merged <- merge_camtrapdp(x1, x2) + + # Check metadata + expect_identical(x_merged$resources, x1$resources) + expect_identical(x_merged$profile, "https://raw.githubusercontent.com/tdwg/camtrap-dp/1.0.1/camtrap-dp-profile.json") + expect_identical(x_merged$name, NA) + expect_identical(x_merged$id, NA) + expect_identical(x_merged$title, NA) + expect_identical(x_merged$contributors, x1$contributors) + # no test for description + expect_identical(x_merged$version, "1.0") + expect_identical(x_merged$keywords, x1$keywords) + expect_identical(x_merged$image, NULL) + expect_identical(x_merged$homepage, NULL) + expect_identical(x_merged$sources, x1$sources) + expect_equal(x_merged$licenses, x1$licenses) # fails because remove_duplicates switches order of subelements + expect_identical(x_merged$bibliographicCitation, NULL) + expect_identical(x_merged$projects, list(x1$project, x2$project)) + expect_identical(x_merged$coordinatePrecision, x1$coordinatePrecision) + expect_identical(x_merged$spatial, x1$spatial) + expect_identical(x_merged$temporal, x1$temporal) + expect_identical(x_merged$taxonomic, x1$taxonomic) + expect_identical(x_merged$references, x1$references) + expect_identical(x_merged$directory, x1$directory) + + relatedIdentifiers_merged <- list( + list( + relationType = "IsDerivedFrom", + relatedIdentifier = "https://doi.org/10.15468/5tb6ze", + resourceTypeGeneral = "Dataset", + relatedIdentifierType = "DOI" + ), + list( + relationType = "IsSupplementTo", + relatedIdentifier = "https://inbo.github.io/camtraptor/", + resourceTypeGeneral = "Software", + relatedIdentifierType = "URL" + ), + list( + relationType = "IsDerivedFrom", + relatedIdentifier = "1", + resourceTypeGeneral = "Data package", + relatedIdentifierType = "id" + ), + list( + relationType = "IsDerivedFrom", + relatedIdentifier = "2", + resourceTypeGeneral = "Data package", + relatedIdentifierType = "id" + ) + ) + + expect_identical(x_merged$relatedIdentifiers, relatedIdentifiers_merged) + + # Check data +}) From d76b4916bc5e79c44033bc33cfd736168baa4f2e Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Mon, 30 Sep 2024 17:15:51 +0200 Subject: [PATCH 076/142] account for ID == NULL --- R/merge_camtrapdp.R | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/R/merge_camtrapdp.R b/R/merge_camtrapdp.R index 2f6b8cb8..5452d2b1 100644 --- a/R/merge_camtrapdp.R +++ b/R/merge_camtrapdp.R @@ -19,17 +19,19 @@ merge_camtrapdp <- function(x1, x2, prefix = c(x1$id, x2$id)) { check_camtrapdp(x1) check_camtrapdp(x2) - if (x1$id == x2$id) { - cli::cli_abort( - c( - paste0( - "{.arg x1} and {.arg x2} should be different Camera Trap Data", - "Package objects with unique identifiers." + if (!is.null(x1$id) & !is.null(x2$id)) { + if (x1$id == x2$id) { + cli::cli_abort( + c( + paste0( + "{.arg x1} and {.arg x2} should be different Camera Trap Data", + "Package objects with unique identifiers." + ), + x = "{.arg x1} and {.arg x2} have the same id: {.value x1$id}" ), - x = "{.arg x1} and {.arg x2} have the same id: {.value x1$id}" - ), - class = "camtrapdp_error_camtrapdpid_duplicated" - ) + class = "camtrapdp_error_camtrapdpid_duplicated" + ) + } } # check if identifiers have duplicates From 472f3d3eef5da0cf8f65a342157dd45b5e09a1f4 Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Mon, 30 Sep 2024 17:16:46 +0200 Subject: [PATCH 077/142] correct mistake in keywords --- R/merge_camtrapdp.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/merge_camtrapdp.R b/R/merge_camtrapdp.R index 5452d2b1..65254e75 100644 --- a/R/merge_camtrapdp.R +++ b/R/merge_camtrapdp.R @@ -77,7 +77,7 @@ merge_camtrapdp <- function(x1, x2, prefix = c(x1$id, x2$id)) { x$contributors <- remove_duplicates(c(x1$contributors, x2$contributors)) x$description <- paste(x1$description, x2$description, sep = "/n") x$version <- "1.0" - x$keywords <- unique(x1$keywords, x2$keywords) + x$keywords <- unique(c(x1$keywords, x2$keywords)) x$image <- NULL x$homepage <- NULL x$sources <- remove_duplicates(c(x1$sources, x2$sources)) From 4386be9be794389152c2a7c4cc521c60852b276d Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Mon, 30 Sep 2024 17:22:47 +0200 Subject: [PATCH 078/142] add tests (work in progress) --- tests/testthat/test-merge_camtrapdp.R | 121 +++++++++++++++++++++++++- 1 file changed, 120 insertions(+), 1 deletion(-) diff --git a/tests/testthat/test-merge_camtrapdp.R b/tests/testthat/test-merge_camtrapdp.R index 36e40080..f9cf4a55 100644 --- a/tests/testthat/test-merge_camtrapdp.R +++ b/tests/testthat/test-merge_camtrapdp.R @@ -67,25 +67,36 @@ test_that("merge_camtrapdp() adds prefixes to all values of identifiers x2 <- example_dataset() x1$id <- "1" x2$id <- "2" + + # Default prefixes + x_merged_default <- merge_camtrapdp(x1, x2) + expect_true("100a2c20d" %in% deployments(x_merged)$deploymentID) + expect_true("200a2c20d" %in% deployments(x_merged)$deploymentID) + + # Custom prefixes x_merged <- merge_camtrapdp(x1, x2, prefix = c("project1-", "project2-")) + # deploymentID expect_true("project1-00a2c20d" %in% deployments(x_merged)$deploymentID) expect_true("project2-00a2c20d" %in% deployments(x_merged)$deploymentID) expect_true("project1-00a2c20d" %in% media(x_merged)$deploymentID) expect_true("project1-00a2c20d" %in% observations(x_merged)$deploymentID) + # mediaID expect_true("project1-07840dcc" %in% media(x_merged)$mediaID) expect_true("project1-07840dcc" %in% observations(x_merged)$mediaID) expect_false("project1-NA" %in% observations(x_merged)$mediaID) expect_true(NA %in% observations(x_merged)$mediaID) + # observationID expect_true("project1-705e6036" %in% observations(x_merged)$observationID) + # eventID expect_true("project1-4bb69c45" %in% media(x_merged)$eventID) expect_true("project1-4bb69c45" %in% observations(x_merged)$eventID) }) -test_that("merge_camtrapdp() returns the expected result", { +test_that("merge_camtrapdp() returns the expected metadata", { skip_if_offline() x1 <- example_dataset() x2 <- example_dataset() @@ -147,3 +158,111 @@ test_that("merge_camtrapdp() returns the expected result", { # Check data }) + +test_that("merge_camtrapdp() does x and y", { + skip_if_offline() + x1 <- example_dataset() + + temp_dir <- tempdir() + on.exit(unlink(temp_dir, recursive = TRUE)) + zip_file <- file.path(temp_dir, "dataset.zip") + datapackage_file <- file.path(temp_dir, "datapackage.json") + url <- "https://ipt.nlbif.nl/archive.do?r=awd_pilot2" + + download.file(url, zip_file, mode = 'wb') + unzip(zip_file, exdir = temp_dir) + + x2 <- read_camtrapdp(datapackage_file) + + x_merged <- merge_camtrapdp(x1, x2) + + # Check metadata + profile <- "https://raw.githubusercontent.com/tdwg/camtrap-dp/1.0.1/camtrap-dp-profile.json" + contributors <- list( + list( + title = "Axel Neukermans", + email = "axel.neukermans@inbo.be", + path = "https://orcid.org/0000-0003-0272-9180", + role = "contributor", + organization = "Research Institute for Nature and Forest (INBO)" + ), + list( + title = "Danny Van der beeck", + email = "daniel.vanderbeeck@gmail.com" + ), + list( + title = "Emma Cartuyvels", + email = "emma.cartuyvels@inbo.be", + role = "principalInvestigator", + organization = "Research Institute for Nature and Forest (INBO)" + ), + list( + title = "Peter Desmet", + email = "peter.desmet@inbo.be", + path = "https://orcid.org/0000-0002-8442-8025", + role = "contact", + organization = "Research Institute for Nature and Forest (INBO)" + ), + list( + title = "Research Institute for Nature and Forest (INBO)", + path = "https://inbo.be", + role = "rightsHolder" + ), + list( + title = "Research Institute for Nature and Forest (INBO)", + path = "https://inbo.be", + role = "publisher" + ), + list( + title = "Julian Evans", + email = "jevansbio@gmail.com", + role = "principalInvestigator", + organization = "University of Amsterdam", + firstName = "Julian", + lastName = "Evans" + ), + list( + title = "Rotem Zilber", + email = "r.kadanzilber@uva.nl", + role = "principalInvestigator", + organization = "University of Amsterdam", + firstName = "Rotem", + lastName = "Zilber" + ), + list( + title = "W. Daniel Kissling", + email = "wdkissling@gmail.com", + path = "https://www.danielkissling.de/", + role = "principalInvestigator", + organization = "University of Amsterdam", + firstName = "W. Daniel ", + lastName = "Kissling" + ) + ) + + expect_identical(x_merged$resources, x1$resources) + expect_identical(x_merged$profile, profile) + expect_identical(x_merged$name, NA) + expect_identical(x_merged$id, NA) + expect_identical(x_merged$title, NA) + expect_identical(x_merged$contributors, contributors) + # no test for description + expect_identical(x_merged$version, "1.0") + expect_identical(x_merged$keywords, c(x1$keywords, x2$keywords)) + expect_identical(x_merged$image, NULL) + expect_identical(x_merged$homepage, NULL) + expect_identical(x_merged$sources, sources) + expect_equal(x_merged$licenses, licenses) # fails because remove_duplicates switches order of subelements + expect_identical(x_merged$bibliographicCitation, NULL) + expect_identical(x_merged$projects, list(project, x2$project)) + expect_identical(x_merged$coordinatePrecision, coordinatePrecision) + expect_identical(x_merged$spatial, spatial) + expect_identical(x_merged$temporal, temporal) + expect_identical(x_merged$taxonomic, taxonomic) + expect_identical(x_merged$references, references) + expect_identical(x_merged$directory, directory) + + relatedIdentifiers_merged <- list() + + expect_identical(x_merged$relatedIdentifiers, relatedIdentifiers_merged) +}) From 2ff6d694d0e1e7b26cebf2685bdb510f79b7460c Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Tue, 1 Oct 2024 08:50:50 +0200 Subject: [PATCH 079/142] typo --- tests/testthat/test-merge_camtrapdp.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-merge_camtrapdp.R b/tests/testthat/test-merge_camtrapdp.R index f9cf4a55..8778eb6c 100644 --- a/tests/testthat/test-merge_camtrapdp.R +++ b/tests/testthat/test-merge_camtrapdp.R @@ -254,7 +254,7 @@ test_that("merge_camtrapdp() does x and y", { expect_identical(x_merged$sources, sources) expect_equal(x_merged$licenses, licenses) # fails because remove_duplicates switches order of subelements expect_identical(x_merged$bibliographicCitation, NULL) - expect_identical(x_merged$projects, list(project, x2$project)) + expect_identical(x_merged$projects, list(x1$project, x2$project)) expect_identical(x_merged$coordinatePrecision, coordinatePrecision) expect_identical(x_merged$spatial, spatial) expect_identical(x_merged$temporal, temporal) From 96b93222b7b4f423c6c5c35ebcfcc546c3061717 Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Tue, 1 Oct 2024 17:06:48 +0200 Subject: [PATCH 080/142] update test on metadata --- tests/testthat/test-merge_camtrapdp.R | 75 ++++++++++++++++++++++++--- 1 file changed, 69 insertions(+), 6 deletions(-) diff --git a/tests/testthat/test-merge_camtrapdp.R b/tests/testthat/test-merge_camtrapdp.R index 8778eb6c..6b2c8eac 100644 --- a/tests/testthat/test-merge_camtrapdp.R +++ b/tests/testthat/test-merge_camtrapdp.R @@ -159,7 +159,8 @@ test_that("merge_camtrapdp() returns the expected metadata", { # Check data }) -test_that("merge_camtrapdp() does x and y", { +test_that("merge_camtrapdp() returns the expected metadata when merging two + different Data Packages", { skip_if_offline() x1 <- example_dataset() @@ -240,6 +241,71 @@ test_that("merge_camtrapdp() does x and y", { ) ) + sources <- list( + list( + title = "Agouti", + path = "https://www.agouti.eu", + email = "agouti@wur.nl", + version = "v3.21" + ), + list( + title = "Agouti", + path = "https://www.agouti.eu", + email = "agouti@wur.nl", + version = "v4" + ) + ) + + licenses <- list( + list(name = "CC0-1.0", scope = "data"), + list(scope = "media", path = "http://creativecommons.org/licenses/by/4.0/"), + list(name = "CC-BY-4.0", scope = "data") + ) + + coordinatePrecision <- 0.001 + + spatial <- list( + type = "Polygon", + coordinates = structure( + c( + 4.013, 5.659, 5.659, 4.013, 4.013, + 50.699, 50.699, 52.35604, 52.35604, 50.699 + ), + dim = c(1L, 5L, 2L) + ) + ) + + temporal <- list(start = "2020-05-30", end = "2022-03-18") + + references <- list("Evans, J.C., Zilber, R., & Kissling, W.D. (2024). Data from three camera trapping pilots in the Amsterdam Water Supply Dunes of the Netherlands. Data in Brief, 54, 110544. https://doi.org/10.1016/j.dib.2024.110544") + + relatedIdentifiers_merged <- list( + list( + relationType = "IsDerivedFrom", + relatedIdentifier = "https://doi.org/10.15468/5tb6ze", + resourceTypeGeneral = "Dataset", + relatedIdentifierType = "DOI" + ), + list( + relationType = "IsSupplementTo", + relatedIdentifier = "https://inbo.github.io/camtraptor/", + resourceTypeGeneral = "Software", + relatedIdentifierType = "URL" + ), + list( + relationType = "IsPublishedIn", + relatedIdentifier = "https://doi.org/10.1016/j.dib.2024.110544", + resourceTypeGeneral = "DataPaper", + relatedIdentifierType = "DOI" + ), + list( + relationType = "IsDerivedFrom", + relatedIdentifier = "7cca70f5-ef8c-4f86-85fb-8f070937d7ab", + resourceTypeGeneral = "Data package", + relatedIdentifierType = "id" + ) + ) + expect_identical(x_merged$resources, x1$resources) expect_identical(x_merged$profile, profile) expect_identical(x_merged$name, NA) @@ -252,7 +318,7 @@ test_that("merge_camtrapdp() does x and y", { expect_identical(x_merged$image, NULL) expect_identical(x_merged$homepage, NULL) expect_identical(x_merged$sources, sources) - expect_equal(x_merged$licenses, licenses) # fails because remove_duplicates switches order of subelements + expect_equal(x_merged$licenses, licenses) expect_identical(x_merged$bibliographicCitation, NULL) expect_identical(x_merged$projects, list(x1$project, x2$project)) expect_identical(x_merged$coordinatePrecision, coordinatePrecision) @@ -260,9 +326,6 @@ test_that("merge_camtrapdp() does x and y", { expect_identical(x_merged$temporal, temporal) expect_identical(x_merged$taxonomic, taxonomic) expect_identical(x_merged$references, references) - expect_identical(x_merged$directory, directory) - - relatedIdentifiers_merged <- list() - + # expect_identical(x_merged$directory, directory) expect_identical(x_merged$relatedIdentifiers, relatedIdentifiers_merged) }) From 6aa0983ccf298931f32e7a151e9f6ccd4145f7fc Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Tue, 1 Oct 2024 17:07:05 +0200 Subject: [PATCH 081/142] small update --- R/merge_camtrapdp.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/merge_camtrapdp.R b/R/merge_camtrapdp.R index 65254e75..7ead81b4 100644 --- a/R/merge_camtrapdp.R +++ b/R/merge_camtrapdp.R @@ -113,10 +113,10 @@ merge_camtrapdp <- function(x1, x2, prefix = c(x1$id, x2$id)) { c(x1$relatedIdentifiers, x2$relatedIdentifiers, new_relatedIdentifiers) ) - x$references <- remove_duplicates(c(x1$references, x2$references)) + x$references <- unique(c(x1$references, x2$references)) - x <- - update_spatial(x) %>% + x <- x %>% + update_spatial() %>% update_temporal() %>% update_taxonomic() From 7da1b70efdddf157fd2eab453cf8b32170c78a2d Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Tue, 8 Oct 2024 11:59:04 +0200 Subject: [PATCH 082/142] update prefix --- R/merge_camtrapdp.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/merge_camtrapdp.R b/R/merge_camtrapdp.R index 7ead81b4..50f92ead 100644 --- a/R/merge_camtrapdp.R +++ b/R/merge_camtrapdp.R @@ -59,8 +59,8 @@ merge_camtrapdp <- function(x1, x2, prefix = c(x1$id, x2$id)) { ) } - x1 <- add_prefix(x1, results_duplicate_ids, prefix[1]) - x2 <- add_prefix(x2, results_duplicate_ids, prefix[2]) + x1 <- add_prefix(x1, results_duplicate_ids, paste0(prefix[1], "_")) + x2 <- add_prefix(x2, results_duplicate_ids, paste0(prefix[2], "_")) } # Merge resources From c3e874cf6d74ede4ff7f2ff6f25ba83e01f32762 Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Tue, 8 Oct 2024 12:01:02 +0200 Subject: [PATCH 083/142] update parameter names --- R/merge_camtrapdp.R | 74 ++++++++++++++++++++++----------------------- 1 file changed, 37 insertions(+), 37 deletions(-) diff --git a/R/merge_camtrapdp.R b/R/merge_camtrapdp.R index 50f92ead..11e0dbe9 100644 --- a/R/merge_camtrapdp.R +++ b/R/merge_camtrapdp.R @@ -1,8 +1,8 @@ #' Merge Camera Trap Data packages #' -#' @param x1,x2 Camera Trap Data Package objects (as returned by +#' @param x,y Camera Trap Data Package objects (as returned by #' `read_camtrapdp()`), to be coerced to one. -#' @param prefix If there are duplicate IDs in x1 an x2, these prefixes will be +#' @param prefix If there are duplicate IDs in x an y, these prefixes will be #' added to all the values of each identifier with duplicates, to disambiguate #' them. Should be a character vector of length 2. By default, the prefixes are #' the ID's of the Data Package. @@ -10,24 +10,24 @@ #' @family transformation functions #' @export #' @examples -#' x1 <- example_dataset() %>% +#' x <- example_dataset() %>% #' filter_deployments(deploymentID %in% c("00a2c20d", "29b7d356")) -#' x2 <- example_dataset() %>% +#' y <- example_dataset() %>% #' filter_deployments(deploymentID %in% c("577b543a", "62c200a9")) -#' x_merged <- merge_camtrapdp(x1, x2) -merge_camtrapdp <- function(x1, x2, prefix = c(x1$id, x2$id)) { - check_camtrapdp(x1) - check_camtrapdp(x2) +#' merged_xy <- merge_camtrapdp(x, y) +merge_camtrapdp <- function(x, y, prefix = c(x$id, y$id)) { + check_camtrapdp(x) + check_camtrapdp(y) - if (!is.null(x1$id) & !is.null(x2$id)) { - if (x1$id == x2$id) { + if (!is.null(x$id) & !is.null(y$id)) { + if (x$id == y$id) { cli::cli_abort( c( paste0( - "{.arg x1} and {.arg x2} should be different Camera Trap Data", + "{.arg x} and {.arg y} should be different Camera Trap Data", "Package objects with unique identifiers." ), - x = "{.arg x1} and {.arg x2} have the same id: {.value x1$id}" + x = "{.arg x} and {.arg y} have the same id: {.value x$id}" ), class = "camtrapdp_error_camtrapdpid_duplicated" ) @@ -35,7 +35,7 @@ merge_camtrapdp <- function(x1, x2, prefix = c(x1$id, x2$id)) { } # check if identifiers have duplicates - results_duplicate_ids <- check_duplicate_ids(x1, x2) + results_duplicate_ids <- check_duplicate_ids(x, y) # Add prefix to identifiers with duplicates if (TRUE %in% results_duplicate_ids) { @@ -59,61 +59,61 @@ merge_camtrapdp <- function(x1, x2, prefix = c(x1$id, x2$id)) { ) } - x1 <- add_prefix(x1, results_duplicate_ids, paste0(prefix[1], "_")) - x2 <- add_prefix(x2, results_duplicate_ids, paste0(prefix[2], "_")) + x <- add_prefix(x, results_duplicate_ids, paste0(prefix[1], "_")) + y <- add_prefix(y, results_duplicate_ids, paste0(prefix[2], "_")) } # Merge resources - x <- x1 - deployments(x) <- dplyr::bind_rows(deployments(x1), deployments(x2)) - media(x) <- dplyr::bind_rows(media(x1), media(x2)) - observations(x) <- dplyr::bind_rows(observations(x1), observations(x2)) + x <- x + deployments(x) <- dplyr::bind_rows(deployments(x), deployments(y)) + media(x) <- dplyr::bind_rows(media(x), media(y)) + observations(x) <- dplyr::bind_rows(observations(x), observations(y)) # Merge/update metadata x$name <- NA x$id <- NA x$created <- format(Sys.time(), "%Y-%m-%dT%H:%M:%SZ") x$title <- NA - x$contributors <- remove_duplicates(c(x1$contributors, x2$contributors)) - x$description <- paste(x1$description, x2$description, sep = "/n") + x$contributors <- remove_duplicates(c(x$contributors, y$contributors)) + x$description <- paste(x$description, y$description, sep = "/n") x$version <- "1.0" - x$keywords <- unique(c(x1$keywords, x2$keywords)) + x$keywords <- unique(c(x$keywords, y$keywords)) x$image <- NULL x$homepage <- NULL - x$sources <- remove_duplicates(c(x1$sources, x2$sources)) - x$licenses <- remove_duplicates(c(x1$licenses, x2$licenses)) + x$sources <- remove_duplicates(c(x$sources, y$sources)) + x$licenses <- remove_duplicates(c(x$licenses, y$licenses)) x$project <- NULL - x$projects <- list(x1$project, x2$project) + x$projects <- list(x$project, y$project) x$bibliographicCitation <- NULL x$coordinatePrecision <- - max(x1$coordinatePrecision, x2$coordinatePrecision, na.rm = TRUE) + max(x$coordinatePrecision, y$coordinatePrecision, na.rm = TRUE) - if (!is.null(x1$id)) { - relatedIdentifiers_x1 <- list( + if (!is.null(x$id)) { + relatedIdentifiers_x <- list( relationType = "IsDerivedFrom", - relatedIdentifier = as.character(x1$id), + relatedIdentifier = as.character(x$id), resourceTypeGeneral = "Data package", relatedIdentifierType = "id" ) } else { - relatedIdentifiers_x1 <- list() + relatedIdentifiers_x <- list() } - if (!is.null(x2$id)) { - relatedIdentifiers_x2 <- list( + if (!is.null(y$id)) { + relatedIdentifiers_y <- list( relationType = "IsDerivedFrom", - relatedIdentifier = as.character(x2$id), + relatedIdentifier = as.character(y$id), resourceTypeGeneral = "Data package", relatedIdentifierType = "id" ) } else { - relatedIdentifiers_x2 <- list() + relatedIdentifiers_y <- list() } - new_relatedIdentifiers <- list(relatedIdentifiers_x1, relatedIdentifiers_x2) + new_relatedIdentifiers <- list(relatedIdentifiers_x, relatedIdentifiers_y) x$relatedIdentifiers <- remove_duplicates( - c(x1$relatedIdentifiers, x2$relatedIdentifiers, new_relatedIdentifiers) + c(x$relatedIdentifiers, y$relatedIdentifiers, new_relatedIdentifiers) ) - x$references <- unique(c(x1$references, x2$references)) + x$references <- unique(c(x$references, y$references)) x <- x %>% update_spatial() %>% From d2acd21fbeb4e9ffaa10bce92e5e7e1e3b91b825 Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Tue, 8 Oct 2024 12:12:57 +0200 Subject: [PATCH 084/142] update parameter names and prefix --- tests/testthat/test-merge_camtrapdp.R | 184 +++++++++++++------------- 1 file changed, 92 insertions(+), 92 deletions(-) diff --git a/tests/testthat/test-merge_camtrapdp.R b/tests/testthat/test-merge_camtrapdp.R index 6b2c8eac..c2382525 100644 --- a/tests/testthat/test-merge_camtrapdp.R +++ b/tests/testthat/test-merge_camtrapdp.R @@ -1,11 +1,11 @@ test_that("merge_camtrapdp() returns a valid camtrapdp object", { skip_if_offline() - x1 <- example_dataset() - x2 <- example_dataset() - x1$id <- "1" - x2$id <- "2" + x <- example_dataset() + y <- example_dataset() + x$id <- "1" + y$id <- "2" - expect_no_error(check_camtrapdp(merge_camtrapdp(x1, x2))) + expect_no_error(check_camtrapdp(merge_camtrapdp(x, y))) }) test_that("merge_camtrapdp() returns error on duplicate Data Package id", { @@ -19,40 +19,40 @@ test_that("merge_camtrapdp() returns error on duplicate Data Package id", { test_that("merge_camtrapdp() returns error on invalid prefix", { skip_if_offline() - x1 <- example_dataset() - x2 <- example_dataset() - x1$id <- "1" - x2$id <- "2" + x <- example_dataset() + y <- example_dataset() + x$id <- "1" + y$id <- "2" expect_error( - merge_camtrapdp(x1, x2, prefix = c(1, 2)), + merge_camtrapdp(x, y, prefix = c(1, 2)), class = "camtrapdp_error_prefix_invalid" ) expect_error( - merge_camtrapdp(x1, x2, prefix = c("one", "two", "three")), + merge_camtrapdp(x, y, prefix = c("one", "two", "three")), class = "camtrapdp_error_prefix_invalid" ) expect_error( - merge_camtrapdp(x1, x2, prefix = c("one-", NA)), + merge_camtrapdp(x, y, prefix = c("one", NA)), class = "camtrapdp_error_prefix_NA" ) - expect_no_error(merge_camtrapdp(x1, x2, prefix = c("this_", "works_"))) - prefix_ids <- c(paste0(x1$id, "-"), paste0(x2$id, "-")) - expect_no_error(merge_camtrapdp(x1, x2, prefix = prefix_ids)) + expect_no_error(merge_camtrapdp(x, y, prefix = c("this", "works"))) + prefix_ids <- c(x$id, y$id) + expect_no_error(merge_camtrapdp(x, y, prefix = prefix_ids)) }) test_that("merge_camtrapdp() returns unique deploymentIDs, mediaIDs and observationIDs", { skip_if_offline() - x1 <- example_dataset() - x2 <- example_dataset() - x1$id <- "1" - x2$id <- "2" - x_merged <- merge_camtrapdp(x1, x2) + x <- example_dataset() + y <- example_dataset() + x$id <- "1" + y$id <- "2" + merged_xy <- merge_camtrapdp(x, y) - deploymentIDs <- purrr::pluck(deployments(x_merged), "deploymentID") - mediaIDs <- purrr::pluck(media(x_merged), "mediaID") - observationIDs <- purrr::pluck(observations(x_merged), "observationID") + deploymentIDs <- purrr::pluck(deployments(merged_xy), "deploymentID") + mediaIDs <- purrr::pluck(media(merged_xy), "mediaID") + observationIDs <- purrr::pluck(observations(merged_xy), "observationID") expect_false(any(duplicated(deploymentIDs))) expect_false(any(duplicated(mediaIDs))) @@ -63,69 +63,69 @@ test_that("merge_camtrapdp() adds prefixes to all values of identifiers (deploymentID, mediaID, observationID and eventID) with duplicates between packages, but not for mediaID = NA", { skip_if_offline() - x1 <- example_dataset() - x2 <- example_dataset() - x1$id <- "1" - x2$id <- "2" + x <- example_dataset() + y <- example_dataset() + x$id <- "1" + y$id <- "2" # Default prefixes - x_merged_default <- merge_camtrapdp(x1, x2) - expect_true("100a2c20d" %in% deployments(x_merged)$deploymentID) - expect_true("200a2c20d" %in% deployments(x_merged)$deploymentID) + merged_xy_default <- merge_camtrapdp(x, y) + expect_true("1_00a2c20d" %in% deployments(merged_xy_default)$deploymentID) + expect_true("2_00a2c20d" %in% deployments(merged_xy_default)$deploymentID) # Custom prefixes - x_merged <- merge_camtrapdp(x1, x2, prefix = c("project1-", "project2-")) + merged_xy <- merge_camtrapdp(x, y, prefix = c("project1", "project2")) # deploymentID - expect_true("project1-00a2c20d" %in% deployments(x_merged)$deploymentID) - expect_true("project2-00a2c20d" %in% deployments(x_merged)$deploymentID) - expect_true("project1-00a2c20d" %in% media(x_merged)$deploymentID) - expect_true("project1-00a2c20d" %in% observations(x_merged)$deploymentID) + expect_true("project1_00a2c20d" %in% deployments(merged_xy)$deploymentID) + expect_true("project2_00a2c20d" %in% deployments(merged_xy)$deploymentID) + expect_true("project1_00a2c20d" %in% media(merged_xy)$deploymentID) + expect_true("project1_00a2c20d" %in% observations(merged_xy)$deploymentID) # mediaID - expect_true("project1-07840dcc" %in% media(x_merged)$mediaID) - expect_true("project1-07840dcc" %in% observations(x_merged)$mediaID) - expect_false("project1-NA" %in% observations(x_merged)$mediaID) - expect_true(NA %in% observations(x_merged)$mediaID) + expect_true("project1_07840dcc" %in% media(merged_xy)$mediaID) + expect_true("project1_07840dcc" %in% observations(merged_xy)$mediaID) + expect_false("project1_NA" %in% observations(merged_xy)$mediaID) + expect_true(NA %in% observations(merged_xy)$mediaID) # observationID - expect_true("project1-705e6036" %in% observations(x_merged)$observationID) + expect_true("project1_705e6036" %in% observations(merged_xy)$observationID) # eventID - expect_true("project1-4bb69c45" %in% media(x_merged)$eventID) - expect_true("project1-4bb69c45" %in% observations(x_merged)$eventID) + expect_true("project1_4bb69c45" %in% media(merged_xy)$eventID) + expect_true("project1_4bb69c45" %in% observations(merged_xy)$eventID) }) test_that("merge_camtrapdp() returns the expected metadata", { skip_if_offline() - x1 <- example_dataset() - x2 <- example_dataset() - x1$id <- "1" - x2$id <- "2" - x_merged <- merge_camtrapdp(x1, x2) + x <- example_dataset() + y <- example_dataset() + x$id <- "1" + y$id <- "2" + merged_xy <- merge_camtrapdp(x, y) # Check metadata - expect_identical(x_merged$resources, x1$resources) - expect_identical(x_merged$profile, "https://raw.githubusercontent.com/tdwg/camtrap-dp/1.0.1/camtrap-dp-profile.json") - expect_identical(x_merged$name, NA) - expect_identical(x_merged$id, NA) - expect_identical(x_merged$title, NA) - expect_identical(x_merged$contributors, x1$contributors) + expect_identical(merged_xy$resources, x$resources) + expect_identical(merged_xy$profile, "https://raw.githubusercontent.com/tdwg/camtrap-dp/1.0.1/camtrap-dp-profile.json") + expect_identical(merged_xy$name, NA) + expect_identical(merged_xy$id, NA) + expect_identical(merged_xy$title, NA) + expect_identical(merged_xy$contributors, x$contributors) # no test for description - expect_identical(x_merged$version, "1.0") - expect_identical(x_merged$keywords, x1$keywords) - expect_identical(x_merged$image, NULL) - expect_identical(x_merged$homepage, NULL) - expect_identical(x_merged$sources, x1$sources) - expect_equal(x_merged$licenses, x1$licenses) # fails because remove_duplicates switches order of subelements - expect_identical(x_merged$bibliographicCitation, NULL) - expect_identical(x_merged$projects, list(x1$project, x2$project)) - expect_identical(x_merged$coordinatePrecision, x1$coordinatePrecision) - expect_identical(x_merged$spatial, x1$spatial) - expect_identical(x_merged$temporal, x1$temporal) - expect_identical(x_merged$taxonomic, x1$taxonomic) - expect_identical(x_merged$references, x1$references) - expect_identical(x_merged$directory, x1$directory) + expect_identical(merged_xy$version, "1.0") + expect_identical(merged_xy$keywords, x$keywords) + expect_identical(merged_xy$image, NULL) + expect_identical(merged_xy$homepage, NULL) + expect_identical(merged_xy$sources, x$sources) + expect_equal(merged_xy$licenses, x$licenses) # fails because remove_duplicates switches order of subelements + expect_identical(merged_xy$bibliographicCitation, NULL) + expect_identical(merged_xy$projects, list(x$project, y$project)) + expect_identical(merged_xy$coordinatePrecision, x$coordinatePrecision) + expect_identical(merged_xy$spatial, x$spatial) + expect_identical(merged_xy$temporal, x$temporal) + expect_identical(merged_xy$taxonomic, x$taxonomic) + expect_identical(merged_xy$references, x$references) + expect_identical(merged_xy$directory, x$directory) relatedIdentifiers_merged <- list( list( @@ -154,7 +154,7 @@ test_that("merge_camtrapdp() returns the expected metadata", { ) ) - expect_identical(x_merged$relatedIdentifiers, relatedIdentifiers_merged) + expect_identical(merged_xy$relatedIdentifiers, relatedIdentifiers_merged) # Check data }) @@ -162,7 +162,7 @@ test_that("merge_camtrapdp() returns the expected metadata", { test_that("merge_camtrapdp() returns the expected metadata when merging two different Data Packages", { skip_if_offline() - x1 <- example_dataset() + x <- example_dataset() temp_dir <- tempdir() on.exit(unlink(temp_dir, recursive = TRUE)) @@ -173,9 +173,9 @@ test_that("merge_camtrapdp() returns the expected metadata when merging two download.file(url, zip_file, mode = 'wb') unzip(zip_file, exdir = temp_dir) - x2 <- read_camtrapdp(datapackage_file) + y <- read_camtrapdp(datapackage_file) - x_merged <- merge_camtrapdp(x1, x2) + merged_xy <- merge_camtrapdp(x, y) # Check metadata profile <- "https://raw.githubusercontent.com/tdwg/camtrap-dp/1.0.1/camtrap-dp-profile.json" @@ -306,26 +306,26 @@ test_that("merge_camtrapdp() returns the expected metadata when merging two ) ) - expect_identical(x_merged$resources, x1$resources) - expect_identical(x_merged$profile, profile) - expect_identical(x_merged$name, NA) - expect_identical(x_merged$id, NA) - expect_identical(x_merged$title, NA) - expect_identical(x_merged$contributors, contributors) + expect_identical(merged_xy$resources, x$resources) + expect_identical(merged_xy$profile, profile) + expect_identical(merged_xy$name, NA) + expect_identical(merged_xy$id, NA) + expect_identical(merged_xy$title, NA) + expect_identical(merged_xy$contributors, contributors) # no test for description - expect_identical(x_merged$version, "1.0") - expect_identical(x_merged$keywords, c(x1$keywords, x2$keywords)) - expect_identical(x_merged$image, NULL) - expect_identical(x_merged$homepage, NULL) - expect_identical(x_merged$sources, sources) - expect_equal(x_merged$licenses, licenses) - expect_identical(x_merged$bibliographicCitation, NULL) - expect_identical(x_merged$projects, list(x1$project, x2$project)) - expect_identical(x_merged$coordinatePrecision, coordinatePrecision) - expect_identical(x_merged$spatial, spatial) - expect_identical(x_merged$temporal, temporal) - expect_identical(x_merged$taxonomic, taxonomic) - expect_identical(x_merged$references, references) - # expect_identical(x_merged$directory, directory) - expect_identical(x_merged$relatedIdentifiers, relatedIdentifiers_merged) + expect_identical(merged_xy$version, "1.0") + expect_identical(merged_xy$keywords, c(x$keywords, y$keywords)) + expect_identical(merged_xy$image, NULL) + expect_identical(merged_xy$homepage, NULL) + expect_identical(merged_xy$sources, sources) + expect_equal(merged_xy$licenses, licenses) + expect_identical(merged_xy$bibliographicCitation, NULL) + expect_identical(merged_xy$projects, list(x$project, y$project)) + expect_identical(merged_xy$coordinatePrecision, coordinatePrecision) + expect_identical(merged_xy$spatial, spatial) + expect_identical(merged_xy$temporal, temporal) + expect_identical(merged_xy$taxonomic, taxonomic) + expect_identical(merged_xy$references, references) + # expect_identical(merged_xy$directory, directory) + expect_identical(merged_xy$relatedIdentifiers, relatedIdentifiers_merged) }) From 649f0f1c6259e13ddf0c792a894644295e5b3211 Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Tue, 8 Oct 2024 14:03:59 +0200 Subject: [PATCH 085/142] fix name merged DP --- R/merge_camtrapdp.R | 50 ++++++++++++++++++++++----------------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/R/merge_camtrapdp.R b/R/merge_camtrapdp.R index 11e0dbe9..a8945692 100644 --- a/R/merge_camtrapdp.R +++ b/R/merge_camtrapdp.R @@ -14,7 +14,7 @@ #' filter_deployments(deploymentID %in% c("00a2c20d", "29b7d356")) #' y <- example_dataset() %>% #' filter_deployments(deploymentID %in% c("577b543a", "62c200a9")) -#' merged_xy <- merge_camtrapdp(x, y) +#' xy_merged <- merge_camtrapdp(x, y) merge_camtrapdp <- function(x, y, prefix = c(x$id, y$id)) { check_camtrapdp(x) check_camtrapdp(y) @@ -64,28 +64,28 @@ merge_camtrapdp <- function(x, y, prefix = c(x$id, y$id)) { } # Merge resources - x <- x - deployments(x) <- dplyr::bind_rows(deployments(x), deployments(y)) - media(x) <- dplyr::bind_rows(media(x), media(y)) - observations(x) <- dplyr::bind_rows(observations(x), observations(y)) + xy_merged <- x + deployments(xy_merged) <- dplyr::bind_rows(deployments(x), deployments(y)) + media(xy_merged) <- dplyr::bind_rows(media(x), media(y)) + observations(xy_merged) <- dplyr::bind_rows(observations(x), observations(y)) # Merge/update metadata - x$name <- NA - x$id <- NA - x$created <- format(Sys.time(), "%Y-%m-%dT%H:%M:%SZ") - x$title <- NA - x$contributors <- remove_duplicates(c(x$contributors, y$contributors)) - x$description <- paste(x$description, y$description, sep = "/n") - x$version <- "1.0" - x$keywords <- unique(c(x$keywords, y$keywords)) - x$image <- NULL - x$homepage <- NULL - x$sources <- remove_duplicates(c(x$sources, y$sources)) - x$licenses <- remove_duplicates(c(x$licenses, y$licenses)) - x$project <- NULL - x$projects <- list(x$project, y$project) - x$bibliographicCitation <- NULL - x$coordinatePrecision <- + xy_merged$name <- NA + xy_merged$id <- NA + xy_merged$created <- format(Sys.time(), "%Y-%m-%dT%H:%M:%SZ") + xy_merged$title <- NA + xy_merged$contributors <- remove_duplicates(c(x$contributors, y$contributors)) + xy_merged$description <- paste(x$description, y$description, sep = "/n") + xy_merged$version <- "1.0" + xy_merged$keywords <- unique(c(x$keywords, y$keywords)) + xy_merged$image <- NULL + xy_merged$homepage <- NULL + xy_merged$sources <- remove_duplicates(c(x$sources, y$sources)) + xy_merged$licenses <- remove_duplicates(c(x$licenses, y$licenses)) + xy_merged$project <- NULL + xy_merged$projects <- list(x$project, y$project) + xy_merged$bibliographicCitation <- NULL + xy_merged$coordinatePrecision <- max(x$coordinatePrecision, y$coordinatePrecision, na.rm = TRUE) if (!is.null(x$id)) { @@ -109,16 +109,16 @@ merge_camtrapdp <- function(x, y, prefix = c(x$id, y$id)) { relatedIdentifiers_y <- list() } new_relatedIdentifiers <- list(relatedIdentifiers_x, relatedIdentifiers_y) - x$relatedIdentifiers <- remove_duplicates( + xy_merged$relatedIdentifiers <- remove_duplicates( c(x$relatedIdentifiers, y$relatedIdentifiers, new_relatedIdentifiers) ) - x$references <- unique(c(x$references, y$references)) + xy_merged$references <- unique(c(x$references, y$references)) - x <- x %>% + xy_merged <- xy_merged %>% update_spatial() %>% update_temporal() %>% update_taxonomic() - return(x) + return(xy_merged) } From 3c25c05d21e995a6f5f5f3980ed53ecc394af11e Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Tue, 8 Oct 2024 14:04:11 +0200 Subject: [PATCH 086/142] rename merged DP --- tests/testthat/test-merge_camtrapdp.R | 126 +++++++++++++------------- 1 file changed, 63 insertions(+), 63 deletions(-) diff --git a/tests/testthat/test-merge_camtrapdp.R b/tests/testthat/test-merge_camtrapdp.R index c2382525..460d381e 100644 --- a/tests/testthat/test-merge_camtrapdp.R +++ b/tests/testthat/test-merge_camtrapdp.R @@ -48,11 +48,11 @@ test_that("merge_camtrapdp() returns unique deploymentIDs, mediaIDs and y <- example_dataset() x$id <- "1" y$id <- "2" - merged_xy <- merge_camtrapdp(x, y) + xy_merged <- merge_camtrapdp(x, y) - deploymentIDs <- purrr::pluck(deployments(merged_xy), "deploymentID") - mediaIDs <- purrr::pluck(media(merged_xy), "mediaID") - observationIDs <- purrr::pluck(observations(merged_xy), "observationID") + deploymentIDs <- purrr::pluck(deployments(xy_merged), "deploymentID") + mediaIDs <- purrr::pluck(media(xy_merged), "mediaID") + observationIDs <- purrr::pluck(observations(xy_merged), "observationID") expect_false(any(duplicated(deploymentIDs))) expect_false(any(duplicated(mediaIDs))) @@ -69,31 +69,31 @@ test_that("merge_camtrapdp() adds prefixes to all values of identifiers y$id <- "2" # Default prefixes - merged_xy_default <- merge_camtrapdp(x, y) - expect_true("1_00a2c20d" %in% deployments(merged_xy_default)$deploymentID) - expect_true("2_00a2c20d" %in% deployments(merged_xy_default)$deploymentID) + xy_merged_default <- merge_camtrapdp(x, y) + expect_true("1_00a2c20d" %in% deployments(xy_merged_default)$deploymentID) + expect_true("2_00a2c20d" %in% deployments(xy_merged_default)$deploymentID) # Custom prefixes - merged_xy <- merge_camtrapdp(x, y, prefix = c("project1", "project2")) + xy_merged <- merge_camtrapdp(x, y, prefix = c("project1", "project2")) # deploymentID - expect_true("project1_00a2c20d" %in% deployments(merged_xy)$deploymentID) - expect_true("project2_00a2c20d" %in% deployments(merged_xy)$deploymentID) - expect_true("project1_00a2c20d" %in% media(merged_xy)$deploymentID) - expect_true("project1_00a2c20d" %in% observations(merged_xy)$deploymentID) + expect_true("project1_00a2c20d" %in% deployments(xy_merged)$deploymentID) + expect_true("project2_00a2c20d" %in% deployments(xy_merged)$deploymentID) + expect_true("project1_00a2c20d" %in% media(xy_merged)$deploymentID) + expect_true("project1_00a2c20d" %in% observations(xy_merged)$deploymentID) # mediaID - expect_true("project1_07840dcc" %in% media(merged_xy)$mediaID) - expect_true("project1_07840dcc" %in% observations(merged_xy)$mediaID) - expect_false("project1_NA" %in% observations(merged_xy)$mediaID) - expect_true(NA %in% observations(merged_xy)$mediaID) + expect_true("project1_07840dcc" %in% media(xy_merged)$mediaID) + expect_true("project1_07840dcc" %in% observations(xy_merged)$mediaID) + expect_false("project1_NA" %in% observations(xy_merged)$mediaID) + expect_true(NA %in% observations(xy_merged)$mediaID) # observationID - expect_true("project1_705e6036" %in% observations(merged_xy)$observationID) + expect_true("project1_705e6036" %in% observations(xy_merged)$observationID) # eventID - expect_true("project1_4bb69c45" %in% media(merged_xy)$eventID) - expect_true("project1_4bb69c45" %in% observations(merged_xy)$eventID) + expect_true("project1_4bb69c45" %in% media(xy_merged)$eventID) + expect_true("project1_4bb69c45" %in% observations(xy_merged)$eventID) }) test_that("merge_camtrapdp() returns the expected metadata", { @@ -102,30 +102,30 @@ test_that("merge_camtrapdp() returns the expected metadata", { y <- example_dataset() x$id <- "1" y$id <- "2" - merged_xy <- merge_camtrapdp(x, y) + xy_merged <- merge_camtrapdp(x, y) # Check metadata - expect_identical(merged_xy$resources, x$resources) - expect_identical(merged_xy$profile, "https://raw.githubusercontent.com/tdwg/camtrap-dp/1.0.1/camtrap-dp-profile.json") - expect_identical(merged_xy$name, NA) - expect_identical(merged_xy$id, NA) - expect_identical(merged_xy$title, NA) - expect_identical(merged_xy$contributors, x$contributors) + expect_identical(xy_merged$resources, x$resources) + expect_identical(xy_merged$profile, "https://raw.githubusercontent.com/tdwg/camtrap-dp/1.0.1/camtrap-dp-profile.json") + expect_identical(xy_merged$name, NA) + expect_identical(xy_merged$id, NA) + expect_identical(xy_merged$title, NA) + expect_identical(xy_merged$contributors, x$contributors) # no test for description - expect_identical(merged_xy$version, "1.0") - expect_identical(merged_xy$keywords, x$keywords) - expect_identical(merged_xy$image, NULL) - expect_identical(merged_xy$homepage, NULL) - expect_identical(merged_xy$sources, x$sources) - expect_equal(merged_xy$licenses, x$licenses) # fails because remove_duplicates switches order of subelements - expect_identical(merged_xy$bibliographicCitation, NULL) - expect_identical(merged_xy$projects, list(x$project, y$project)) - expect_identical(merged_xy$coordinatePrecision, x$coordinatePrecision) - expect_identical(merged_xy$spatial, x$spatial) - expect_identical(merged_xy$temporal, x$temporal) - expect_identical(merged_xy$taxonomic, x$taxonomic) - expect_identical(merged_xy$references, x$references) - expect_identical(merged_xy$directory, x$directory) + expect_identical(xy_merged$version, "1.0") + expect_identical(xy_merged$keywords, x$keywords) + expect_identical(xy_merged$image, NULL) + expect_identical(xy_merged$homepage, NULL) + expect_identical(xy_merged$sources, x$sources) + expect_equal(xy_merged$licenses, x$licenses) # fails because remove_duplicates switches order of subelements + expect_identical(xy_merged$bibliographicCitation, NULL) + expect_identical(xy_merged$projects, list(x$project, y$project)) + expect_identical(xy_merged$coordinatePrecision, x$coordinatePrecision) + expect_identical(xy_merged$spatial, x$spatial) + expect_identical(xy_merged$temporal, x$temporal) + expect_identical(xy_merged$taxonomic, x$taxonomic) + expect_identical(xy_merged$references, x$references) + expect_identical(xy_merged$directory, x$directory) relatedIdentifiers_merged <- list( list( @@ -154,7 +154,7 @@ test_that("merge_camtrapdp() returns the expected metadata", { ) ) - expect_identical(merged_xy$relatedIdentifiers, relatedIdentifiers_merged) + expect_identical(xy_merged$relatedIdentifiers, relatedIdentifiers_merged) # Check data }) @@ -175,7 +175,7 @@ test_that("merge_camtrapdp() returns the expected metadata when merging two y <- read_camtrapdp(datapackage_file) - merged_xy <- merge_camtrapdp(x, y) + xy_merged <- merge_camtrapdp(x, y) # Check metadata profile <- "https://raw.githubusercontent.com/tdwg/camtrap-dp/1.0.1/camtrap-dp-profile.json" @@ -306,26 +306,26 @@ test_that("merge_camtrapdp() returns the expected metadata when merging two ) ) - expect_identical(merged_xy$resources, x$resources) - expect_identical(merged_xy$profile, profile) - expect_identical(merged_xy$name, NA) - expect_identical(merged_xy$id, NA) - expect_identical(merged_xy$title, NA) - expect_identical(merged_xy$contributors, contributors) + expect_identical(xy_merged$resources, x$resources) + expect_identical(xy_merged$profile, profile) + expect_identical(xy_merged$name, NA) + expect_identical(xy_merged$id, NA) + expect_identical(xy_merged$title, NA) + expect_identical(xy_merged$contributors, contributors) # no test for description - expect_identical(merged_xy$version, "1.0") - expect_identical(merged_xy$keywords, c(x$keywords, y$keywords)) - expect_identical(merged_xy$image, NULL) - expect_identical(merged_xy$homepage, NULL) - expect_identical(merged_xy$sources, sources) - expect_equal(merged_xy$licenses, licenses) - expect_identical(merged_xy$bibliographicCitation, NULL) - expect_identical(merged_xy$projects, list(x$project, y$project)) - expect_identical(merged_xy$coordinatePrecision, coordinatePrecision) - expect_identical(merged_xy$spatial, spatial) - expect_identical(merged_xy$temporal, temporal) - expect_identical(merged_xy$taxonomic, taxonomic) - expect_identical(merged_xy$references, references) - # expect_identical(merged_xy$directory, directory) - expect_identical(merged_xy$relatedIdentifiers, relatedIdentifiers_merged) + expect_identical(xy_merged$version, "1.0") + expect_identical(xy_merged$keywords, c(x$keywords, y$keywords)) + expect_identical(xy_merged$image, NULL) + expect_identical(xy_merged$homepage, NULL) + expect_identical(xy_merged$sources, sources) + expect_equal(xy_merged$licenses, licenses) + expect_identical(xy_merged$bibliographicCitation, NULL) + expect_identical(xy_merged$projects, list(x$project, y$project)) + expect_identical(xy_merged$coordinatePrecision, coordinatePrecision) + expect_identical(xy_merged$spatial, spatial) + expect_identical(xy_merged$temporal, temporal) + expect_identical(xy_merged$taxonomic, taxonomic) + expect_identical(xy_merged$references, references) + # expect_identical(xy_merged$directory, directory) + expect_identical(xy_merged$relatedIdentifiers, relatedIdentifiers_merged) }) From 13d9b445705e1604d554716e6271446be8398141 Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Tue, 8 Oct 2024 16:21:50 +0200 Subject: [PATCH 087/142] add tests on custom prefixes --- tests/testthat/test-merge_camtrapdp.R | 87 ++++++++++++++++++++------- 1 file changed, 65 insertions(+), 22 deletions(-) diff --git a/tests/testthat/test-merge_camtrapdp.R b/tests/testthat/test-merge_camtrapdp.R index 460d381e..d4af7007 100644 --- a/tests/testthat/test-merge_camtrapdp.R +++ b/tests/testthat/test-merge_camtrapdp.R @@ -23,7 +23,6 @@ test_that("merge_camtrapdp() returns error on invalid prefix", { y <- example_dataset() x$id <- "1" y$id <- "2" - expect_error( merge_camtrapdp(x, y, prefix = c(1, 2)), class = "camtrapdp_error_prefix_invalid" @@ -36,9 +35,16 @@ test_that("merge_camtrapdp() returns error on invalid prefix", { merge_camtrapdp(x, y, prefix = c("one", NA)), class = "camtrapdp_error_prefix_NA" ) + + expect_no_error(merge_camtrapdp(x, y)) expect_no_error(merge_camtrapdp(x, y, prefix = c("this", "works"))) - prefix_ids <- c(x$id, y$id) - expect_no_error(merge_camtrapdp(x, y, prefix = prefix_ids)) + + x$id <- NULL + y$id <- NULL + expect_error( + merge_camtrapdp(x, y), + class = "camtrapdp_error_prefix_invalid" + ) }) test_that("merge_camtrapdp() returns unique deploymentIDs, mediaIDs and @@ -59,7 +65,7 @@ test_that("merge_camtrapdp() returns unique deploymentIDs, mediaIDs and expect_false(any(duplicated(observationIDs))) }) -test_that("merge_camtrapdp() adds prefixes to all values of identifiers +test_that("merge_camtrapdp() adds default prefixes to all values of identifiers (deploymentID, mediaID, observationID and eventID) with duplicates between packages, but not for mediaID = NA", { skip_if_offline() @@ -67,36 +73,73 @@ test_that("merge_camtrapdp() adds prefixes to all values of identifiers y <- example_dataset() x$id <- "1" y$id <- "2" + xy_merged <- merge_camtrapdp(x, y) + + # deploymentID + expect_true("1_00a2c20d" %in% deployments(xy_merged)$deploymentID) + expect_true("2_00a2c20d" %in% deployments(xy_merged)$deploymentID) + expect_true("1_00a2c20d" %in% media(xy_merged)$deploymentID) + expect_true("2_00a2c20d" %in% media(xy_merged)$deploymentID) + expect_true("1_00a2c20d" %in% observations(xy_merged)$deploymentID) + expect_true("2_00a2c20d" %in% observations(xy_merged)$deploymentID) - # Default prefixes - xy_merged_default <- merge_camtrapdp(x, y) - expect_true("1_00a2c20d" %in% deployments(xy_merged_default)$deploymentID) - expect_true("2_00a2c20d" %in% deployments(xy_merged_default)$deploymentID) + # mediaID + expect_true("1_07840dcc" %in% media(xy_merged)$mediaID) + expect_true("2_07840dcc" %in% media(xy_merged)$mediaID) + expect_true("1_07840dcc" %in% observations(xy_merged)$mediaID) + expect_true("2_07840dcc" %in% observations(xy_merged)$mediaID) + expect_false("1_NA" %in% observations(xy_merged)$mediaID) + expect_true(NA %in% observations(xy_merged)$mediaID) - # Custom prefixes - xy_merged <- merge_camtrapdp(x, y, prefix = c("project1", "project2")) + # observationID + expect_true("1_705e6036" %in% observations(xy_merged)$observationID) + expect_true("2_705e6036" %in% observations(xy_merged)$observationID) + + # eventID + expect_true("1_4bb69c45" %in% media(xy_merged)$eventID) + expect_true("2_4bb69c45" %in% media(xy_merged)$eventID) + expect_true("1_4bb69c45" %in% observations(xy_merged)$eventID) + expect_true("2_4bb69c45" %in% observations(xy_merged)$eventID) +}) + +test_that("merge_camtrapdp() adds custom prefixes to all values of identifiers + (deploymentID, mediaID, observationID and eventID) with duplicates + between packages, but not for mediaID = NA", { + skip_if_offline() + x <- example_dataset() + y <- example_dataset() + x$id <- NULL + y$id <- NULL + xy_merged <- merge_camtrapdp(x, y, prefix = c("x", "y")) # deploymentID - expect_true("project1_00a2c20d" %in% deployments(xy_merged)$deploymentID) - expect_true("project2_00a2c20d" %in% deployments(xy_merged)$deploymentID) - expect_true("project1_00a2c20d" %in% media(xy_merged)$deploymentID) - expect_true("project1_00a2c20d" %in% observations(xy_merged)$deploymentID) + expect_true("x_00a2c20d" %in% deployments(xy_merged)$deploymentID) + expect_true("y_00a2c20d" %in% deployments(xy_merged)$deploymentID) + expect_true("x_00a2c20d" %in% media(xy_merged)$deploymentID) + expect_true("y_00a2c20d" %in% media(xy_merged)$deploymentID) + expect_true("x_00a2c20d" %in% observations(xy_merged)$deploymentID) + expect_true("y_00a2c20d" %in% observations(xy_merged)$deploymentID) # mediaID - expect_true("project1_07840dcc" %in% media(xy_merged)$mediaID) - expect_true("project1_07840dcc" %in% observations(xy_merged)$mediaID) - expect_false("project1_NA" %in% observations(xy_merged)$mediaID) + expect_true("x_07840dcc" %in% media(xy_merged)$mediaID) + expect_true("y_07840dcc" %in% media(xy_merged)$mediaID) + expect_true("x_07840dcc" %in% observations(xy_merged)$mediaID) + expect_true("y_07840dcc" %in% observations(xy_merged)$mediaID) + expect_false("x_NA" %in% observations(xy_merged)$mediaID) expect_true(NA %in% observations(xy_merged)$mediaID) # observationID - expect_true("project1_705e6036" %in% observations(xy_merged)$observationID) + expect_true("x_705e6036" %in% observations(xy_merged)$observationID) + expect_true("y_705e6036" %in% observations(xy_merged)$observationID) # eventID - expect_true("project1_4bb69c45" %in% media(xy_merged)$eventID) - expect_true("project1_4bb69c45" %in% observations(xy_merged)$eventID) + expect_true("x_4bb69c45" %in% media(xy_merged)$eventID) + expect_true("y_4bb69c45" %in% media(xy_merged)$eventID) + expect_true("x_4bb69c45" %in% observations(xy_merged)$eventID) + expect_true("y_4bb69c45" %in% observations(xy_merged)$eventID) }) -test_that("merge_camtrapdp() returns the expected metadata", { +test_that("merge_camtrapdp() returns the expected metadata ", { skip_if_offline() x <- example_dataset() y <- example_dataset() @@ -318,7 +361,7 @@ test_that("merge_camtrapdp() returns the expected metadata when merging two expect_identical(xy_merged$image, NULL) expect_identical(xy_merged$homepage, NULL) expect_identical(xy_merged$sources, sources) - expect_equal(xy_merged$licenses, licenses) + expect_identical(xy_merged$licenses, licenses) expect_identical(xy_merged$bibliographicCitation, NULL) expect_identical(xy_merged$projects, list(x$project, y$project)) expect_identical(xy_merged$coordinatePrecision, coordinatePrecision) From 8822b7e0507770a1f54396870276f221da1e6e0e Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Tue, 8 Oct 2024 16:40:17 +0200 Subject: [PATCH 088/142] Update test-merge_camtrapdp.R --- tests/testthat/test-merge_camtrapdp.R | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-merge_camtrapdp.R b/tests/testthat/test-merge_camtrapdp.R index d4af7007..12a54ea2 100644 --- a/tests/testthat/test-merge_camtrapdp.R +++ b/tests/testthat/test-merge_camtrapdp.R @@ -50,8 +50,10 @@ test_that("merge_camtrapdp() returns error on invalid prefix", { test_that("merge_camtrapdp() returns unique deploymentIDs, mediaIDs and observationIDs", { skip_if_offline() - x <- example_dataset() - y <- example_dataset() + x <- example_dataset() %>% + filter_deployments(deploymentID %in% c("00a2c20d", "29b7d356")) + y <- example_dataset() %>% + filter_deployments(deploymentID %in% c("577b543a", "62c200a9")) x$id <- "1" y$id <- "2" xy_merged <- merge_camtrapdp(x, y) @@ -63,6 +65,9 @@ test_that("merge_camtrapdp() returns unique deploymentIDs, mediaIDs and expect_false(any(duplicated(deploymentIDs))) expect_false(any(duplicated(mediaIDs))) expect_false(any(duplicated(observationIDs))) + + expect_true("00a2c20d" %in% deployments(xy_merged)$deploymentID) + expect_true("577b543a" %in% deployments(xy_merged)$deploymentID) }) test_that("merge_camtrapdp() adds default prefixes to all values of identifiers From 156d6d88fb73de73dd409f6cde506b0c79ab2c28 Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Tue, 8 Oct 2024 16:56:11 +0200 Subject: [PATCH 089/142] add test on piping --- tests/testthat/test-merge_camtrapdp.R | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/tests/testthat/test-merge_camtrapdp.R b/tests/testthat/test-merge_camtrapdp.R index 12a54ea2..64200498 100644 --- a/tests/testthat/test-merge_camtrapdp.R +++ b/tests/testthat/test-merge_camtrapdp.R @@ -377,3 +377,27 @@ test_that("merge_camtrapdp() returns the expected metadata when merging two # expect_identical(xy_merged$directory, directory) expect_identical(xy_merged$relatedIdentifiers, relatedIdentifiers_merged) }) + +test_that("merge_camtrapdp() can be used in a pipe to merge multiple + camtrap DP", { + skip_if_offline() + + temp_dir <- tempdir() + on.exit(unlink(temp_dir, recursive = TRUE)) + zip_file <- file.path(temp_dir, "dataset.zip") + datapackage_file <- file.path(temp_dir, "datapackage.json") + url <- "https://ipt.nlbif.nl/archive.do?r=awd_pilot2" + + download.file(url, zip_file, mode = 'wb') + unzip(zip_file, exdir = temp_dir) + + x <- read_camtrapdp(datapackage_file) + y <- example_dataset() %>% + filter_deployments(deploymentID %in% c("00a2c20d", "29b7d356")) + z <- example_dataset() %>% + filter_deployments(deploymentID %in% c("577b543a", "62c200a9")) + y$id <- "y" + z$id <- "z" + + expect_no_error(x %>% merge_camtrapdp(y) %>% merge_camtrapdp(z)) +}) From a9c07d980f1faf54744343c23bd7565e8289f6f6 Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Tue, 8 Oct 2024 16:56:23 +0200 Subject: [PATCH 090/142] set id to NULL instead of NA --- R/merge_camtrapdp.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/merge_camtrapdp.R b/R/merge_camtrapdp.R index a8945692..838bf5cc 100644 --- a/R/merge_camtrapdp.R +++ b/R/merge_camtrapdp.R @@ -71,7 +71,7 @@ merge_camtrapdp <- function(x, y, prefix = c(x$id, y$id)) { # Merge/update metadata xy_merged$name <- NA - xy_merged$id <- NA + xy_merged$id <- NULL xy_merged$created <- format(Sys.time(), "%Y-%m-%dT%H:%M:%SZ") xy_merged$title <- NA xy_merged$contributors <- remove_duplicates(c(x$contributors, y$contributors)) From b6b8c687564000b1a23d6080e41bc70e0879f2b3 Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Tue, 8 Oct 2024 17:04:58 +0200 Subject: [PATCH 091/142] add tests for description --- tests/testthat/test-merge_camtrapdp.R | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-merge_camtrapdp.R b/tests/testthat/test-merge_camtrapdp.R index 64200498..eff9719e 100644 --- a/tests/testthat/test-merge_camtrapdp.R +++ b/tests/testthat/test-merge_camtrapdp.R @@ -159,7 +159,10 @@ test_that("merge_camtrapdp() returns the expected metadata ", { expect_identical(xy_merged$id, NA) expect_identical(xy_merged$title, NA) expect_identical(xy_merged$contributors, x$contributors) - # no test for description + expect_identical( + xy_merged$description, + paste(x$description, y$description, sep = "/n") + ) expect_identical(xy_merged$version, "1.0") expect_identical(xy_merged$keywords, x$keywords) expect_identical(xy_merged$image, NULL) @@ -289,6 +292,8 @@ test_that("merge_camtrapdp() returns the expected metadata when merging two ) ) + description <- "MICA - Muskrat and coypu camera trap observations in Belgium, the Netherlands and Germany is an occurrence dataset published by the Research Institute of Nature and Forest (INBO). It is part of the LIFE project MICA, in which innovative techniques are tested for a more efficient control of muskrat and coypu populations, both invasive species. This dataset is a sample of the original dataset and serves as an example of a Camera Trap Data Package (Camtrap DP)./nCamera trap pilot 2 was a test of the difference in species detection and data accumulation between a Snyper Commander camera with a regular lens (52°) and one with a wide lens (100°). The cameras were deployed at 30 cm above the ground within the herbivore exclosure Zeeveld Noord in the Amsterdam Water Supply Dunes from 14th of August 2021 to 24th of September 2021. During this pilot, a solar panel failure caused the cameras to stop recording data from the 24th of August 2021 to the 6th of September (14 days). During annotation, only days in which both cameras were operational were annotated. This led to a total of 1,113 images over 28 days from the two cameras. A detailed description of the dataset can be found in a data paper published in the journal Data in Brief (Evans et al. 2024, https://doi.org/10.1016/j.dib.2024.110544)." + sources <- list( list( title = "Agouti", @@ -360,7 +365,10 @@ test_that("merge_camtrapdp() returns the expected metadata when merging two expect_identical(xy_merged$id, NA) expect_identical(xy_merged$title, NA) expect_identical(xy_merged$contributors, contributors) - # no test for description + expect_identical( + xy_merged$description, + paste(x$description, y$description, sep = "/n") + ) expect_identical(xy_merged$version, "1.0") expect_identical(xy_merged$keywords, c(x$keywords, y$keywords)) expect_identical(xy_merged$image, NULL) From f35e46dcd34c80f463f9b8323b48a8ea712509e5 Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Tue, 8 Oct 2024 17:06:07 +0200 Subject: [PATCH 092/142] id is set to NULL instead of NA --- tests/testthat/test-merge_camtrapdp.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-merge_camtrapdp.R b/tests/testthat/test-merge_camtrapdp.R index eff9719e..83a7dbe4 100644 --- a/tests/testthat/test-merge_camtrapdp.R +++ b/tests/testthat/test-merge_camtrapdp.R @@ -156,7 +156,7 @@ test_that("merge_camtrapdp() returns the expected metadata ", { expect_identical(xy_merged$resources, x$resources) expect_identical(xy_merged$profile, "https://raw.githubusercontent.com/tdwg/camtrap-dp/1.0.1/camtrap-dp-profile.json") expect_identical(xy_merged$name, NA) - expect_identical(xy_merged$id, NA) + expect_identical(xy_merged$id, NULL) expect_identical(xy_merged$title, NA) expect_identical(xy_merged$contributors, x$contributors) expect_identical( @@ -362,7 +362,7 @@ test_that("merge_camtrapdp() returns the expected metadata when merging two expect_identical(xy_merged$resources, x$resources) expect_identical(xy_merged$profile, profile) expect_identical(xy_merged$name, NA) - expect_identical(xy_merged$id, NA) + expect_identical(xy_merged$id, NULL) expect_identical(xy_merged$title, NA) expect_identical(xy_merged$contributors, contributors) expect_identical( From 3c4bb10db0308c236a0452d202b4eaca4467af18 Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Wed, 9 Oct 2024 10:23:54 +0200 Subject: [PATCH 093/142] taxonomic scope should also be updated in filter_deployments() --- R/filter_deployments.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/R/filter_deployments.R b/R/filter_deployments.R index ee8c6e86..0102b929 100644 --- a/R/filter_deployments.R +++ b/R/filter_deployments.R @@ -69,5 +69,8 @@ filter_deployments <- function(x, ...) { update_temporal() %>% update_spatial() + # Update taxonomic scope in metadata + x <- update_taxonomic(x) + return(x) } From 633171b66f77c1ca94320cd18187ef54a855e85f Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Wed, 9 Oct 2024 10:49:50 +0200 Subject: [PATCH 094/142] set directory --- R/merge_camtrapdp.R | 1 + tests/testthat/test-merge_camtrapdp.R | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/R/merge_camtrapdp.R b/R/merge_camtrapdp.R index 838bf5cc..76c98cc9 100644 --- a/R/merge_camtrapdp.R +++ b/R/merge_camtrapdp.R @@ -114,6 +114,7 @@ merge_camtrapdp <- function(x, y, prefix = c(x$id, y$id)) { ) xy_merged$references <- unique(c(x$references, y$references)) + xy_merged$directory <- "." xy_merged <- xy_merged %>% update_spatial() %>% diff --git a/tests/testthat/test-merge_camtrapdp.R b/tests/testthat/test-merge_camtrapdp.R index 83a7dbe4..df6a6562 100644 --- a/tests/testthat/test-merge_camtrapdp.R +++ b/tests/testthat/test-merge_camtrapdp.R @@ -382,7 +382,7 @@ test_that("merge_camtrapdp() returns the expected metadata when merging two expect_identical(xy_merged$temporal, temporal) expect_identical(xy_merged$taxonomic, taxonomic) expect_identical(xy_merged$references, references) - # expect_identical(xy_merged$directory, directory) + expect_identical(xy_merged$directory, ".") expect_identical(xy_merged$relatedIdentifiers, relatedIdentifiers_merged) }) From d9943ce7d6002c10a816ee57e877c335038ca9ee Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Wed, 9 Oct 2024 10:55:23 +0200 Subject: [PATCH 095/142] update documentation --- R/filter_deployments.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/filter_deployments.R b/R/filter_deployments.R index 0102b929..48d8586a 100644 --- a/R/filter_deployments.R +++ b/R/filter_deployments.R @@ -6,8 +6,8 @@ #' #' - Media are filtered on associated `deploymentID`. #' - Observations are filtered on associated `deploymentID`. -#' - Metadata (`x$temporal` and `x$spatial`) are updated to match the filtered -#' deployments. +#' - Metadata (`x$temporal`, `x$spatial` and `x$taxonomic`) are updated to +#' match the filtered deployments. #' #' @inheritParams print.camtrapdp #' @param ... Filtering conditions, see `dplyr::filter()`. From 56454a849aafe704277694110efb502da1203aeb Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Wed, 16 Oct 2024 17:44:07 +0200 Subject: [PATCH 096/142] Update test-merge_camtrapdp.R --- tests/testthat/test-merge_camtrapdp.R | 121 +++++++++++++++++++++++++- 1 file changed, 117 insertions(+), 4 deletions(-) diff --git a/tests/testthat/test-merge_camtrapdp.R b/tests/testthat/test-merge_camtrapdp.R index df6a6562..0945f70f 100644 --- a/tests/testthat/test-merge_camtrapdp.R +++ b/tests/testthat/test-merge_camtrapdp.R @@ -152,6 +152,12 @@ test_that("merge_camtrapdp() returns the expected metadata ", { y$id <- "2" xy_merged <- merge_camtrapdp(x, y) + # Can't compare with x$licenses because remove_duplicates switches order of + # subelements + licenses <- list( + list(name = "CC0-1.0", scope = "data"), + list(scope = "media", path = "http://creativecommons.org/licenses/by/4.0/")) + # Check metadata expect_identical(xy_merged$resources, x$resources) expect_identical(xy_merged$profile, "https://raw.githubusercontent.com/tdwg/camtrap-dp/1.0.1/camtrap-dp-profile.json") @@ -168,7 +174,7 @@ test_that("merge_camtrapdp() returns the expected metadata ", { expect_identical(xy_merged$image, NULL) expect_identical(xy_merged$homepage, NULL) expect_identical(xy_merged$sources, x$sources) - expect_equal(xy_merged$licenses, x$licenses) # fails because remove_duplicates switches order of subelements + expect_identical(xy_merged$licenses, licenses) expect_identical(xy_merged$bibliographicCitation, NULL) expect_identical(xy_merged$projects, list(x$project, y$project)) expect_identical(xy_merged$coordinatePrecision, x$coordinatePrecision) @@ -176,7 +182,7 @@ test_that("merge_camtrapdp() returns the expected metadata ", { expect_identical(xy_merged$temporal, x$temporal) expect_identical(xy_merged$taxonomic, x$taxonomic) expect_identical(xy_merged$references, x$references) - expect_identical(xy_merged$directory, x$directory) + expect_identical(xy_merged$directory, ".") relatedIdentifiers_merged <- list( list( @@ -215,17 +221,17 @@ test_that("merge_camtrapdp() returns the expected metadata when merging two skip_if_offline() x <- example_dataset() + # Download second Camera Trap Data package temp_dir <- tempdir() on.exit(unlink(temp_dir, recursive = TRUE)) zip_file <- file.path(temp_dir, "dataset.zip") datapackage_file <- file.path(temp_dir, "datapackage.json") url <- "https://ipt.nlbif.nl/archive.do?r=awd_pilot2" - download.file(url, zip_file, mode = 'wb') unzip(zip_file, exdir = temp_dir) - y <- read_camtrapdp(datapackage_file) + # Merge xy_merged <- merge_camtrapdp(x, y) # Check metadata @@ -330,6 +336,113 @@ test_that("merge_camtrapdp() returns the expected metadata when merging two temporal <- list(start = "2020-05-30", end = "2022-03-18") + taxonomic <- list( + list( + scientificName = "Anas platyrhynchos", + taxonID = "https://www.checklistbank.org/dataset/COL2023/taxon/DGP6", + taxonRank = "species", + family = NA_character_, + order. = NA_character_, + vernacularNames = list(eng = "mallard", nld = "wilde eend") + ), + list( + scientificName = "Anas strepera", + taxonID = "https://www.checklistbank.org/dataset/COL2023/taxon/DGPL", + taxonRank = "species", + family = NA_character_, + order. = NA_character_, + vernacularNames = list(eng = "gadwall", nld = "krakeend") + ), + list( + scientificName = "Apodemus sylvaticus", + taxonID = "https://www.checklistbank.org/dataset/COL2023/taxon/FRJJ", + taxonRank = "species", + family = "Muridae", + order. = "Rodentia", + vernacularNames = list(eng = "wood mouse", nld = "bosmuis") + ), + list( + scientificName = "Ardea", + taxonID = "https://www.checklistbank.org/dataset/COL2023/taxon/32FH", + taxonRank = "genus", + family = NA_character_, + order. = NA_character_, + vernacularNames = list(eng = "great herons", nld = "reigers") + ), + list( + scientificName = "Ardea cinerea", + taxonID = "https://www.checklistbank.org/dataset/COL2023/taxon/GCHS", + taxonRank = "species", + family = NA_character_, + order. = NA_character_, + vernacularNames = list(eng = "grey heron", nld = "blauwe reiger") + ), + list( + scientificName = "Aves", + taxonID = "https://www.checklistbank.org/dataset/COL2023/taxon/V2", + taxonRank = "class", + family = NA_character_, + order. = NA_character_, + vernacularNames = list(eng = "bird sp.", nld = "vogel") + ), + list( + scientificName = "Corvus corone", + taxonID = "https://www.checklistbank.org/dataset/COL2023/taxon/YNHJ", + taxonRank = "species", + family = "Corvidae", + order. = "Passeriformes", + vernacularNames = list(eng = "carrion crow", nld = "zwarte kraai") + ), + list( + scientificName = "Homo sapiens", + taxonID = "https://www.checklistbank.org/dataset/COL2023/taxon/6MB3T", + taxonRank = "species", + family = "Hominidae", + order. = "Primates", + vernacularNames = list(eng = "human", nld = "mens") + ), + list( + scientificName = "Martes foina", + taxonID = "https://www.checklistbank.org/dataset/COL2023/taxon/3Y9VW", + taxonRank = "species", + family = NA_character_, + order. = NA_character_, + vernacularNames = list(eng = "beech marten", nld = "steenmarter") + ), + list( + scientificName = "Mustela putorius", + taxonID = "https://www.checklistbank.org/dataset/COL2023/taxon/44QYC", + taxonRank = "species", + family = NA_character_, + order. = NA_character_, + vernacularNames = list(eng = "European polecat", nld = "bunzing") + ), + list( + scientificName = "Oryctolagus cuniculus", + taxonID = "https://www.checklistbank.org/dataset/COL2023/taxon/74ZBP", + taxonRank = "species", + family = "Leporidae", + order. = "Lagomorpha", + vernacularNames = list(eng = "European rabbit", nld = "Europees konijn") + ), + list( + scientificName = "Rattus norvegicus", + taxonID = "https://www.checklistbank.org/dataset/COL2023/taxon/4RM67", + taxonRank = "species", + family = NA_character_, + order. = NA_character_, + vernacularNames = list(eng = "brown rat", nld = "bruine rat") + ), + list( + scientificName = "Vulpes vulpes", + taxonID = "https://www.checklistbank.org/dataset/COL2023/taxon/5BSG3", + taxonRank = "species", + family = "Canidae", + order. = "Carnivora", + vernacularNames = list(eng = "red fox", nld = "vos") + ) + ) + references <- list("Evans, J.C., Zilber, R., & Kissling, W.D. (2024). Data from three camera trapping pilots in the Amsterdam Water Supply Dunes of the Netherlands. Data in Brief, 54, 110544. https://doi.org/10.1016/j.dib.2024.110544") relatedIdentifiers_merged <- list( From 913eb9894b9d70e90ca8aa3f6a11ff3e3880abc4 Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Wed, 16 Oct 2024 17:52:41 +0200 Subject: [PATCH 097/142] Update NEWS.md --- NEWS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/NEWS.md b/NEWS.md index 49ddb9cc..ce044acc 100644 --- a/NEWS.md +++ b/NEWS.md @@ -3,6 +3,7 @@ * New function `write_eml()` transforms Camtrap DP metadata to EML (#61). * New function `round_coordinates()` allows to fuzzy/generalize location information by rounding deployment `latitude` and `longitude`. It also updates `coordinateUncertainty` in the deployments and `coordinatePrecision` and spatial scope in the metadata (#85). * New function `shift_time()` allows to shift/correct date-times in data and metadata for specified deploymentIDs and duration (#105). +* New function `merge_camtrapdp()` merges two Camera Trap Data packages (#112). * `filter_deployments()` and `deployments()<-` now update the spatial, temporal and taxonomic scope in the metadata based on the returned data (#72, #111, #132). * `filter_observations()`, `filter_media()`, `media()<-` and `observations()<-` now update the taxonomic scope in the metadata based on the returned data (#73, #111). * `read_camtrapdp()` now updates the spatial and temporal scope in metadata based on the data (#130). From f8327b63b07ebda3530f3fc202443fb452f0fbb7 Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Wed, 16 Oct 2024 18:12:44 +0200 Subject: [PATCH 098/142] add documentation --- R/merge_camtrapdp.R | 30 +++++++++++++++++++++++- man/merge_camtrapdp.Rd | 53 ++++++++++++++++++++++++++++++++++++------ 2 files changed, 75 insertions(+), 8 deletions(-) diff --git a/R/merge_camtrapdp.R b/R/merge_camtrapdp.R index 76c98cc9..e2d0c392 100644 --- a/R/merge_camtrapdp.R +++ b/R/merge_camtrapdp.R @@ -2,13 +2,41 @@ #' #' @param x,y Camera Trap Data Package objects (as returned by #' `read_camtrapdp()`), to be coerced to one. -#' @param prefix If there are duplicate IDs in x an y, these prefixes will be +#' @param prefix If there are duplicate IDs between x and y, prefixes will be #' added to all the values of each identifier with duplicates, to disambiguate #' them. Should be a character vector of length 2. By default, the prefixes are #' the ID's of the Data Package. #' @return `x` #' @family transformation functions #' @export +#' @section Merging details: +#' Deployments, media and observations are combined. If there are duplicate IDs +#' between x and y, prefixes will be added to all the values of each identifier +#' with duplicates, to disambiguate them. +#' The following properties are set: +#' - **name**: Set to NA. +#' - **id**: Set to NULL. +#' - **created**: Set to current timestamp. +#' - **title**: Set to NA. +#' - **contributors**: A combination is made and duplicates are removed. +#' - **description**: A combination is made. +#' - **version**: Set to 1.0. +#' - **keywords**: A combination is made and duplicates are removed. +#' - **image**: Set to NULL. +#' - **homepage**: Set to NULL. +#' - **sources**: A combination is made and duplicates are removed. +#' - **licenses**: A combination is made and duplicates are removed. +#' - **bibliographicCitation**: Set to NULL. +#' - **project**: List of the projects. +#' - **coordinatePrecision**: Set to the least precise `coordinatePrecision`. +#' - **spatial**: Reset based on the new deployments. +#' - **temporal**: Reset based on the new deployments. +#' - **taxonomic**: A combination is made and duplicates are removed. +#' - **relatedIdentifiers**: A combination is made and duplicates are removed. +#' - **references**: A combination is made and duplicates are removed. +#' @section Merging multiple Camera Trap Data Packages: +#' `merge_camtrapdp()` can be used in a pipe to merge multiple camtrap DP. +#' - x %>% merge_camtrapdp(y) %>% merge_camtrapdp(z) #' @examples #' x <- example_dataset() %>% #' filter_deployments(deploymentID %in% c("00a2c20d", "29b7d356")) diff --git a/man/merge_camtrapdp.Rd b/man/merge_camtrapdp.Rd index 537a9392..fe85fa04 100644 --- a/man/merge_camtrapdp.Rd +++ b/man/merge_camtrapdp.Rd @@ -4,15 +4,16 @@ \alias{merge_camtrapdp} \title{Merge Camera Trap Data packages} \usage{ -merge_camtrapdp(x1, x2, prefix = c("x.", "y.")) +merge_camtrapdp(x, y, prefix = c(x$id, y$id)) } \arguments{ -\item{x1, x2}{Camera Trap Data Package objects (as returned by +\item{x, y}{Camera Trap Data Package objects (as returned by \code{read_camtrapdp()}), to be coerced to one.} -\item{prefix}{If there are duplicate IDs in x1 an x2, these prefixes will be +\item{prefix}{If there are duplicate IDs between x and y, prefixes will be added to all the values of each identifier with duplicates, to disambiguate -them. Should be a character vector of length 2.} +them. Should be a character vector of length 2. By default, the prefixes are +the ID's of the Data Package.} } \value{ \code{x} @@ -20,12 +21,50 @@ them. Should be a character vector of length 2.} \description{ Merge Camera Trap Data packages } +\section{Merging details}{ + +Deployments, media and observations are combined. If there are duplicate IDs +between x and y, prefixes will be added to all the values of each identifier +with duplicates, to disambiguate them. +The following properties are set: +\itemize{ +\item \strong{name}: Set to NA. +\item \strong{id}: Set to NULL. +\item \strong{created}: Set to current timestamp. +\item \strong{title}: Set to NA. +\item \strong{contributors}: A combination is made and duplicates are removed. +\item \strong{description}: A combination is made. +\item \strong{version}: Set to 1.0. +\item \strong{keywords}: A combination is made and duplicates are removed. +\item \strong{image}: Set to NULL. +\item \strong{homepage}: Set to NULL. +\item \strong{sources}: A combination is made and duplicates are removed. +\item \strong{licenses}: A combination is made and duplicates are removed. +\item \strong{bibliographicCitation}: Set to NULL. +\item \strong{project}: List of the projects. +\item \strong{coordinatePrecision}: Set to the least precise \code{coordinatePrecision}. +\item \strong{spatial}: Reset based on the new deployments. +\item \strong{temporal}: Reset based on the new deployments. +\item \strong{taxonomic}: A combination is made and duplicates are removed. +\item \strong{relatedIdentifiers}: A combination is made and duplicates are removed. +\item \strong{references}: A combination is made and duplicates are removed. +} +} + +\section{Merging multiple Camera Trap Data Packages}{ + +\code{merge_camtrapdp()} can be used in a pipe to merge multiple camtrap DP. +\itemize{ +\item x \%>\% merge_camtrapdp(y) \%>\% merge_camtrapdp(z) +} +} + \examples{ -x1 <- example_dataset() \%>\% +x <- example_dataset() \%>\% filter_deployments(deploymentID \%in\% c("00a2c20d", "29b7d356")) -x2 <- example_dataset() \%>\% +y <- example_dataset() \%>\% filter_deployments(deploymentID \%in\% c("577b543a", "62c200a9")) -x_merged <- merge_camtrapdp(x1, x2) +xy_merged <- merge_camtrapdp(x, y) } \seealso{ Other transformation functions: From 99ebe0d6d678a09a2fcf91843336e80ef4bdad69 Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Wed, 16 Oct 2024 18:13:37 +0200 Subject: [PATCH 099/142] update on project(s) --- R/merge_camtrapdp.R | 3 +-- tests/testthat/test-merge_camtrapdp.R | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/R/merge_camtrapdp.R b/R/merge_camtrapdp.R index e2d0c392..d51938a2 100644 --- a/R/merge_camtrapdp.R +++ b/R/merge_camtrapdp.R @@ -110,8 +110,7 @@ merge_camtrapdp <- function(x, y, prefix = c(x$id, y$id)) { xy_merged$homepage <- NULL xy_merged$sources <- remove_duplicates(c(x$sources, y$sources)) xy_merged$licenses <- remove_duplicates(c(x$licenses, y$licenses)) - xy_merged$project <- NULL - xy_merged$projects <- list(x$project, y$project) + xy_merged$project <- list(x$project, y$project) xy_merged$bibliographicCitation <- NULL xy_merged$coordinatePrecision <- max(x$coordinatePrecision, y$coordinatePrecision, na.rm = TRUE) diff --git a/tests/testthat/test-merge_camtrapdp.R b/tests/testthat/test-merge_camtrapdp.R index 0945f70f..d690c8a9 100644 --- a/tests/testthat/test-merge_camtrapdp.R +++ b/tests/testthat/test-merge_camtrapdp.R @@ -176,7 +176,7 @@ test_that("merge_camtrapdp() returns the expected metadata ", { expect_identical(xy_merged$sources, x$sources) expect_identical(xy_merged$licenses, licenses) expect_identical(xy_merged$bibliographicCitation, NULL) - expect_identical(xy_merged$projects, list(x$project, y$project)) + expect_identical(xy_merged$project, list(x$project, y$project)) expect_identical(xy_merged$coordinatePrecision, x$coordinatePrecision) expect_identical(xy_merged$spatial, x$spatial) expect_identical(xy_merged$temporal, x$temporal) @@ -489,7 +489,7 @@ test_that("merge_camtrapdp() returns the expected metadata when merging two expect_identical(xy_merged$sources, sources) expect_identical(xy_merged$licenses, licenses) expect_identical(xy_merged$bibliographicCitation, NULL) - expect_identical(xy_merged$projects, list(x$project, y$project)) + expect_identical(xy_merged$project, list(x$project, y$project)) expect_identical(xy_merged$coordinatePrecision, coordinatePrecision) expect_identical(xy_merged$spatial, spatial) expect_identical(xy_merged$temporal, temporal) From 637ee274bb945b6ba0570dfa5e6f2f36c816e8d5 Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Wed, 16 Oct 2024 18:18:35 +0200 Subject: [PATCH 100/142] use x and y instead of x1 and x2 --- R/utils.R | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/R/utils.R b/R/utils.R index 648dd6a6..a8dea3ec 100644 --- a/R/utils.R +++ b/R/utils.R @@ -41,32 +41,32 @@ expand_cols <- function(df, colnames) { #' #' Checks for duplicated IDs in two Camera Trap Data Package objects combined. #' -#' @param x1,x2 Camera Trap Data Package objects (as returned by +#' @param x,y Camera Trap Data Package objects (as returned by #' `read_camtrapdp()`), to be coerced to one. #' @return List with logical for each type of ID, that indicates whether that -#' ID type has duplicates between x1 and x2. +#' ID type has duplicates between x and y. #' @family helper functions #' @noRd -check_duplicate_ids <- function(x1, x2) { +check_duplicate_ids <- function(x, y) { result = list( deploymentID = FALSE, mediaID = FALSE, observationID = FALSE, eventID = FALSE) deploymentIDs <- c( - unique(purrr::pluck(deployments(x1), "deploymentID")), - unique(purrr::pluck(deployments(x2), "deploymentID")) + unique(purrr::pluck(deployments(x), "deploymentID")), + unique(purrr::pluck(deployments(y), "deploymentID")) ) mediaIDs <- c( - unique(purrr::pluck(media(x1), "mediaID")), - unique(purrr::pluck(media(x2), "mediaID")) + unique(purrr::pluck(media(x), "mediaID")), + unique(purrr::pluck(media(y), "mediaID")) ) observationIDs <- c( - unique(purrr::pluck(observations(x1), "observationID")), - unique(purrr::pluck(observations(x2), "observationID")) + unique(purrr::pluck(observations(x), "observationID")), + unique(purrr::pluck(observations(y), "observationID")) ) eventIDs <- c( - unique(purrr::pluck(media(x1), "eventID")), - unique(purrr::pluck(media(x2), "eventID")) + unique(purrr::pluck(media(x), "eventID")), + unique(purrr::pluck(media(y), "eventID")) ) # Check for duplicates From 0ed0511fd5ede70e25d8820e51fad74b4a0ba95b Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Wed, 16 Oct 2024 18:20:32 +0200 Subject: [PATCH 101/142] fix example --- R/merge_camtrapdp.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/R/merge_camtrapdp.R b/R/merge_camtrapdp.R index d51938a2..9562df93 100644 --- a/R/merge_camtrapdp.R +++ b/R/merge_camtrapdp.R @@ -42,6 +42,8 @@ #' filter_deployments(deploymentID %in% c("00a2c20d", "29b7d356")) #' y <- example_dataset() %>% #' filter_deployments(deploymentID %in% c("577b543a", "62c200a9")) +#' x$id <- "1" +#' y$id <- "2" #' xy_merged <- merge_camtrapdp(x, y) merge_camtrapdp <- function(x, y, prefix = c(x$id, y$id)) { check_camtrapdp(x) From e166bb6d85dcc0a4640eebad77d4072e712ad5c3 Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Wed, 16 Oct 2024 18:22:16 +0200 Subject: [PATCH 102/142] add visible binding for global variables --- R/taxa.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/taxa.R b/R/taxa.R index 156dec9e..bb159680 100644 --- a/R/taxa.R +++ b/R/taxa.R @@ -23,7 +23,7 @@ taxa <- function(x) { dplyr::select("scientificName", dplyr::starts_with("taxon.")) %>% dplyr::distinct() %>% dplyr::rename_with(~ sub("^taxon.", "", .x)) %>% - dplyr::arrange(scientificName) + dplyr::arrange(.data$scientificName) # Remove duplicates without taxonID if ("taxonID" %in% names(taxa)) { @@ -42,13 +42,13 @@ taxa <- function(x) { # Remove duplicates with the least information duplicates_with_least_info <- taxa %>% - dplyr::mutate(columns_with_info = rowSums(!is.na(.))) %>% + dplyr::mutate(.data$columns_with_info = rowSums(!is.na(.))) %>% dplyr::group_by(.data$scientificName) %>% dplyr::filter(dplyr::n() > 1) %>% - dplyr::arrange(dplyr::desc(columns_with_info)) %>% + dplyr::arrange(dplyr::desc(.data$columns_with_info)) %>% dplyr::slice_tail(n = -1) %>% # Remove first row from group (with most info) dplyr::ungroup() %>% - dplyr::select(-columns_with_info) + dplyr::select(-.data$columns_with_info) taxa <- dplyr::anti_join( taxa, duplicates_with_least_info, From eea76b6077f41388708bc20fed50b6295a3e9455 Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Wed, 16 Oct 2024 18:24:54 +0200 Subject: [PATCH 103/142] typo --- R/taxa.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/taxa.R b/R/taxa.R index bb159680..f8e95be3 100644 --- a/R/taxa.R +++ b/R/taxa.R @@ -42,7 +42,7 @@ taxa <- function(x) { # Remove duplicates with the least information duplicates_with_least_info <- taxa %>% - dplyr::mutate(.data$columns_with_info = rowSums(!is.na(.))) %>% + dplyr::mutate(columns_with_info = rowSums(!is.na(.))) %>% dplyr::group_by(.data$scientificName) %>% dplyr::filter(dplyr::n() > 1) %>% dplyr::arrange(dplyr::desc(.data$columns_with_info)) %>% From ccc479b422211a364c392f760b0474c73f040420 Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Wed, 16 Oct 2024 18:29:25 +0200 Subject: [PATCH 104/142] document() --- man/merge_camtrapdp.Rd | 2 ++ 1 file changed, 2 insertions(+) diff --git a/man/merge_camtrapdp.Rd b/man/merge_camtrapdp.Rd index fe85fa04..299190c1 100644 --- a/man/merge_camtrapdp.Rd +++ b/man/merge_camtrapdp.Rd @@ -64,6 +64,8 @@ x <- example_dataset() \%>\% filter_deployments(deploymentID \%in\% c("00a2c20d", "29b7d356")) y <- example_dataset() \%>\% filter_deployments(deploymentID \%in\% c("577b543a", "62c200a9")) +x$id <- "1" +y$id <- "2" xy_merged <- merge_camtrapdp(x, y) } \seealso{ From 800a49c1a4c329818b89b5659a07597732588a86 Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Wed, 16 Oct 2024 18:30:19 +0200 Subject: [PATCH 105/142] Update DESCRIPTION --- DESCRIPTION | 1 - 1 file changed, 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 6a545d4f..0b935ff1 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -26,7 +26,6 @@ URL: https://github.com/inbo/camtrapdp, https://inbo.github.io/camtrapdp/ BugReports: https://github.com/inbo/camtrapdp/issues Imports: cli, - digest, dplyr, EML, frictionless (>= 1.1.0), From e86a7b7e526bb68ddd5e225f92af32f5130aeefe Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Wed, 16 Oct 2024 18:33:58 +0200 Subject: [PATCH 106/142] avoid error on lacking visible binding --- R/taxa.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/taxa.R b/R/taxa.R index f8e95be3..518e8a9d 100644 --- a/R/taxa.R +++ b/R/taxa.R @@ -22,7 +22,7 @@ taxa <- function(x) { dplyr::filter(!is.na(.data$scientificName)) %>% dplyr::select("scientificName", dplyr::starts_with("taxon.")) %>% dplyr::distinct() %>% - dplyr::rename_with(~ sub("^taxon.", "", .x)) %>% + dplyr::rename_with(~ sub("^taxon.", "", .cols = everything())) %>% dplyr::arrange(.data$scientificName) # Remove duplicates without taxonID From dce31674300e98d065e0b894b9b4cc5e0a123e39 Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Thu, 17 Oct 2024 10:42:05 +0200 Subject: [PATCH 107/142] undo mistake --- R/taxa.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/taxa.R b/R/taxa.R index 518e8a9d..f8e95be3 100644 --- a/R/taxa.R +++ b/R/taxa.R @@ -22,7 +22,7 @@ taxa <- function(x) { dplyr::filter(!is.na(.data$scientificName)) %>% dplyr::select("scientificName", dplyr::starts_with("taxon.")) %>% dplyr::distinct() %>% - dplyr::rename_with(~ sub("^taxon.", "", .cols = everything())) %>% + dplyr::rename_with(~ sub("^taxon.", "", .x)) %>% dplyr::arrange(.data$scientificName) # Remove duplicates without taxonID From b959be5b92014d096e355a6da1c3be27a0c0fab9 Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Fri, 25 Oct 2024 15:34:17 +0200 Subject: [PATCH 108/142] check for additional resources --- R/merge_camtrapdp.R | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/R/merge_camtrapdp.R b/R/merge_camtrapdp.R index 9562df93..50c0bedf 100644 --- a/R/merge_camtrapdp.R +++ b/R/merge_camtrapdp.R @@ -93,12 +93,21 @@ merge_camtrapdp <- function(x, y, prefix = c(x$id, y$id)) { y <- add_prefix(y, results_duplicate_ids, paste0(prefix[2], "_")) } - # Merge resources + # Merge camtrap DP resources xy_merged <- x deployments(xy_merged) <- dplyr::bind_rows(deployments(x), deployments(y)) media(xy_merged) <- dplyr::bind_rows(media(x), media(y)) observations(xy_merged) <- dplyr::bind_rows(observations(x), observations(y)) + # Merge additional resources + camtrapdp_resources <- c("deployments", "media", "observations") + x_resource_names <- purrr::map(x$resources, ~ .[["name"]]) %>% unlist() + y_resource_names <- purrr::map(x$resources, ~ .[["name"]]) %>% unlist() + x_additional_resources <- + x_resource_names[!x_resource_names %in% camtrapdp_resources] + y_additional_resources <- + y_resource_names[!y_resource_names %in% camtrapdp_resources] + # Merge/update metadata xy_merged$name <- NA xy_merged$id <- NULL From f3c6633558a6a8605a9cafa07410f3a08999d0fd Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Fri, 25 Oct 2024 16:03:41 +0200 Subject: [PATCH 109/142] add additional resources --- R/merge_camtrapdp.R | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/R/merge_camtrapdp.R b/R/merge_camtrapdp.R index 50c0bedf..93f0fd46 100644 --- a/R/merge_camtrapdp.R +++ b/R/merge_camtrapdp.R @@ -93,7 +93,7 @@ merge_camtrapdp <- function(x, y, prefix = c(x$id, y$id)) { y <- add_prefix(y, results_duplicate_ids, paste0(prefix[2], "_")) } - # Merge camtrap DP resources + # Merge Camera Trap DP resources xy_merged <- x deployments(xy_merged) <- dplyr::bind_rows(deployments(x), deployments(y)) media(xy_merged) <- dplyr::bind_rows(media(x), media(y)) @@ -108,6 +108,34 @@ merge_camtrapdp <- function(x, y, prefix = c(x$id, y$id)) { y_additional_resources <- y_resource_names[!y_resource_names %in% camtrapdp_resources] + all_additional_resources <- c(x_additional_resources, y_additional_resources) + + if (length(all_additional_resources) > 0) { + duplicated_resources <- duplicated(all_additional_resources) + duplicated_names <- all_additional_resources[duplicated_resources] + + # Add prefixes to resource names that are not unique + if (any(duplicated_names)) { + purrr::map(duplicated_names, function(duplicated_name) { + x_index <- which(purrr::map(x$resources, "name") == duplicated_name) + y_index <- which(purrr::map(y$resources, "name") == duplicated_name) + xy_merged$resources[[x_index]]$name <- paste0(prefix[1], "_") + y$resources[y_index]$name <- paste0(prefix[2], "_") + xy_merged$resources <- append(xy_merged$resources, y$resources[y_index]) + }) + + unique_resources <- + all_additional_resources[!all_additional_resources %in% duplicated_names] + + # Add resources + purrr::map(all_additional_resources, function(resource_name) { + index <- which(purrr::map(y$resources, "name") == resource_name) + resource <- x$resources[index] + xy_merged$resources <- append(xy_merged$resources, resource) + }) + } + } + # Merge/update metadata xy_merged$name <- NA xy_merged$id <- NULL From 82d2d263864266964829463a6addfa8ebbf1d8b5 Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Fri, 25 Oct 2024 18:19:28 +0200 Subject: [PATCH 110/142] move to helper functions --- R/merge_camtrapdp.R | 38 ++-------------------------------- R/utils.R | 50 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 36 deletions(-) diff --git a/R/merge_camtrapdp.R b/R/merge_camtrapdp.R index 93f0fd46..9ea196fa 100644 --- a/R/merge_camtrapdp.R +++ b/R/merge_camtrapdp.R @@ -6,7 +6,7 @@ #' added to all the values of each identifier with duplicates, to disambiguate #' them. Should be a character vector of length 2. By default, the prefixes are #' the ID's of the Data Package. -#' @return `x` +#' @return `xy_merged` #' @family transformation functions #' @export #' @section Merging details: @@ -100,41 +100,7 @@ merge_camtrapdp <- function(x, y, prefix = c(x$id, y$id)) { observations(xy_merged) <- dplyr::bind_rows(observations(x), observations(y)) # Merge additional resources - camtrapdp_resources <- c("deployments", "media", "observations") - x_resource_names <- purrr::map(x$resources, ~ .[["name"]]) %>% unlist() - y_resource_names <- purrr::map(x$resources, ~ .[["name"]]) %>% unlist() - x_additional_resources <- - x_resource_names[!x_resource_names %in% camtrapdp_resources] - y_additional_resources <- - y_resource_names[!y_resource_names %in% camtrapdp_resources] - - all_additional_resources <- c(x_additional_resources, y_additional_resources) - - if (length(all_additional_resources) > 0) { - duplicated_resources <- duplicated(all_additional_resources) - duplicated_names <- all_additional_resources[duplicated_resources] - - # Add prefixes to resource names that are not unique - if (any(duplicated_names)) { - purrr::map(duplicated_names, function(duplicated_name) { - x_index <- which(purrr::map(x$resources, "name") == duplicated_name) - y_index <- which(purrr::map(y$resources, "name") == duplicated_name) - xy_merged$resources[[x_index]]$name <- paste0(prefix[1], "_") - y$resources[y_index]$name <- paste0(prefix[2], "_") - xy_merged$resources <- append(xy_merged$resources, y$resources[y_index]) - }) - - unique_resources <- - all_additional_resources[!all_additional_resources %in% duplicated_names] - - # Add resources - purrr::map(all_additional_resources, function(resource_name) { - index <- which(purrr::map(y$resources, "name") == resource_name) - resource <- x$resources[index] - xy_merged$resources <- append(xy_merged$resources, resource) - }) - } - } + xy_merged <- merge_additional_resources(xy_merged, x, y, prefix) # Merge/update metadata xy_merged$name <- NA diff --git a/R/utils.R b/R/utils.R index 9f8dc2f4..75ece8bd 100644 --- a/R/utils.R +++ b/R/utils.R @@ -331,6 +331,56 @@ remove_duplicates <- function(data_list) { return(unique_data_list) } +#' Merge additional resources +#' +#' Merges resources that are different from the required Camera Trap Data +#' Package resources (deployments, media and observations). Resources with the +#' same name are not combined, but prefixes are added to the resource names. +#' +#' @param xy_merged +#' @inheritParams merge_camtrapdp +#' +#' @return `xy_merged` +#' @family helper functions +#' @noRd +merge_additional_resources <- function(xy_merged, x, y, prefix) { + camtrapdp_resources <- c("deployments", "media", "observations") + x_resource_names <- purrr::map(x$resources, ~ .[["name"]]) %>% unlist() + y_resource_names <- purrr::map(x$resources, ~ .[["name"]]) %>% unlist() + x_additional_resources <- + x_resource_names[!x_resource_names %in% camtrapdp_resources] + y_additional_resources <- + y_resource_names[!y_resource_names %in% camtrapdp_resources] + + all_additional_resources <- c(x_additional_resources, y_additional_resources) + + if (length(all_additional_resources) > 0) { + duplicated_resources <- duplicated(all_additional_resources) + duplicated_names <- all_additional_resources[duplicated_resources] + + # Add prefixes to resource names that are not unique + if (any(duplicated_names)) { + purrr::map(duplicated_names, function(duplicated_name) { + x_index <- which(purrr::map(x$resources, "name") == duplicated_name) + y_index <- which(purrr::map(y$resources, "name") == duplicated_name) + xy_merged$resources[[x_index]]$name <- paste0(prefix[1], "_") + y$resources[y_index]$name <- paste0(prefix[2], "_") + xy_merged$resources <- append(xy_merged$resources, y$resources[y_index]) + }) + + unique_resources <- + all_additional_resources[!all_additional_resources %in% duplicated_names] + + # Add resources + purrr::map(all_additional_resources, function(resource_name) { + index <- which(purrr::map(y$resources, "name") == resource_name) + resource <- x$resources[index] + xy_merged$resources <- append(xy_merged$resources, resource) + }) + } + } +} + #' Creates list of contributors in EML format #' #' @param contributor_list List of contributors From 19f98bd52959a6810fc12b8acb9e7463ed5a2873 Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Fri, 25 Oct 2024 18:36:10 +0200 Subject: [PATCH 111/142] typo's --- R/utils.R | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/R/utils.R b/R/utils.R index 75ece8bd..1b59eb40 100644 --- a/R/utils.R +++ b/R/utils.R @@ -346,7 +346,7 @@ remove_duplicates <- function(data_list) { merge_additional_resources <- function(xy_merged, x, y, prefix) { camtrapdp_resources <- c("deployments", "media", "observations") x_resource_names <- purrr::map(x$resources, ~ .[["name"]]) %>% unlist() - y_resource_names <- purrr::map(x$resources, ~ .[["name"]]) %>% unlist() + y_resource_names <- purrr::map(y$resources, ~ .[["name"]]) %>% unlist() x_additional_resources <- x_resource_names[!x_resource_names %in% camtrapdp_resources] y_additional_resources <- @@ -358,27 +358,29 @@ merge_additional_resources <- function(xy_merged, x, y, prefix) { duplicated_resources <- duplicated(all_additional_resources) duplicated_names <- all_additional_resources[duplicated_resources] - # Add prefixes to resource names that are not unique - if (any(duplicated_names)) { + # Add prefixes to resource names that are not unique, and add + if (any(duplicated_resources)) { purrr::map(duplicated_names, function(duplicated_name) { x_index <- which(purrr::map(x$resources, "name") == duplicated_name) y_index <- which(purrr::map(y$resources, "name") == duplicated_name) - xy_merged$resources[[x_index]]$name <- paste0(prefix[1], "_") - y$resources[y_index]$name <- paste0(prefix[2], "_") + xy_merged$resources[[x_index]]$name <- + paste0(prefix[1], "_", duplicated_name) + y$resources[[y_index]]$name <- paste0(prefix[2], "_", duplicated_name) xy_merged$resources <- append(xy_merged$resources, y$resources[y_index]) }) - - unique_resources <- - all_additional_resources[!all_additional_resources %in% duplicated_names] - - # Add resources - purrr::map(all_additional_resources, function(resource_name) { - index <- which(purrr::map(y$resources, "name") == resource_name) - resource <- x$resources[index] - xy_merged$resources <- append(xy_merged$resources, resource) - }) } + + # Add unique resources + unique_resources <- + all_additional_resources[!all_additional_resources %in% duplicated_names] + purrr::map(unique_resources, function(resource_name) { + index <- which(purrr::map(y$resources, "name") == resource_name) + resource <- x$resources[index] + xy_merged$resources <- append(xy_merged$resources, resource) + }) } + + return(xy_merged) } #' Creates list of contributors in EML format From 4451fee53875786c703d9711d057f93e3d7de3ef Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Fri, 25 Oct 2024 22:06:19 +0200 Subject: [PATCH 112/142] Update test-merge_camtrapdp.R --- tests/testthat/test-merge_camtrapdp.R | 29 ++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/tests/testthat/test-merge_camtrapdp.R b/tests/testthat/test-merge_camtrapdp.R index d690c8a9..1f5992d5 100644 --- a/tests/testthat/test-merge_camtrapdp.R +++ b/tests/testthat/test-merge_camtrapdp.R @@ -144,6 +144,33 @@ test_that("merge_camtrapdp() adds custom prefixes to all values of identifiers expect_true("y_4bb69c45" %in% observations(xy_merged)$eventID) }) +test_that("merge_camtrapdp() adds default prefixes to the names of + additional resources that are not unique and not required by Camera + Trap Data Package standard", { + skip_if_offline() + x <- example_dataset() + y <- example_dataset() + x$id <- "1" + y$id <- "2" + x$resources <- append( + y$resources, + list(list( + name = "annotations", + data = list(id = 1L, comment = "albino fox")) + )) + y$resources <- append( + y$resources, + list(list(name = "foo", description = "blabla"))) + xy_merged <- merge_camtrapdp(x, y) + + resource_names <- purrr::map(xy_merged$resources, ~ .[["name"]]) %>% unlist() + expected_names <- c( + "deployments", "media", "observations", "1_individuals", "annotations", + "2_individuals", "foo") + + expect_identical(resource_names, expected_names) +}) + test_that("merge_camtrapdp() returns the expected metadata ", { skip_if_offline() x <- example_dataset() @@ -159,7 +186,7 @@ test_that("merge_camtrapdp() returns the expected metadata ", { list(scope = "media", path = "http://creativecommons.org/licenses/by/4.0/")) # Check metadata - expect_identical(xy_merged$resources, x$resources) + expect_equal(length(xy_merged$resources), 5) expect_identical(xy_merged$profile, "https://raw.githubusercontent.com/tdwg/camtrap-dp/1.0.1/camtrap-dp-profile.json") expect_identical(xy_merged$name, NA) expect_identical(xy_merged$id, NULL) From 427226f068bebe4098d21ccd13ee8fbbb18116dd Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Fri, 25 Oct 2024 22:06:53 +0200 Subject: [PATCH 113/142] fix `merge_additional_resources()` --- R/utils.R | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/R/utils.R b/R/utils.R index 1b59eb40..2f9ab4dc 100644 --- a/R/utils.R +++ b/R/utils.R @@ -361,22 +361,23 @@ merge_additional_resources <- function(xy_merged, x, y, prefix) { # Add prefixes to resource names that are not unique, and add if (any(duplicated_resources)) { purrr::map(duplicated_names, function(duplicated_name) { - x_index <- which(purrr::map(x$resources, "name") == duplicated_name) + xy_index <- + which(purrr::map(xy_merged$resources, "name") == duplicated_name) y_index <- which(purrr::map(y$resources, "name") == duplicated_name) - xy_merged$resources[[x_index]]$name <- + xy_merged$resources[[xy_index]]$name <- paste0(prefix[1], "_", duplicated_name) y$resources[[y_index]]$name <- paste0(prefix[2], "_", duplicated_name) - xy_merged$resources <- append(xy_merged$resources, y$resources[y_index]) + xy_merged$resources <<- append(xy_merged$resources, y$resources[y_index]) }) } - # Add unique resources - unique_resources <- - all_additional_resources[!all_additional_resources %in% duplicated_names] - purrr::map(unique_resources, function(resource_name) { + # Add unique resources from y + y_unique_resources <- + y_additional_resources[!y_additional_resources %in% duplicated_names] + purrr::map(y_unique_resources, function(resource_name) { index <- which(purrr::map(y$resources, "name") == resource_name) - resource <- x$resources[index] - xy_merged$resources <- append(xy_merged$resources, resource) + resource <- y$resources[index] + xy_merged$resources <<- append(xy_merged$resources, resource) }) } From f3f2a451a7a02e1889e67ffc9550a49ba306aed9 Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Fri, 25 Oct 2024 22:11:11 +0200 Subject: [PATCH 114/142] update documentation --- R/merge_camtrapdp.R | 4 +++- R/utils.R | 4 ++-- man/merge_camtrapdp.Rd | 4 +++- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/R/merge_camtrapdp.R b/R/merge_camtrapdp.R index 9ea196fa..3aa499de 100644 --- a/R/merge_camtrapdp.R +++ b/R/merge_camtrapdp.R @@ -6,13 +6,15 @@ #' added to all the values of each identifier with duplicates, to disambiguate #' them. Should be a character vector of length 2. By default, the prefixes are #' the ID's of the Data Package. -#' @return `xy_merged` +#' @return `xy_merged` Merged Camera Trap Data Package #' @family transformation functions #' @export #' @section Merging details: #' Deployments, media and observations are combined. If there are duplicate IDs #' between x and y, prefixes will be added to all the values of each identifier #' with duplicates, to disambiguate them. +#' Additional resources are added, but not combined. If additional resources +#' have the same name, prefixes will be added to the resource name. #' The following properties are set: #' - **name**: Set to NA. #' - **id**: Set to NULL. diff --git a/R/utils.R b/R/utils.R index 2f9ab4dc..924376d6 100644 --- a/R/utils.R +++ b/R/utils.R @@ -337,10 +337,10 @@ remove_duplicates <- function(data_list) { #' Package resources (deployments, media and observations). Resources with the #' same name are not combined, but prefixes are added to the resource names. #' -#' @param xy_merged +#' @param xy_merged Merged Camera Trap Data Package #' @inheritParams merge_camtrapdp #' -#' @return `xy_merged` +#' @return `xy_merged` Merged Camera Trap Data Package #' @family helper functions #' @noRd merge_additional_resources <- function(xy_merged, x, y, prefix) { diff --git a/man/merge_camtrapdp.Rd b/man/merge_camtrapdp.Rd index 299190c1..67635911 100644 --- a/man/merge_camtrapdp.Rd +++ b/man/merge_camtrapdp.Rd @@ -16,7 +16,7 @@ them. Should be a character vector of length 2. By default, the prefixes are the ID's of the Data Package.} } \value{ -\code{x} +\code{xy_merged} Merged Camera Trap Data Package } \description{ Merge Camera Trap Data packages @@ -26,6 +26,8 @@ Merge Camera Trap Data packages Deployments, media and observations are combined. If there are duplicate IDs between x and y, prefixes will be added to all the values of each identifier with duplicates, to disambiguate them. +Additional resources are added, but not combined. If additional resources +have the same name, prefixes will be added to the resource name. The following properties are set: \itemize{ \item \strong{name}: Set to NA. From 28802d3278d5038090802ea8c6b7c3c72cc8d9af Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Mon, 28 Oct 2024 10:33:41 +0100 Subject: [PATCH 115/142] reorder --- R/utils.R | 112 +++++++++++++++++++++++++++--------------------------- 1 file changed, 57 insertions(+), 55 deletions(-) diff --git a/R/utils.R b/R/utils.R index 924376d6..2803f03f 100644 --- a/R/utils.R +++ b/R/utils.R @@ -39,7 +39,8 @@ expand_cols <- function(df, colnames) { #' Check for duplicated IDs #' -#' Checks for duplicated IDs in two Camera Trap Data Package objects combined. +#' Checks for duplicated IDs (deploymentID, mediaID, observationID and eventID) +#' in two Camera Trap Data Package objects combined. #' #' @param x,y Camera Trap Data Package objects (as returned by #' `read_camtrapdp()`), to be coerced to one. @@ -80,7 +81,8 @@ check_duplicate_ids <- function(x, y) { #' Add prefix to identifiers with duplicates #' -#' Adds prefix to all values of each identifier that has duplicates. +#' Adds prefix to all values of each identifier (deploymentID, mediaID, +#' observationID and eventID) that has duplicates. #' #' @inheritParams print.camtrapdp #' @param prefix The prefix to add to the IDs. @@ -167,6 +169,59 @@ add_prefix <- function(x, results_duplicate_ids, prefix) { return(x) } +#' Merge additional resources +#' +#' Merges resources that are different from the required Camera Trap Data +#' Package resources (deployments, media and observations). Resources with the +#' same name are not combined, but prefixes are added to the resource names. +#' +#' @param xy_merged Merged Camera Trap Data Package +#' @inheritParams merge_camtrapdp +#' +#' @return `xy_merged` Merged Camera Trap Data Package +#' @family helper functions +#' @noRd +merge_additional_resources <- function(xy_merged, x, y, prefix) { + camtrapdp_resources <- c("deployments", "media", "observations") + x_resource_names <- purrr::map(x$resources, ~ .[["name"]]) %>% unlist() + y_resource_names <- purrr::map(y$resources, ~ .[["name"]]) %>% unlist() + x_additional_resources <- + x_resource_names[!x_resource_names %in% camtrapdp_resources] + y_additional_resources <- + y_resource_names[!y_resource_names %in% camtrapdp_resources] + + all_additional_resources <- c(x_additional_resources, y_additional_resources) + + if (length(all_additional_resources) > 0) { + duplicated_resources <- duplicated(all_additional_resources) + duplicated_names <- all_additional_resources[duplicated_resources] + + # Add prefixes to resource names that are not unique + if (any(duplicated_resources)) { + purrr::map(duplicated_names, function(duplicated_name) { + xy_index <- + which(purrr::map(xy_merged$resources, "name") == duplicated_name) + y_index <- which(purrr::map(y$resources, "name") == duplicated_name) + xy_merged$resources[[xy_index]]$name <- + paste0(prefix[1], "_", duplicated_name) + y$resources[[y_index]]$name <- paste0(prefix[2], "_", duplicated_name) + xy_merged$resources <<- append(xy_merged$resources, y$resources[y_index]) + }) + } + + # Add unique resources from y + y_unique_resources <- + y_additional_resources[!y_additional_resources %in% duplicated_names] + purrr::map(y_unique_resources, function(resource_name) { + index <- which(purrr::map(y$resources, "name") == resource_name) + resource <- y$resources[index] + xy_merged$resources <<- append(xy_merged$resources, resource) + }) + } + + return(xy_merged) +} + #' Normalize list elements #' #' Converts each list element to a named vector with consistent handling of @@ -331,59 +386,6 @@ remove_duplicates <- function(data_list) { return(unique_data_list) } -#' Merge additional resources -#' -#' Merges resources that are different from the required Camera Trap Data -#' Package resources (deployments, media and observations). Resources with the -#' same name are not combined, but prefixes are added to the resource names. -#' -#' @param xy_merged Merged Camera Trap Data Package -#' @inheritParams merge_camtrapdp -#' -#' @return `xy_merged` Merged Camera Trap Data Package -#' @family helper functions -#' @noRd -merge_additional_resources <- function(xy_merged, x, y, prefix) { - camtrapdp_resources <- c("deployments", "media", "observations") - x_resource_names <- purrr::map(x$resources, ~ .[["name"]]) %>% unlist() - y_resource_names <- purrr::map(y$resources, ~ .[["name"]]) %>% unlist() - x_additional_resources <- - x_resource_names[!x_resource_names %in% camtrapdp_resources] - y_additional_resources <- - y_resource_names[!y_resource_names %in% camtrapdp_resources] - - all_additional_resources <- c(x_additional_resources, y_additional_resources) - - if (length(all_additional_resources) > 0) { - duplicated_resources <- duplicated(all_additional_resources) - duplicated_names <- all_additional_resources[duplicated_resources] - - # Add prefixes to resource names that are not unique, and add - if (any(duplicated_resources)) { - purrr::map(duplicated_names, function(duplicated_name) { - xy_index <- - which(purrr::map(xy_merged$resources, "name") == duplicated_name) - y_index <- which(purrr::map(y$resources, "name") == duplicated_name) - xy_merged$resources[[xy_index]]$name <- - paste0(prefix[1], "_", duplicated_name) - y$resources[[y_index]]$name <- paste0(prefix[2], "_", duplicated_name) - xy_merged$resources <<- append(xy_merged$resources, y$resources[y_index]) - }) - } - - # Add unique resources from y - y_unique_resources <- - y_additional_resources[!y_additional_resources %in% duplicated_names] - purrr::map(y_unique_resources, function(resource_name) { - index <- which(purrr::map(y$resources, "name") == resource_name) - resource <- y$resources[index] - xy_merged$resources <<- append(xy_merged$resources, resource) - }) - } - - return(xy_merged) -} - #' Creates list of contributors in EML format #' #' @param contributor_list List of contributors From fd068f9555a0bfd37b4f35afa3b1c698836e2618 Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Mon, 28 Oct 2024 12:21:39 +0100 Subject: [PATCH 116/142] Add new helper function --- R/utils.R | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/R/utils.R b/R/utils.R index 2803f03f..7882128f 100644 --- a/R/utils.R +++ b/R/utils.R @@ -386,7 +386,7 @@ remove_duplicates <- function(data_list) { return(unique_data_list) } -#' Creates list of contributors in EML format +#' Create list of contributors in EML format #' #' @param contributor_list List of contributors #' @return List of contributors as emld responsibleParty objects. @@ -406,3 +406,23 @@ create_eml_contributors <- function(contributor_list) { onlineUrl = .$path )) } + +#' Replace NULL values recursively +#' +#' Replaces NULL values with NA by recursively iterating through each element of +#' the input list. +#' +#' @param x A nested list. +#' @return A nested list identical to the input x, but with all NULL values +#' replaced by NA. +#' @family helper functions +#' @noRd +replace_null_recursive <- function(x) { + purrr::map(x, function(element) { + if (is.list(element) && !is.null(element)) { + replace_null_recursive(element) + } else { + ifelse(is.null(element), NA, element) + } + }) +} From 572d5af473f3aa982a4b75e9554007eef754b6a2 Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Mon, 28 Oct 2024 12:22:22 +0100 Subject: [PATCH 117/142] Replace NULL values (generated because of reading JSON) with NA --- R/taxonomic.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/R/taxonomic.R b/R/taxonomic.R index 1a23ae03..28294678 100644 --- a/R/taxonomic.R +++ b/R/taxonomic.R @@ -15,6 +15,9 @@ taxonomic <- function(x) { return(NULL) } + # Replace NULL with NA + taxonomic_list <- replace_null_recursive(taxonomic_list) + # Convert list into a data.frame taxa <- purrr::map( From 84dc619424c58076a721f103cb039a8bb7f5d5ca Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Mon, 28 Oct 2024 12:22:32 +0100 Subject: [PATCH 118/142] Update test-write_camtrapdp.R --- tests/testthat/test-write_camtrapdp.R | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/tests/testthat/test-write_camtrapdp.R b/tests/testthat/test-write_camtrapdp.R index d35ce671..cd3bd68b 100644 --- a/tests/testthat/test-write_camtrapdp.R +++ b/tests/testthat/test-write_camtrapdp.R @@ -29,6 +29,31 @@ test_that("write_camtrapdp() writes a (filtered) dataset that can be read", { expect_lt(nrow(observations(x_written)), nrow(observations(x))) }) +test_that("write_camtrapdp() writes a merged dataset that can be read", { + skip_if_offline() + x <- example_dataset() + + # Download second Camera Trap Data package + temp_dir <- tempdir() + on.exit(unlink(temp_dir, recursive = TRUE)) + zip_file <- file.path(temp_dir, "dataset.zip") + datapackage_file <- file.path(temp_dir, "datapackage.json") + url <- "https://ipt.nlbif.nl/archive.do?r=awd_pilot2" + download.file(url, zip_file, mode = "wb") + unzip(zip_file, exdir = temp_dir) + y <- read_camtrapdp(datapackage_file) + + # Merge + xy_merged <- merge_camtrapdp(x, y) + + # Write + write_camtrapdp(xy_merged, file.path(temp_dir, "processed"), compress = TRUE) + + expect_no_error( + read_camtrapdp(file.path(temp_dir, "processed", "datapackage.json")) + ) +}) + test_that("write_camtrapdp() writes the unaltered example dataset as is", { skip_if_offline() x <- example_dataset() From 7adf2c3aef42c74172656a3786c692a20713a862 Mon Sep 17 00:00:00 2001 From: Peter Desmet Date: Mon, 28 Oct 2024 14:59:10 +0100 Subject: [PATCH 119/142] Use resources() --- R/utils.R | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/R/utils.R b/R/utils.R index 7882128f..415351f9 100644 --- a/R/utils.R +++ b/R/utils.R @@ -183,8 +183,8 @@ add_prefix <- function(x, results_duplicate_ids, prefix) { #' @noRd merge_additional_resources <- function(xy_merged, x, y, prefix) { camtrapdp_resources <- c("deployments", "media", "observations") - x_resource_names <- purrr::map(x$resources, ~ .[["name"]]) %>% unlist() - y_resource_names <- purrr::map(y$resources, ~ .[["name"]]) %>% unlist() + x_resource_names <- frictionless::resources(x) + y_resource_names <- frictionless::resources(y) x_additional_resources <- x_resource_names[!x_resource_names %in% camtrapdp_resources] y_additional_resources <- @@ -251,7 +251,6 @@ normalize_list <- function(data_list, unique_names) { #' Check if one element is equal to or a subset of another and vice versa #' -#' #' @param element1,element2 elements to compare. #' @return logical. #' @family helper functions @@ -269,7 +268,7 @@ normalize_list <- function(data_list, unique_names) { #' role = "principalInvestigator", #' organization = "Research Institute for Nature and Forest (INBO)" #' ) -#' is.subset(element1, element2) +#' is_subset(element1, element2) is_subset <- function(element1, element2) { all( purrr::map_vec(names(element1), function(field) { From 5e7360faf035354c889162fc788cc4aa3288dcf5 Mon Sep 17 00:00:00 2001 From: Sanne Govaert <44606923+sannegovaert@users.noreply.github.com> Date: Tue, 29 Oct 2024 09:19:06 +0100 Subject: [PATCH 120/142] avoid tidyselect warning --- R/taxa.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/taxa.R b/R/taxa.R index f8e95be3..8288e0eb 100644 --- a/R/taxa.R +++ b/R/taxa.R @@ -45,10 +45,10 @@ taxa <- function(x) { dplyr::mutate(columns_with_info = rowSums(!is.na(.))) %>% dplyr::group_by(.data$scientificName) %>% dplyr::filter(dplyr::n() > 1) %>% - dplyr::arrange(dplyr::desc(.data$columns_with_info)) %>% + dplyr::arrange(dplyr::desc(columns_with_info)) %>% dplyr::slice_tail(n = -1) %>% # Remove first row from group (with most info) dplyr::ungroup() %>% - dplyr::select(-.data$columns_with_info) + dplyr::select(-columns_with_info) taxa <- dplyr::anti_join( taxa, duplicates_with_least_info, From b01c581c69beeb155fba84e477a487bc5a09420e Mon Sep 17 00:00:00 2001 From: Peter Desmet Date: Tue, 29 Oct 2024 14:23:35 +0100 Subject: [PATCH 121/142] Create a new helper function to prefix duplicates/identifiers --- R/merge_camtrapdp.R | 47 +++++++-------- R/utils-duplicates.R | 67 ++++++++++++++++++++++ R/utils.R | 132 ------------------------------------------- 3 files changed, 88 insertions(+), 158 deletions(-) create mode 100644 R/utils-duplicates.R diff --git a/R/merge_camtrapdp.R b/R/merge_camtrapdp.R index 3aa499de..78c6b1ec 100644 --- a/R/merge_camtrapdp.R +++ b/R/merge_camtrapdp.R @@ -66,35 +66,30 @@ merge_camtrapdp <- function(x, y, prefix = c(x$id, y$id)) { } } - # check if identifiers have duplicates - results_duplicate_ids <- check_duplicate_ids(x, y) - - # Add prefix to identifiers with duplicates - if (TRUE %in% results_duplicate_ids) { - - if (!is.character(prefix) || length(prefix) != 2) { - cli::cli_abort( - c( - paste( - "{.arg prefix} must be a character vector of length 2, not", - "a {class(prefix)} object of length {length(prefix)}." - ) - ), - class = "camtrapdp_error_prefix_invalid" - ) - } - - if (any(is.na(prefix))) { - cli::cli_abort( - "{.arg prefix} can't be 'NA'.", - class = "camtrapdp_error_prefix_NA" - ) - } + if (!is.character(prefix) || length(prefix) != 2) { + cli::cli_abort( + c( + paste( + "{.arg prefix} must be a character vector of length 2, not", + "a {class(prefix)} object of length {length(prefix)}." + ) + ), + class = "camtrapdp_error_prefix_invalid" + ) + } - x <- add_prefix(x, results_duplicate_ids, paste0(prefix[1], "_")) - y <- add_prefix(y, results_duplicate_ids, paste0(prefix[2], "_")) + if (any(is.na(prefix))) { + cli::cli_abort( + "{.arg prefix} can't be 'NA'.", + class = "camtrapdp_error_prefix_NA" + ) } + # Add prefix to duplicate identifiers + x_original <- x + x <- prefix_identifiers(x, y, prefix[1]) + y <- prefix_identifiers(y, x_original, prefix[2]) + # Merge Camera Trap DP resources xy_merged <- x deployments(xy_merged) <- dplyr::bind_rows(deployments(x), deployments(y)) diff --git a/R/utils-duplicates.R b/R/utils-duplicates.R new file mode 100644 index 00000000..bdc33a3c --- /dev/null +++ b/R/utils-duplicates.R @@ -0,0 +1,67 @@ +#' Add a prefix to duplicates +#' +#' Compares two character vectors and adds a prefix to any values in `a` that +#' also occur in `b`. +#' +#' @param a,b Character vectors. +#' @param prefix Prefix to add, as `_`. +#' @return `a` with updated values. +#' @family helper functions +#' @noRd +#' @examples +#' a <- c("a", "a", "b", "c", 1, 1, NA) +#' b <- c( "b", 1, 1, "e", NA) +#' prefix_duplicates(a, b, "prefix") +prefix_duplicates <- function(a, b, prefix) { + duplicates <- a %in% b + a_updated <- purrr::map2_chr( + a, + duplicates, + ~ if(.y && !is.na(.x)) paste(prefix, .x, sep = "_") else .x + ) + return(a_updated) +} + +#' Add a prefix to identifiers in Camera Trap Data Package that occur in another +#' +#' Compares two Camera Trap Data Package objects (`x` and `y`) and adds a prefix +#' to any relevant identifiers in `x` that also occur in `y`: +#' - `deploymentID` (in deployments, media and observations) +#' - `mediaID` (in media and observations) +#' - `observationID` (in observations) +#' - `eventID` (in media and observations) +#' +#' @param x,y Camera Trap Data Package objects to compare +#' @param prefix Prefix to add, as `_`. +#' @return `x` with updated identifiers. +#' @family helper functions +#' @noRd +prefix_identifiers <- function(x, y, prefix) { + y_deployment_ids <- purrr::pluck(deployments(y), "deploymentID") + y_media_ids <- purrr::pluck(media(y), "mediaID") + y_observation_ids <- purrr::pluck(observations(y), "observationID") + y_event_ids <- purrr::pluck(observations(y), "eventID") + + deployments(x) <- + deployments(x) %>% + dplyr::mutate( + deploymentID = prefix_duplicates(deploymentID, y_deployment_ids, prefix) + ) + media(x) <- + media(x) %>% + dplyr::mutate( + deploymentID = prefix_duplicates(deploymentID, y_deployment_ids, prefix), + mediaID = prefix_duplicates(mediaID, y_media_ids, prefix), + eventID = prefix_duplicates(eventID, y_event_ids, prefix) + ) + observations(x) <- + observations(x) %>% + dplyr::mutate( + deploymentID = prefix_duplicates(deploymentID, y_deployment_ids, prefix), + observationID = prefix_duplicates(observationID, y_observation_ids, prefix), + mediaID = prefix_duplicates(mediaID, y_media_ids, prefix), + eventID = prefix_duplicates(eventID, y_event_ids, prefix) + ) + + return(x) +} diff --git a/R/utils.R b/R/utils.R index 415351f9..f5ebc942 100644 --- a/R/utils.R +++ b/R/utils.R @@ -37,138 +37,6 @@ expand_cols <- function(df, colnames) { return(df) } -#' Check for duplicated IDs -#' -#' Checks for duplicated IDs (deploymentID, mediaID, observationID and eventID) -#' in two Camera Trap Data Package objects combined. -#' -#' @param x,y Camera Trap Data Package objects (as returned by -#' `read_camtrapdp()`), to be coerced to one. -#' @return List with logical for each type of ID, that indicates whether that -#' ID type has duplicates between x and y. -#' @family helper functions -#' @noRd -check_duplicate_ids <- function(x, y) { - result = list( - deploymentID = FALSE, mediaID = FALSE, observationID = FALSE, - eventID = FALSE) - - deploymentIDs <- c( - unique(purrr::pluck(deployments(x), "deploymentID")), - unique(purrr::pluck(deployments(y), "deploymentID")) - ) - mediaIDs <- c( - unique(purrr::pluck(media(x), "mediaID")), - unique(purrr::pluck(media(y), "mediaID")) - ) - observationIDs <- c( - unique(purrr::pluck(observations(x), "observationID")), - unique(purrr::pluck(observations(y), "observationID")) - ) - eventIDs <- c( - unique(purrr::pluck(media(x), "eventID")), - unique(purrr::pluck(media(y), "eventID")) - ) - - # Check for duplicates - if (any(duplicated(deploymentIDs))) {result$deploymentID <- TRUE} - if (any(duplicated(mediaIDs))) {result$mediaID <- TRUE} - if (any(duplicated(observationIDs))) {result$observationID <- TRUE} - if (any(duplicated(eventIDs))) {result$eventID <- TRUE} - - return(result) -} - -#' Add prefix to identifiers with duplicates -#' -#' Adds prefix to all values of each identifier (deploymentID, mediaID, -#' observationID and eventID) that has duplicates. -#' -#' @inheritParams print.camtrapdp -#' @param prefix The prefix to add to the IDs. -#' @param results_duplicate_ids Output generated with `check_duplicate_ids()`. -#' List with logical for each type of ID, that indicates whether that ID type -#' has duplicates. -#' @return `x` -#' @family helper functions -#' @noRd -#' @examples -#' results_duplicate_ids <- list(deploymentID = TRUE, mediaID = TRUE, -#' observationID = TRUE, eventID = TRUE) -#' x <- add_prefix(example_dataset(), results_duplicate_ids, prefix = ".x") -add_prefix <- function(x, results_duplicate_ids, prefix) { - - # deploymentID - if (results_duplicate_ids$deploymentID) { - # Add prefix to deploymentIDs in deployments - deployments(x) <- - deployments(x) %>% - dplyr::mutate(deploymentID = paste0(prefix, .data$deploymentID)) - - # Add prefix to deploymentIDs in observations - observations(x) <- - observations(x) %>% - dplyr::mutate(deploymentID = paste0(prefix, .data$deploymentID)) - - # Add prefix to deploymentIDs in media - media(x) <- - media(x) %>% - dplyr::mutate(deploymentID = paste0(prefix, .data$deploymentID)) - } - - # mediaID - if (results_duplicate_ids$mediaID) { - # Add prefix to mediaIDs in media - media(x) <- - media(x) %>% - dplyr::mutate( - mediaID = ifelse( - !is.na(.data$mediaID), paste0(prefix, .data$mediaID), NA - ) - ) - - # Add prefix to mediaIDs in observations - observations(x) <- - observations(x) %>% - dplyr::mutate( - mediaID = ifelse( - !is.na(.data$mediaID), paste0(prefix, .data$mediaID), NA - ) - ) - } - - # observationID - if (results_duplicate_ids$observationID) { - # Add prefix to observationIDs in observations - observations(x) <- - observations(x) %>% - dplyr::mutate(observationID = paste0(prefix, .data$observationID)) - } - - # eventID - if (results_duplicate_ids$eventID) { - # Add prefix to eventIDs in media - media(x) <- - media(x) %>% - dplyr::mutate( - eventID = ifelse( - !is.na(.data$eventID), paste0(prefix, .data$eventID), NA - ) - ) - - # Add prefix to eventIDs in observations - observations(x) <- - observations(x) %>% - dplyr::mutate( - eventID = ifelse( - !is.na(.data$eventID), paste0(prefix, .data$eventID), NA - ) - ) - } - - return(x) -} - #' Merge additional resources #' #' Merges resources that are different from the required Camera Trap Data From add540a1f454f14f114a12d80d728d25e7a1f286 Mon Sep 17 00:00:00 2001 From: Peter Desmet Date: Tue, 29 Oct 2024 14:23:41 +0100 Subject: [PATCH 122/142] Silence downloads --- tests/testthat/test-merge_camtrapdp.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-merge_camtrapdp.R b/tests/testthat/test-merge_camtrapdp.R index 1f5992d5..f9a5ba4f 100644 --- a/tests/testthat/test-merge_camtrapdp.R +++ b/tests/testthat/test-merge_camtrapdp.R @@ -254,7 +254,7 @@ test_that("merge_camtrapdp() returns the expected metadata when merging two zip_file <- file.path(temp_dir, "dataset.zip") datapackage_file <- file.path(temp_dir, "datapackage.json") url <- "https://ipt.nlbif.nl/archive.do?r=awd_pilot2" - download.file(url, zip_file, mode = 'wb') + download.file(url, zip_file, mode = 'wb', quiet = TRUE) unzip(zip_file, exdir = temp_dir) y <- read_camtrapdp(datapackage_file) @@ -536,7 +536,7 @@ test_that("merge_camtrapdp() can be used in a pipe to merge multiple datapackage_file <- file.path(temp_dir, "datapackage.json") url <- "https://ipt.nlbif.nl/archive.do?r=awd_pilot2" - download.file(url, zip_file, mode = 'wb') + download.file(url, zip_file, mode = 'wb', quiet = TRUE) unzip(zip_file, exdir = temp_dir) x <- read_camtrapdp(datapackage_file) From f3e29d9d6a4095dbc9819ceb277c8da388075e8f Mon Sep 17 00:00:00 2001 From: Peter Desmet Date: Tue, 29 Oct 2024 17:26:21 +0100 Subject: [PATCH 123/142] Always use and require dataset$id, remove prefix argument - Update description - Remove test for merging - Simplify error --- R/merge_camtrapdp.R | 109 +++++++++++++------------ man/merge_camtrapdp.Rd | 76 ++++++++--------- tests/testthat/test-merge_camtrapdp.R | 112 +++++++------------------- 3 files changed, 122 insertions(+), 175 deletions(-) diff --git a/R/merge_camtrapdp.R b/R/merge_camtrapdp.R index 78c6b1ec..39a078a6 100644 --- a/R/merge_camtrapdp.R +++ b/R/merge_camtrapdp.R @@ -1,34 +1,45 @@ -#' Merge Camera Trap Data packages +#' Merge two Camera Trap Data Packages #' -#' @param x,y Camera Trap Data Package objects (as returned by -#' `read_camtrapdp()`), to be coerced to one. -#' @param prefix If there are duplicate IDs between x and y, prefixes will be -#' added to all the values of each identifier with duplicates, to disambiguate -#' them. Should be a character vector of length 2. By default, the prefixes are -#' the ID's of the Data Package. -#' @return `xy_merged` Merged Camera Trap Data Package +#' Merges two Camera Trap Data Package objects into one. +#' +#' @param x,y Camera Trap Data Package objects, as returned by +#' [read_camtrapdp()]. +#' @return A single Camera Trap Data Package object that is the combination of +#' `x` and `y`. #' @family transformation functions #' @export -#' @section Merging details: -#' Deployments, media and observations are combined. If there are duplicate IDs -#' between x and y, prefixes will be added to all the values of each identifier -#' with duplicates, to disambiguate them. -#' Additional resources are added, but not combined. If additional resources -#' have the same name, prefixes will be added to the resource name. -#' The following properties are set: -#' - **name**: Set to NA. -#' - **id**: Set to NULL. +#' @section Transformation details: +#' +#' Both `x` and `y` must have a unique dataset identifier `x$id` and `y$id`. +#' This identifier is used to prefix identifiers in the data that occur in both +#' datasets. +#' For example: +#' - `x` contains `deploymentID`s `c("a", "b")`. +#' - `y` contains `deploymentID`s `c("b", "c")`. +#' - Then merged `xy` will contain `deploymentID`s `c("a", "x_b", "y_b", "c")`. +#' +#' Data are merged as follows: +#' - Deployments are combined, with `deploymentID` kept unique. +#' - Media are combined, with `mediaID`, `deploymentID` and `eventID` kept +#' unique. +#' - Observations are combined, with `observationID`, `deploymentID`, `mediaID` +#' and `eventID` kept unique. +#' - Additional resources are retained, with the resource name kept unique. +#' +#' Metadata properties are merged as follows: +#' - **name**: Set to `NA`. +#' - **id**: Removed. #' - **created**: Set to current timestamp. -#' - **title**: Set to NA. +#' - **title**: Set to `NA`. #' - **contributors**: A combination is made and duplicates are removed. #' - **description**: A combination is made. -#' - **version**: Set to 1.0. +#' - **version**: Set to `1.0`. #' - **keywords**: A combination is made and duplicates are removed. -#' - **image**: Set to NULL. -#' - **homepage**: Set to NULL. +#' - **image**: Removed. +#' - **homepage**: Removed. #' - **sources**: A combination is made and duplicates are removed. #' - **licenses**: A combination is made and duplicates are removed. -#' - **bibliographicCitation**: Set to NULL. +#' - **bibliographicCitation**: Removed. #' - **project**: List of the projects. #' - **coordinatePrecision**: Set to the least precise `coordinatePrecision`. #' - **spatial**: Reset based on the new deployments. @@ -36,59 +47,47 @@ #' - **taxonomic**: A combination is made and duplicates are removed. #' - **relatedIdentifiers**: A combination is made and duplicates are removed. #' - **references**: A combination is made and duplicates are removed. -#' @section Merging multiple Camera Trap Data Packages: -#' `merge_camtrapdp()` can be used in a pipe to merge multiple camtrap DP. -#' - x %>% merge_camtrapdp(y) %>% merge_camtrapdp(z) #' @examples #' x <- example_dataset() %>% #' filter_deployments(deploymentID %in% c("00a2c20d", "29b7d356")) #' y <- example_dataset() %>% #' filter_deployments(deploymentID %in% c("577b543a", "62c200a9")) -#' x$id <- "1" -#' y$id <- "2" -#' xy_merged <- merge_camtrapdp(x, y) -merge_camtrapdp <- function(x, y, prefix = c(x$id, y$id)) { +#' x$id <- "x" +#' y$id <- "y" +#' merge_camtrapdp(x, y) +merge_camtrapdp <- function(x, y) { check_camtrapdp(x) check_camtrapdp(y) - if (!is.null(x$id) & !is.null(y$id)) { - if (x$id == y$id) { + # Check identifiers + check_identifier <- function(id, arg) { + if (is.null(id) || is.na(id) || !is.character(id)) { cli::cli_abort( c( - paste0( - "{.arg x} and {.arg y} should be different Camera Trap Data", - "Package objects with unique identifiers." - ), - x = "{.arg x} and {.arg y} have the same id: {.value x$id}" + "{.arg {arg}} must have a unique (character) identifier.", + "i" = "Assign one to {.field {arg}$id}." ), - class = "camtrapdp_error_camtrapdpid_duplicated" + class = "camtrapdp_error_identifier_invalid" ) } } - - if (!is.character(prefix) || length(prefix) != 2) { + check_identifier(x$id, "x") + check_identifier(y$id, "y") + if (x$id == y$id) { cli::cli_abort( c( - paste( - "{.arg prefix} must be a character vector of length 2, not", - "a {class(prefix)} object of length {length(prefix)}." - ) + "{.arg x} and {.arg y} must have different unique identifiers.", + "x" = "{.field x$id} and {.field y$id} currently have the same value: + {.val {x$id}}." ), - class = "camtrapdp_error_prefix_invalid" - ) - } - - if (any(is.na(prefix))) { - cli::cli_abort( - "{.arg prefix} can't be 'NA'.", - class = "camtrapdp_error_prefix_NA" + class = "camtrapdp_error_identifier_duplicated" ) } - # Add prefix to duplicate identifiers + # Add prefix to duplicate identifiers in data x_original <- x - x <- prefix_identifiers(x, y, prefix[1]) - y <- prefix_identifiers(y, x_original, prefix[2]) + x <- prefix_identifiers(x, y, x$id) + y <- prefix_identifiers(y, x_original, y$id) # Merge Camera Trap DP resources xy_merged <- x @@ -97,7 +96,7 @@ merge_camtrapdp <- function(x, y, prefix = c(x$id, y$id)) { observations(xy_merged) <- dplyr::bind_rows(observations(x), observations(y)) # Merge additional resources - xy_merged <- merge_additional_resources(xy_merged, x, y, prefix) + xy_merged <- merge_additional_resources(xy_merged, x, y, c(x$id, y$id)) # Merge/update metadata xy_merged$name <- NA diff --git a/man/merge_camtrapdp.Rd b/man/merge_camtrapdp.Rd index 67635911..3ecd0d17 100644 --- a/man/merge_camtrapdp.Rd +++ b/man/merge_camtrapdp.Rd @@ -2,47 +2,59 @@ % Please edit documentation in R/merge_camtrapdp.R \name{merge_camtrapdp} \alias{merge_camtrapdp} -\title{Merge Camera Trap Data packages} +\title{Merge two Camera Trap Data Packages} \usage{ -merge_camtrapdp(x, y, prefix = c(x$id, y$id)) +merge_camtrapdp(x, y) } \arguments{ -\item{x, y}{Camera Trap Data Package objects (as returned by -\code{read_camtrapdp()}), to be coerced to one.} - -\item{prefix}{If there are duplicate IDs between x and y, prefixes will be -added to all the values of each identifier with duplicates, to disambiguate -them. Should be a character vector of length 2. By default, the prefixes are -the ID's of the Data Package.} +\item{x, y}{Camera Trap Data Package objects, as returned by +\code{\link[=read_camtrapdp]{read_camtrapdp()}}.} } \value{ -\code{xy_merged} Merged Camera Trap Data Package +A single Camera Trap Data Package object that is the combination of +\code{x} and \code{y}. } \description{ -Merge Camera Trap Data packages +Merges two Camera Trap Data Package objects into one. } -\section{Merging details}{ +\section{Transformation details}{ + -Deployments, media and observations are combined. If there are duplicate IDs -between x and y, prefixes will be added to all the values of each identifier -with duplicates, to disambiguate them. -Additional resources are added, but not combined. If additional resources -have the same name, prefixes will be added to the resource name. -The following properties are set: +Both \code{x} and \code{y} must have a unique dataset identifier \code{x$id} and \code{y$id}. +This identifier is used to prefix identifiers in the data that occur in both +datasets. +For example: \itemize{ -\item \strong{name}: Set to NA. -\item \strong{id}: Set to NULL. +\item \code{x} contains \code{deploymentID}s \code{c("a", "b")}. +\item \code{y} contains \code{deploymentID}s \code{c("b", "c")}. +\item Then merged \code{xy} will contain \code{deploymentID}s \code{c("a", "x_b", "y_b", "c")}. +} + +Data are merged as follows: +\itemize{ +\item Deployments are combined, with \code{deploymentID} kept unique. +\item Media are combined, with \code{mediaID}, \code{deploymentID} and \code{eventID} kept +unique. +\item Observations are combined, with \code{observationID}, \code{deploymentID}, \code{mediaID} +and \code{eventID} kept unique. +\item Additional resources are retained, with the resource name kept unique. +} + +Metadata properties are merged as follows: +\itemize{ +\item \strong{name}: Set to \code{NA}. +\item \strong{id}: Removed. \item \strong{created}: Set to current timestamp. -\item \strong{title}: Set to NA. +\item \strong{title}: Set to \code{NA}. \item \strong{contributors}: A combination is made and duplicates are removed. \item \strong{description}: A combination is made. -\item \strong{version}: Set to 1.0. +\item \strong{version}: Set to \code{1.0}. \item \strong{keywords}: A combination is made and duplicates are removed. -\item \strong{image}: Set to NULL. -\item \strong{homepage}: Set to NULL. +\item \strong{image}: Removed. +\item \strong{homepage}: Removed. \item \strong{sources}: A combination is made and duplicates are removed. \item \strong{licenses}: A combination is made and duplicates are removed. -\item \strong{bibliographicCitation}: Set to NULL. +\item \strong{bibliographicCitation}: Removed. \item \strong{project}: List of the projects. \item \strong{coordinatePrecision}: Set to the least precise \code{coordinatePrecision}. \item \strong{spatial}: Reset based on the new deployments. @@ -53,22 +65,14 @@ The following properties are set: } } -\section{Merging multiple Camera Trap Data Packages}{ - -\code{merge_camtrapdp()} can be used in a pipe to merge multiple camtrap DP. -\itemize{ -\item x \%>\% merge_camtrapdp(y) \%>\% merge_camtrapdp(z) -} -} - \examples{ x <- example_dataset() \%>\% filter_deployments(deploymentID \%in\% c("00a2c20d", "29b7d356")) y <- example_dataset() \%>\% filter_deployments(deploymentID \%in\% c("577b543a", "62c200a9")) -x$id <- "1" -y$id <- "2" -xy_merged <- merge_camtrapdp(x, y) +x$id <- "x" +y$id <- "y" +merge_camtrapdp(x, y) } \seealso{ Other transformation functions: diff --git a/tests/testthat/test-merge_camtrapdp.R b/tests/testthat/test-merge_camtrapdp.R index f9a5ba4f..6c4b83c0 100644 --- a/tests/testthat/test-merge_camtrapdp.R +++ b/tests/testthat/test-merge_camtrapdp.R @@ -8,43 +8,41 @@ test_that("merge_camtrapdp() returns a valid camtrapdp object", { expect_no_error(check_camtrapdp(merge_camtrapdp(x, y))) }) -test_that("merge_camtrapdp() returns error on duplicate Data Package id", { +test_that("merge_camtrapdp() returns error on duplicate or missing package id", { skip_if_offline() x <- example_dataset() + + # Duplicate identifiers expect_error( merge_camtrapdp(x, x), - class = "camtrapdp_error_camtrapdpid_duplicated" + class = "camtrapdp_error_identifier_duplicated" ) -}) -test_that("merge_camtrapdp() returns error on invalid prefix", { - skip_if_offline() - x <- example_dataset() - y <- example_dataset() - x$id <- "1" - y$id <- "2" + # Invalid identifier + y <- x + x$id <- NULL expect_error( - merge_camtrapdp(x, y, prefix = c(1, 2)), - class = "camtrapdp_error_prefix_invalid" + merge_camtrapdp(x, y), + class = "camtrapdp_error_identifier_invalid" ) + x$id <- NA_character_ expect_error( - merge_camtrapdp(x, y, prefix = c("one", "two", "three")), - class = "camtrapdp_error_prefix_invalid" + merge_camtrapdp(x, y), + class = "camtrapdp_error_identifier_invalid" ) + x$id <- 1 expect_error( - merge_camtrapdp(x, y, prefix = c("one", NA)), - class = "camtrapdp_error_prefix_NA" + merge_camtrapdp(x, y), + class = "camtrapdp_error_identifier_invalid" ) - - expect_no_error(merge_camtrapdp(x, y)) - expect_no_error(merge_camtrapdp(x, y, prefix = c("this", "works"))) - - x$id <- NULL - y$id <- NULL + x$id <- "x" + y$id <- 1 expect_error( merge_camtrapdp(x, y), - class = "camtrapdp_error_prefix_invalid" + class = "camtrapdp_error_identifier_invalid" ) + y$id <- "y" + expect_no_error(merge_camtrapdp(x, y)) }) test_that("merge_camtrapdp() returns unique deploymentIDs, mediaIDs and @@ -107,43 +105,6 @@ test_that("merge_camtrapdp() adds default prefixes to all values of identifiers expect_true("2_4bb69c45" %in% observations(xy_merged)$eventID) }) -test_that("merge_camtrapdp() adds custom prefixes to all values of identifiers - (deploymentID, mediaID, observationID and eventID) with duplicates - between packages, but not for mediaID = NA", { - skip_if_offline() - x <- example_dataset() - y <- example_dataset() - x$id <- NULL - y$id <- NULL - xy_merged <- merge_camtrapdp(x, y, prefix = c("x", "y")) - - # deploymentID - expect_true("x_00a2c20d" %in% deployments(xy_merged)$deploymentID) - expect_true("y_00a2c20d" %in% deployments(xy_merged)$deploymentID) - expect_true("x_00a2c20d" %in% media(xy_merged)$deploymentID) - expect_true("y_00a2c20d" %in% media(xy_merged)$deploymentID) - expect_true("x_00a2c20d" %in% observations(xy_merged)$deploymentID) - expect_true("y_00a2c20d" %in% observations(xy_merged)$deploymentID) - - # mediaID - expect_true("x_07840dcc" %in% media(xy_merged)$mediaID) - expect_true("y_07840dcc" %in% media(xy_merged)$mediaID) - expect_true("x_07840dcc" %in% observations(xy_merged)$mediaID) - expect_true("y_07840dcc" %in% observations(xy_merged)$mediaID) - expect_false("x_NA" %in% observations(xy_merged)$mediaID) - expect_true(NA %in% observations(xy_merged)$mediaID) - - # observationID - expect_true("x_705e6036" %in% observations(xy_merged)$observationID) - expect_true("y_705e6036" %in% observations(xy_merged)$observationID) - - # eventID - expect_true("x_4bb69c45" %in% media(xy_merged)$eventID) - expect_true("y_4bb69c45" %in% media(xy_merged)$eventID) - expect_true("x_4bb69c45" %in% observations(xy_merged)$eventID) - expect_true("y_4bb69c45" %in% observations(xy_merged)$eventID) -}) - test_that("merge_camtrapdp() adds default prefixes to the names of additional resources that are not unique and not required by Camera Trap Data Package standard", { @@ -244,7 +205,7 @@ test_that("merge_camtrapdp() returns the expected metadata ", { }) test_that("merge_camtrapdp() returns the expected metadata when merging two - different Data Packages", { + different Data Packages", { skip_if_offline() x <- example_dataset() @@ -257,6 +218,7 @@ test_that("merge_camtrapdp() returns the expected metadata when merging two download.file(url, zip_file, mode = 'wb', quiet = TRUE) unzip(zip_file, exdir = temp_dir) y <- read_camtrapdp(datapackage_file) + y$id <- "y" # Merge xy_merged <- merge_camtrapdp(x, y) @@ -496,6 +458,12 @@ test_that("merge_camtrapdp() returns the expected metadata when merging two relatedIdentifier = "7cca70f5-ef8c-4f86-85fb-8f070937d7ab", resourceTypeGeneral = "Data package", relatedIdentifierType = "id" + ), + list( + relationType = "IsDerivedFrom", + relatedIdentifier = "y", + resourceTypeGeneral = "Data package", + relatedIdentifierType = "id" ) ) @@ -525,27 +493,3 @@ test_that("merge_camtrapdp() returns the expected metadata when merging two expect_identical(xy_merged$directory, ".") expect_identical(xy_merged$relatedIdentifiers, relatedIdentifiers_merged) }) - -test_that("merge_camtrapdp() can be used in a pipe to merge multiple - camtrap DP", { - skip_if_offline() - - temp_dir <- tempdir() - on.exit(unlink(temp_dir, recursive = TRUE)) - zip_file <- file.path(temp_dir, "dataset.zip") - datapackage_file <- file.path(temp_dir, "datapackage.json") - url <- "https://ipt.nlbif.nl/archive.do?r=awd_pilot2" - - download.file(url, zip_file, mode = 'wb', quiet = TRUE) - unzip(zip_file, exdir = temp_dir) - - x <- read_camtrapdp(datapackage_file) - y <- example_dataset() %>% - filter_deployments(deploymentID %in% c("00a2c20d", "29b7d356")) - z <- example_dataset() %>% - filter_deployments(deploymentID %in% c("577b543a", "62c200a9")) - y$id <- "y" - z$id <- "z" - - expect_no_error(x %>% merge_camtrapdp(y) %>% merge_camtrapdp(z)) -}) From 9dacf3f62a5756c91de81fd071003acaba2477b1 Mon Sep 17 00:00:00 2001 From: Peter Desmet Date: Tue, 5 Nov 2024 09:13:09 +0100 Subject: [PATCH 124/142] Print name of dataset + add additional_resources() helper --- R/print.R | 18 +++++++++--------- R/utils.R | 12 ++++++++++++ tests/testthat/test-print.R | 2 +- 3 files changed, 22 insertions(+), 10 deletions(-) diff --git a/R/print.R b/R/print.R index bc64e7f5..ef689c0b 100644 --- a/R/print.R +++ b/R/print.R @@ -20,26 +20,26 @@ print.camtrapdp <- function(x, ...) { # check_camtrapdp() not necessary: print only triggered for camtrapdp object # Calculate number of rows for the tables (resources in x$data) - tables_rows <- + tables <- purrr::pluck(x, "data") %>% purrr::map(nrow) + tables_length <- length(tables) - # List tables - tables <- names(tables_rows) + # Show name and tables + name <- if (!is.null(x$name)) cli::format_inline("{.val {x$name}} ") else "" cli::cat_line( cli::format_inline( - "A Camera Trap Data Package with {length(tables)} table{?s}{?./:/:}" + "A Camera Trap Data Package {name}with {tables_length} table{?s}{?./:/:}" ) ) purrr::walk2( - names(tables_rows), - tables_rows, + names(tables), + tables, ~ cli::cat_bullet(cli::format_inline("{.x}: {.val {.y}} rows")) ) - # List additional resources (not in x$data), if any - resources <- frictionless::resources(x) - extra_resources <- resources[!resources %in% tables] + # List additional resources, if any + extra_resources <- additional_resources(x) if (length(extra_resources) > 0) { cli::cat_line("") cli::cat_line( diff --git a/R/utils.R b/R/utils.R index f5ebc942..5c9e3d2f 100644 --- a/R/utils.R +++ b/R/utils.R @@ -37,6 +37,18 @@ expand_cols <- function(df, colnames) { return(df) } +#' Lists the names of additional resources in a Camera Trap Data Package +#' +#' @inheritParams print.camtrapdp +#' @return Character vector with the additional resource names. +#' @family helper functions +#' @noRd +additional_resources <- function(x) { + camtrapdp_resource_names <- c("deployments", "media", "observations") + resource_names <- frictionless::resources(x) + resource_names[!resource_names %in% camtrapdp_resource_names] +} + #' Merge additional resources #' #' Merges resources that are different from the required Camera Trap Data diff --git a/tests/testthat/test-print.R b/tests/testthat/test-print.R index 283a887f..a8ee4e70 100644 --- a/tests/testthat/test-print.R +++ b/tests/testthat/test-print.R @@ -16,7 +16,7 @@ test_that("print() informs about the number of tables, their rows and unclass()" expect_output( print(x_no_additional), regexp = paste( - "A Camera Trap Data Package with 3 tables:", + "A Camera Trap Data Package \"camtrap-dp-example-dataset\" with 3 tables:", "* deployments: 4 rows", "* media: 423 rows", "* observations: 549 rows", From 13f6fc8fd092abb81c4b2bc88ca1b4f4cfe96481 Mon Sep 17 00:00:00 2001 From: Peter Desmet Date: Tue, 5 Nov 2024 09:20:03 +0100 Subject: [PATCH 125/142] Use name, not identifier as prefix --- R/merge_camtrapdp.R | 33 +++++++++++++------------- man/merge_camtrapdp.Rd | 4 ++-- tests/testthat/test-merge_camtrapdp.R | 34 +++++++++++++-------------- 3 files changed, 36 insertions(+), 35 deletions(-) diff --git a/R/merge_camtrapdp.R b/R/merge_camtrapdp.R index 39a078a6..f5598bb7 100644 --- a/R/merge_camtrapdp.R +++ b/R/merge_camtrapdp.R @@ -10,8 +10,8 @@ #' @export #' @section Transformation details: #' -#' Both `x` and `y` must have a unique dataset identifier `x$id` and `y$id`. -#' This identifier is used to prefix identifiers in the data that occur in both +#' Both `x` and `y` must have a unique dataset name `x$name` and `y$name`. +#' This name is used to prefix identifiers in the data that occur in both #' datasets. #' For example: #' - `x` contains `deploymentID`s `c("a", "b")`. @@ -59,30 +59,31 @@ merge_camtrapdp <- function(x, y) { check_camtrapdp(x) check_camtrapdp(y) - # Check identifiers - check_identifier <- function(id, arg) { - if (is.null(id) || is.na(id) || !is.character(id)) { + # Check names + check_name <- function(name, arg) { + if (is.null(name) || is.na(name) || !is.character(name)) { cli::cli_abort( c( - "{.arg {arg}} must have a unique (character) identifier.", - "i" = "Assign one to {.field {arg}$id}." + "{.arg {arg}} must have a unique (character) name.", + "i" = "Assign one to {.field {arg}$name}." ), - class = "camtrapdp_error_identifier_invalid" + class = "camtrapdp_error_name_invalid" ) } } - check_identifier(x$id, "x") - check_identifier(y$id, "y") - if (x$id == y$id) { + check_name(x$name, "x") + check_name(y$name, "y") + if (x$name == y$name) { cli::cli_abort( c( - "{.arg x} and {.arg y} must have different unique identifiers.", - "x" = "{.field x$id} and {.field y$id} currently have the same value: - {.val {x$id}}." + "{.arg x} and {.arg y} must have different unique names.", + "x" = "{.field x$name} and {.field y$name} currently have the same + value: {.val {x$name}}." ), - class = "camtrapdp_error_identifier_duplicated" + class = "camtrapdp_error_name_duplicated" ) } + prefixes <- c(x$name, y$name) # Add prefix to duplicate identifiers in data x_original <- x @@ -99,7 +100,7 @@ merge_camtrapdp <- function(x, y) { xy_merged <- merge_additional_resources(xy_merged, x, y, c(x$id, y$id)) # Merge/update metadata - xy_merged$name <- NA + xy_merged$name <- NULL xy_merged$id <- NULL xy_merged$created <- format(Sys.time(), "%Y-%m-%dT%H:%M:%SZ") xy_merged$title <- NA diff --git a/man/merge_camtrapdp.Rd b/man/merge_camtrapdp.Rd index 3ecd0d17..026c939f 100644 --- a/man/merge_camtrapdp.Rd +++ b/man/merge_camtrapdp.Rd @@ -20,8 +20,8 @@ Merges two Camera Trap Data Package objects into one. \section{Transformation details}{ -Both \code{x} and \code{y} must have a unique dataset identifier \code{x$id} and \code{y$id}. -This identifier is used to prefix identifiers in the data that occur in both +Both \code{x} and \code{y} must have a unique dataset name \code{x$name} and \code{y$name}. +This name is used to prefix identifiers in the data that occur in both datasets. For example: \itemize{ diff --git a/tests/testthat/test-merge_camtrapdp.R b/tests/testthat/test-merge_camtrapdp.R index 6c4b83c0..dc399723 100644 --- a/tests/testthat/test-merge_camtrapdp.R +++ b/tests/testthat/test-merge_camtrapdp.R @@ -1,47 +1,47 @@ test_that("merge_camtrapdp() returns a valid camtrapdp object", { skip_if_offline() x <- example_dataset() - y <- example_dataset() - x$id <- "1" - y$id <- "2" + y <- x + x$name <- "x" + y$name <- "y" expect_no_error(check_camtrapdp(merge_camtrapdp(x, y))) }) -test_that("merge_camtrapdp() returns error on duplicate or missing package id", { +test_that("merge_camtrapdp() returns error on duplicate or missing package name", { skip_if_offline() x <- example_dataset() # Duplicate identifiers expect_error( merge_camtrapdp(x, x), - class = "camtrapdp_error_identifier_duplicated" + class = "camtrapdp_error_name_duplicated" ) # Invalid identifier y <- x - x$id <- NULL + x$name <- NULL expect_error( merge_camtrapdp(x, y), - class = "camtrapdp_error_identifier_invalid" + class = "camtrapdp_error_name_invalid" ) - x$id <- NA_character_ + x$name <- NA_character_ expect_error( merge_camtrapdp(x, y), - class = "camtrapdp_error_identifier_invalid" + class = "camtrapdp_error_name_invalid" ) - x$id <- 1 + x$name <- 1 expect_error( merge_camtrapdp(x, y), - class = "camtrapdp_error_identifier_invalid" + class = "camtrapdp_error_name_invalid" ) - x$id <- "x" - y$id <- 1 + x$name <- "x" + y$name <- 1 expect_error( merge_camtrapdp(x, y), - class = "camtrapdp_error_identifier_invalid" + class = "camtrapdp_error_name_invalid" ) - y$id <- "y" + y$name <- "y" expect_no_error(merge_camtrapdp(x, y)) }) @@ -52,8 +52,8 @@ test_that("merge_camtrapdp() returns unique deploymentIDs, mediaIDs and filter_deployments(deploymentID %in% c("00a2c20d", "29b7d356")) y <- example_dataset() %>% filter_deployments(deploymentID %in% c("577b543a", "62c200a9")) - x$id <- "1" - y$id <- "2" + x$name <- "x" + y$name <- "y" xy_merged <- merge_camtrapdp(x, y) deploymentIDs <- purrr::pluck(deployments(xy_merged), "deploymentID") From b36f0b1ef76b54aa56ed23cb4d3bf93c9cbb3a9e Mon Sep 17 00:00:00 2001 From: Peter Desmet Date: Tue, 5 Nov 2024 16:24:08 +0100 Subject: [PATCH 126/142] Create and use utils-merge helpers Remove merge_additional_resources --- R/merge_camtrapdp.R | 22 +++---- R/utils-duplicates.R | 67 -------------------- R/utils-merge.R | 148 +++++++++++++++++++++++++++++++++++++++++++ R/utils.R | 53 ---------------- 4 files changed, 158 insertions(+), 132 deletions(-) delete mode 100644 R/utils-duplicates.R create mode 100644 R/utils-merge.R diff --git a/R/merge_camtrapdp.R b/R/merge_camtrapdp.R index f5598bb7..503830a8 100644 --- a/R/merge_camtrapdp.R +++ b/R/merge_camtrapdp.R @@ -85,19 +85,16 @@ merge_camtrapdp <- function(x, y) { } prefixes <- c(x$name, y$name) - # Add prefix to duplicate identifiers in data - x_original <- x - x <- prefix_identifiers(x, y, x$id) - y <- prefix_identifiers(y, x_original, y$id) - - # Merge Camera Trap DP resources + # Create xy_merged from x xy_merged <- x - deployments(xy_merged) <- dplyr::bind_rows(deployments(x), deployments(y)) - media(xy_merged) <- dplyr::bind_rows(media(x), media(y)) - observations(xy_merged) <- dplyr::bind_rows(observations(x), observations(y)) - # Merge additional resources - xy_merged <- merge_additional_resources(xy_merged, x, y, c(x$id, y$id)) + # Merge resources + xy_merged$resources <- merge_resources(x, y, prefixes) + + # Merge data + deployments(xy_merged) <- merge_deployments(x, y, prefixes) + media(xy_merged) <- merge_media(x, y, prefixes) + observations(xy_merged) <- merge_observations(x, y, prefixes) # Merge/update metadata xy_merged$name <- NULL @@ -145,7 +142,8 @@ merge_camtrapdp <- function(x, y) { xy_merged$references <- unique(c(x$references, y$references)) xy_merged$directory <- "." - xy_merged <- xy_merged %>% + xy_merged <- + xy_merged %>% update_spatial() %>% update_temporal() %>% update_taxonomic() diff --git a/R/utils-duplicates.R b/R/utils-duplicates.R deleted file mode 100644 index bdc33a3c..00000000 --- a/R/utils-duplicates.R +++ /dev/null @@ -1,67 +0,0 @@ -#' Add a prefix to duplicates -#' -#' Compares two character vectors and adds a prefix to any values in `a` that -#' also occur in `b`. -#' -#' @param a,b Character vectors. -#' @param prefix Prefix to add, as `_`. -#' @return `a` with updated values. -#' @family helper functions -#' @noRd -#' @examples -#' a <- c("a", "a", "b", "c", 1, 1, NA) -#' b <- c( "b", 1, 1, "e", NA) -#' prefix_duplicates(a, b, "prefix") -prefix_duplicates <- function(a, b, prefix) { - duplicates <- a %in% b - a_updated <- purrr::map2_chr( - a, - duplicates, - ~ if(.y && !is.na(.x)) paste(prefix, .x, sep = "_") else .x - ) - return(a_updated) -} - -#' Add a prefix to identifiers in Camera Trap Data Package that occur in another -#' -#' Compares two Camera Trap Data Package objects (`x` and `y`) and adds a prefix -#' to any relevant identifiers in `x` that also occur in `y`: -#' - `deploymentID` (in deployments, media and observations) -#' - `mediaID` (in media and observations) -#' - `observationID` (in observations) -#' - `eventID` (in media and observations) -#' -#' @param x,y Camera Trap Data Package objects to compare -#' @param prefix Prefix to add, as `_`. -#' @return `x` with updated identifiers. -#' @family helper functions -#' @noRd -prefix_identifiers <- function(x, y, prefix) { - y_deployment_ids <- purrr::pluck(deployments(y), "deploymentID") - y_media_ids <- purrr::pluck(media(y), "mediaID") - y_observation_ids <- purrr::pluck(observations(y), "observationID") - y_event_ids <- purrr::pluck(observations(y), "eventID") - - deployments(x) <- - deployments(x) %>% - dplyr::mutate( - deploymentID = prefix_duplicates(deploymentID, y_deployment_ids, prefix) - ) - media(x) <- - media(x) %>% - dplyr::mutate( - deploymentID = prefix_duplicates(deploymentID, y_deployment_ids, prefix), - mediaID = prefix_duplicates(mediaID, y_media_ids, prefix), - eventID = prefix_duplicates(eventID, y_event_ids, prefix) - ) - observations(x) <- - observations(x) %>% - dplyr::mutate( - deploymentID = prefix_duplicates(deploymentID, y_deployment_ids, prefix), - observationID = prefix_duplicates(observationID, y_observation_ids, prefix), - mediaID = prefix_duplicates(mediaID, y_media_ids, prefix), - eventID = prefix_duplicates(eventID, y_event_ids, prefix) - ) - - return(x) -} diff --git a/R/utils-merge.R b/R/utils-merge.R new file mode 100644 index 00000000..ce5290d9 --- /dev/null +++ b/R/utils-merge.R @@ -0,0 +1,148 @@ +#' Merge two character vectors and prefix duplicates +#' +#' Merges two character vectors and adds a prefix to any values in `a` that +#' occur in `b` and vice versa. +#' +#' @param a,b Character vectors. +#' @param prefixes Prefixes (e.g. `c("a", "b")`) to add to `a` and `b` values +#' respectively, as `_`. +#' @return `c(a, b)` with prefixed duplicates. +#' @family helper functions +#' @noRd +#' @examples +#' a <- c("k", "l", "m", "n", 1, 1, NA) +#' b <- c( "m", 1, 1, "o", NA) +#' merge_vectors(a, b, prefixes = c("a", "b")) +merge_vectors <- function(a, b, prefixes) { + a_prefix <- prefixes[1] + b_prefix <- prefixes[2] + a_prefixed <- purrr:::map2_chr(a, a %in% b, + ~ if (!is.na(.x) && .y) { paste(a_prefix, .x, sep = "_") } else { .x } + ) + b_prefixed <- purrr:::map2_chr(b, b %in% a, + ~ if (!is.na(.x) && .y) { paste(b_prefix, .x, sep = "_") } else { .x } + ) + c(a_prefixed, b_prefixed) +} + +#' Merge resources +#' +#' Merges the resources of Camera Trap Data Package `x` and with the additional +#' resources of `y`. +#' Resource names that occur in both `x` and `y` get a prefix. +#' @inheritParams merge_camtrapdp +#' @param prefixes Prefixes (e.g. `c("x", "y")`) to add to duplicate values. +#' @return A list with resources. +#' @family helper functions +#' @noRd +merge_resources <- function(x, y, prefixes) { + # Combine all resources of x with the additional resources of y + x_resources <- x$resources + y_resources <- purrr::keep(y$resources, ~ .x$name %in% additional_resources(y)) + resources <- c(x_resources, y_resources) + + # Prefix duplicate resource names + resource_names <- merge_vectors( + purrr::map_chr(x_resources, "name"), + purrr::map_chr(y_resources, "name"), + prefixes + ) + update_name <- function(resource, name) { + resource$name <- name + return(resource) + } + purrr::map2(resources, resource_names, update_name) +} + +#' Merge deployments +#' +#' Merges the deployments of Camera Trap Data Packages `x` and `y`. +#' Values in `deploymentID` that occur in both `x` and `y` get a prefix. +#' @inheritParams merge_camtrapdp +#' @param prefixes Prefixes (e.g. `c("x", "y")`) to add to duplicate values. +#' @return A [tibble::tibble()] data frame with deployments. +#' @family helper functions +#' @noRd +merge_deployments <- function(x, y, prefixes) { + x_deployments <- deployments(x) + y_deployments <- deployments(y) + dplyr::bind_rows(x_deployments, y_deployments) %>% + dplyr::mutate( + deploymentID = merge_vectors( + purrr::pluck(x_deployments, "deploymentID"), + purrr::pluck(y_deployments, "deploymentID"), + prefixes + ) + ) +} + +#' Merge media +#' +#' Merges the media of Camera Trap Data Packages `x` and `y`. +#' Values in `mediaID`, `deploymentID` or `eventID` that occur in both `x` and +#' `y` get a prefix. +#' @inheritParams merge_camtrapdp +#' @param prefixes Prefixes (e.g. `c("x", "y")`) to add to duplicate values. +#' @return A [tibble::tibble()] data frame with media. +#' @family helper functions +#' @noRd +merge_media <- function(x, y, prefixes) { + x_media <- media(x) + y_media <- media(y) + dplyr::bind_rows(x_media, y_media) %>% + dplyr::mutate( + mediaID = merge_vectors( + purrr::pluck(x_media, "mediaID"), + purrr::pluck(y_media, "mediaID"), + prefixes + ), + deploymentID = merge_vectors( + purrr::pluck(x_media, "deploymentID"), + purrr::pluck(y_media, "deploymentID"), + prefixes + ), + eventID = merge_vectors( + purrr::pluck(x_media, "eventID"), + purrr::pluck(y_media, "eventID"), + prefixes + ) + ) +} + +#' Merge observations +#' +#' Merges the observations of Camera Trap Data Packages `x` and `y`. +#' Values in `observationID`, `deploymentID`, `mediaID` or `eventID` that occur +#' in both `x` and `y` get a prefix. +#' @inheritParams merge_camtrapdp +#' @param prefixes Prefixes (e.g. `c("x", "y")`) to add to duplicate values. +#' @return A [tibble::tibble()] data frame with observations. +#' @family helper functions +#' @noRd +merge_observations <- function(x, y, prefixes) { + x_observations <- observations(x) + y_observations <- observations(y) + dplyr::bind_rows(x_observations, y_observations) %>% + dplyr::mutate( + observationID = merge_vectors( + purrr::pluck(x_observations, "observationID"), + purrr::pluck(y_observations, "observationID"), + prefixes + ), + deploymentID = merge_vectors( + purrr::pluck(x_observations, "deploymentID"), + purrr::pluck(y_observations, "deploymentID"), + prefixes + ), + mediaID = merge_vectors( + purrr::pluck(x_observations, "mediaID"), + purrr::pluck(y_observations, "mediaID"), + prefixes + ), + eventID = merge_vectors( + purrr::pluck(x_observations, "eventID"), + purrr::pluck(y_observations, "eventID"), + prefixes + ) + ) +} diff --git a/R/utils.R b/R/utils.R index 5c9e3d2f..9d1cb246 100644 --- a/R/utils.R +++ b/R/utils.R @@ -49,59 +49,6 @@ additional_resources <- function(x) { resource_names[!resource_names %in% camtrapdp_resource_names] } -#' Merge additional resources -#' -#' Merges resources that are different from the required Camera Trap Data -#' Package resources (deployments, media and observations). Resources with the -#' same name are not combined, but prefixes are added to the resource names. -#' -#' @param xy_merged Merged Camera Trap Data Package -#' @inheritParams merge_camtrapdp -#' -#' @return `xy_merged` Merged Camera Trap Data Package -#' @family helper functions -#' @noRd -merge_additional_resources <- function(xy_merged, x, y, prefix) { - camtrapdp_resources <- c("deployments", "media", "observations") - x_resource_names <- frictionless::resources(x) - y_resource_names <- frictionless::resources(y) - x_additional_resources <- - x_resource_names[!x_resource_names %in% camtrapdp_resources] - y_additional_resources <- - y_resource_names[!y_resource_names %in% camtrapdp_resources] - - all_additional_resources <- c(x_additional_resources, y_additional_resources) - - if (length(all_additional_resources) > 0) { - duplicated_resources <- duplicated(all_additional_resources) - duplicated_names <- all_additional_resources[duplicated_resources] - - # Add prefixes to resource names that are not unique - if (any(duplicated_resources)) { - purrr::map(duplicated_names, function(duplicated_name) { - xy_index <- - which(purrr::map(xy_merged$resources, "name") == duplicated_name) - y_index <- which(purrr::map(y$resources, "name") == duplicated_name) - xy_merged$resources[[xy_index]]$name <- - paste0(prefix[1], "_", duplicated_name) - y$resources[[y_index]]$name <- paste0(prefix[2], "_", duplicated_name) - xy_merged$resources <<- append(xy_merged$resources, y$resources[y_index]) - }) - } - - # Add unique resources from y - y_unique_resources <- - y_additional_resources[!y_additional_resources %in% duplicated_names] - purrr::map(y_unique_resources, function(resource_name) { - index <- which(purrr::map(y$resources, "name") == resource_name) - resource <- y$resources[index] - xy_merged$resources <<- append(xy_merged$resources, resource) - }) - } - - return(xy_merged) -} - #' Normalize list elements #' #' Converts each list element to a named vector with consistent handling of From 78585d0c17b4302a478e518fa5cc434fcad6c57d Mon Sep 17 00:00:00 2001 From: Peter Desmet Date: Tue, 5 Nov 2024 16:29:28 +0100 Subject: [PATCH 127/142] Update tests --- tests/testthat/test-merge_camtrapdp.R | 208 +++++++++++--------------- 1 file changed, 90 insertions(+), 118 deletions(-) diff --git a/tests/testthat/test-merge_camtrapdp.R b/tests/testthat/test-merge_camtrapdp.R index dc399723..9eac4228 100644 --- a/tests/testthat/test-merge_camtrapdp.R +++ b/tests/testthat/test-merge_camtrapdp.R @@ -8,7 +8,8 @@ test_that("merge_camtrapdp() returns a valid camtrapdp object", { expect_no_error(check_camtrapdp(merge_camtrapdp(x, y))) }) -test_that("merge_camtrapdp() returns error on duplicate or missing package name", { +test_that("merge_camtrapdp() returns error on duplicate or missing dataset + name", { skip_if_offline() x <- example_dataset() @@ -45,100 +46,71 @@ test_that("merge_camtrapdp() returns error on duplicate or missing package name" expect_no_error(merge_camtrapdp(x, y)) }) -test_that("merge_camtrapdp() returns unique deploymentIDs, mediaIDs and - observationIDs", { +test_that("merge_camtrapdp() adds prefixes to identifiers to keep them unique", { skip_if_offline() x <- example_dataset() %>% filter_deployments(deploymentID %in% c("00a2c20d", "29b7d356")) y <- example_dataset() %>% - filter_deployments(deploymentID %in% c("577b543a", "62c200a9")) + filter_deployments(deploymentID %in% c("29b7d356", "577b543a")) x$name <- "x" y$name <- "y" - xy_merged <- merge_camtrapdp(x, y) + xy <- merge_camtrapdp(x, y) - deploymentIDs <- purrr::pluck(deployments(xy_merged), "deploymentID") - mediaIDs <- purrr::pluck(media(xy_merged), "mediaID") - observationIDs <- purrr::pluck(observations(xy_merged), "observationID") - - expect_false(any(duplicated(deploymentIDs))) - expect_false(any(duplicated(mediaIDs))) - expect_false(any(duplicated(observationIDs))) - - expect_true("00a2c20d" %in% deployments(xy_merged)$deploymentID) - expect_true("577b543a" %in% deployments(xy_merged)$deploymentID) -}) - -test_that("merge_camtrapdp() adds default prefixes to all values of identifiers - (deploymentID, mediaID, observationID and eventID) with duplicates - between packages, but not for mediaID = NA", { - skip_if_offline() - x <- example_dataset() - y <- example_dataset() - x$id <- "1" - y$id <- "2" - xy_merged <- merge_camtrapdp(x, y) + # No duplicate primary keys + deployment_ids <- purrr::pluck(deployments(xy), "deploymentID") + media_ids <- purrr::pluck(media(xy), "mediaID") + observation_ids <- purrr::pluck(observations(xy), "observationID") + expect_false(any(duplicated(deployment_ids))) + expect_false(any(duplicated(media_ids))) + expect_false(any(duplicated(observation_ids))) # deploymentID - expect_true("1_00a2c20d" %in% deployments(xy_merged)$deploymentID) - expect_true("2_00a2c20d" %in% deployments(xy_merged)$deploymentID) - expect_true("1_00a2c20d" %in% media(xy_merged)$deploymentID) - expect_true("2_00a2c20d" %in% media(xy_merged)$deploymentID) - expect_true("1_00a2c20d" %in% observations(xy_merged)$deploymentID) - expect_true("2_00a2c20d" %in% observations(xy_merged)$deploymentID) + merged_deployment_ids <- c("00a2c20d", "x_29b7d356", "y_29b7d356", "577b543a") + expect_in(merged_deployment_ids, deployments(xy)$deploymentID) + expect_in(merged_deployment_ids, media(xy)$deploymentID) + expect_in(merged_deployment_ids, observations(xy)$deploymentID) + + # eventID + merged_event_ids <- c("4bb69c45", "x_8f5ffbf2", "y_8f5ffbf2", "5fbf69a4") + expect_in(merged_event_ids, media(xy)$eventID) + expect_in(merged_event_ids, observations(xy)$eventID) # mediaID - expect_true("1_07840dcc" %in% media(xy_merged)$mediaID) - expect_true("2_07840dcc" %in% media(xy_merged)$mediaID) - expect_true("1_07840dcc" %in% observations(xy_merged)$mediaID) - expect_true("2_07840dcc" %in% observations(xy_merged)$mediaID) - expect_false("1_NA" %in% observations(xy_merged)$mediaID) - expect_true(NA %in% observations(xy_merged)$mediaID) + merged_media_ids <- c("07840dcc", "x_3e65dfaa", "y_3e65dfaa", "44201e9e") + expect_in(merged_media_ids, media(xy)$mediaID) + expect_in(merged_media_ids, observations(xy)$mediaID) # observationID - expect_true("1_705e6036" %in% observations(xy_merged)$observationID) - expect_true("2_705e6036" %in% observations(xy_merged)$observationID) - - # eventID - expect_true("1_4bb69c45" %in% media(xy_merged)$eventID) - expect_true("2_4bb69c45" %in% media(xy_merged)$eventID) - expect_true("1_4bb69c45" %in% observations(xy_merged)$eventID) - expect_true("2_4bb69c45" %in% observations(xy_merged)$eventID) + merged_observation_ids <- c("705e6036", "x_ef2f7140", "y_ef2f7140", "d350d2bc") + expect_in(merged_observation_ids, observations(xy)$observationID) }) -test_that("merge_camtrapdp() adds default prefixes to the names of - additional resources that are not unique and not required by Camera - Trap Data Package standard", { +test_that("merge_camtrapdp() adds prefixes to additional resources to keep them + unique", { skip_if_offline() x <- example_dataset() - y <- example_dataset() - x$id <- "1" - y$id <- "2" - x$resources <- append( - y$resources, - list(list( - name = "annotations", - data = list(id = 1L, comment = "albino fox")) - )) - y$resources <- append( - y$resources, - list(list(name = "foo", description = "blabla"))) - xy_merged <- merge_camtrapdp(x, y) - - resource_names <- purrr::map(xy_merged$resources, ~ .[["name"]]) %>% unlist() - expected_names <- c( - "deployments", "media", "observations", "1_individuals", "annotations", - "2_individuals", "foo") - - expect_identical(resource_names, expected_names) + y <- x + y <- frictionless::add_resource(y, "iris", iris) + x$name <- "x" + y$name <- "y" + xy <- merge_camtrapdp(x, y) + + expect_identical( + frictionless::resources(xy), + c( + "deployments", "media", "observations", "x_individuals", "y_individuals", + "iris" + ) + ) }) test_that("merge_camtrapdp() returns the expected metadata ", { skip_if_offline() x <- example_dataset() y <- example_dataset() - x$id <- "1" - y$id <- "2" - xy_merged <- merge_camtrapdp(x, y) + x$name <- "x" + y$name <- "y" + xy <- merge_camtrapdp(x, y) # Can't compare with x$licenses because remove_duplicates switches order of # subelements @@ -147,30 +119,30 @@ test_that("merge_camtrapdp() returns the expected metadata ", { list(scope = "media", path = "http://creativecommons.org/licenses/by/4.0/")) # Check metadata - expect_equal(length(xy_merged$resources), 5) - expect_identical(xy_merged$profile, "https://raw.githubusercontent.com/tdwg/camtrap-dp/1.0.1/camtrap-dp-profile.json") - expect_identical(xy_merged$name, NA) - expect_identical(xy_merged$id, NULL) - expect_identical(xy_merged$title, NA) - expect_identical(xy_merged$contributors, x$contributors) + expect_equal(length(xy$resources), 5) + expect_identical(xy$profile, "https://raw.githubusercontent.com/tdwg/camtrap-dp/1.0.1/camtrap-dp-profile.json") + expect_identical(xy$name, NA) + expect_identical(xy$id, NULL) + expect_identical(xy$title, NA) + expect_identical(xy$contributors, x$contributors) expect_identical( - xy_merged$description, + xy$description, paste(x$description, y$description, sep = "/n") ) - expect_identical(xy_merged$version, "1.0") - expect_identical(xy_merged$keywords, x$keywords) - expect_identical(xy_merged$image, NULL) - expect_identical(xy_merged$homepage, NULL) - expect_identical(xy_merged$sources, x$sources) - expect_identical(xy_merged$licenses, licenses) - expect_identical(xy_merged$bibliographicCitation, NULL) - expect_identical(xy_merged$project, list(x$project, y$project)) - expect_identical(xy_merged$coordinatePrecision, x$coordinatePrecision) - expect_identical(xy_merged$spatial, x$spatial) - expect_identical(xy_merged$temporal, x$temporal) - expect_identical(xy_merged$taxonomic, x$taxonomic) - expect_identical(xy_merged$references, x$references) - expect_identical(xy_merged$directory, ".") + expect_identical(xy$version, "1.0") + expect_identical(xy$keywords, x$keywords) + expect_identical(xy$image, NULL) + expect_identical(xy$homepage, NULL) + expect_identical(xy$sources, x$sources) + expect_identical(xy$licenses, licenses) + expect_identical(xy$bibliographicCitation, NULL) + expect_identical(xy$project, list(x$project, y$project)) + expect_identical(xy$coordinatePrecision, x$coordinatePrecision) + expect_identical(xy$spatial, x$spatial) + expect_identical(xy$temporal, x$temporal) + expect_identical(xy$taxonomic, x$taxonomic) + expect_identical(xy$references, x$references) + expect_identical(xy$directory, ".") relatedIdentifiers_merged <- list( list( @@ -199,7 +171,7 @@ test_that("merge_camtrapdp() returns the expected metadata ", { ) ) - expect_identical(xy_merged$relatedIdentifiers, relatedIdentifiers_merged) + expect_identical(xy$relatedIdentifiers, relatedIdentifiers_merged) # Check data }) @@ -218,10 +190,10 @@ test_that("merge_camtrapdp() returns the expected metadata when merging two download.file(url, zip_file, mode = 'wb', quiet = TRUE) unzip(zip_file, exdir = temp_dir) y <- read_camtrapdp(datapackage_file) - y$id <- "y" + y$name <- "y" # Merge - xy_merged <- merge_camtrapdp(x, y) + xy <- merge_camtrapdp(x, y) # Check metadata profile <- "https://raw.githubusercontent.com/tdwg/camtrap-dp/1.0.1/camtrap-dp-profile.json" @@ -467,29 +439,29 @@ test_that("merge_camtrapdp() returns the expected metadata when merging two ) ) - expect_identical(xy_merged$resources, x$resources) - expect_identical(xy_merged$profile, profile) - expect_identical(xy_merged$name, NA) - expect_identical(xy_merged$id, NULL) - expect_identical(xy_merged$title, NA) - expect_identical(xy_merged$contributors, contributors) + expect_identical(xy$resources, x$resources) + expect_identical(xy$profile, profile) + expect_identical(xy$name, NA) + expect_identical(xy$id, NULL) + expect_identical(xy$title, NA) + expect_identical(xy$contributors, contributors) expect_identical( - xy_merged$description, + xy$description, paste(x$description, y$description, sep = "/n") ) - expect_identical(xy_merged$version, "1.0") - expect_identical(xy_merged$keywords, c(x$keywords, y$keywords)) - expect_identical(xy_merged$image, NULL) - expect_identical(xy_merged$homepage, NULL) - expect_identical(xy_merged$sources, sources) - expect_identical(xy_merged$licenses, licenses) - expect_identical(xy_merged$bibliographicCitation, NULL) - expect_identical(xy_merged$project, list(x$project, y$project)) - expect_identical(xy_merged$coordinatePrecision, coordinatePrecision) - expect_identical(xy_merged$spatial, spatial) - expect_identical(xy_merged$temporal, temporal) - expect_identical(xy_merged$taxonomic, taxonomic) - expect_identical(xy_merged$references, references) - expect_identical(xy_merged$directory, ".") - expect_identical(xy_merged$relatedIdentifiers, relatedIdentifiers_merged) + expect_identical(xy$version, "1.0") + expect_identical(xy$keywords, c(x$keywords, y$keywords)) + expect_identical(xy$image, NULL) + expect_identical(xy$homepage, NULL) + expect_identical(xy$sources, sources) + expect_identical(xy$licenses, licenses) + expect_identical(xy$bibliographicCitation, NULL) + expect_identical(xy$project, list(x$project, y$project)) + expect_identical(xy$coordinatePrecision, coordinatePrecision) + expect_identical(xy$spatial, spatial) + expect_identical(xy$temporal, temporal) + expect_identical(xy$taxonomic, taxonomic) + expect_identical(xy$references, references) + expect_identical(xy$directory, ".") + expect_identical(xy$relatedIdentifiers, relatedIdentifiers_merged) }) From db04d9e0120bac6c13dd8ce78fd3018e892226ea Mon Sep 17 00:00:00 2001 From: Peter Desmet Date: Thu, 7 Nov 2024 16:32:06 +0100 Subject: [PATCH 128/142] Use snapshot files to see if expected metadata is in merged camtrapdp - This reduces test length - Snapshot is a datapackage.json which is easier to compare - Properties = NULL won't be present - Properties = NA will have NULL - I checked if the snaps correspond with the previous tests --- .../datapackage_different_xy.json | 353 +++++++++++++++ .../datapackage_identical_xy.json | 274 ++++++++++++ tests/testthat/test-merge_camtrapdp.R | 418 +++--------------- 3 files changed, 685 insertions(+), 360 deletions(-) create mode 100644 tests/testthat/_snaps/merge_camtrapdp/datapackage_different_xy.json create mode 100644 tests/testthat/_snaps/merge_camtrapdp/datapackage_identical_xy.json diff --git a/tests/testthat/_snaps/merge_camtrapdp/datapackage_different_xy.json b/tests/testthat/_snaps/merge_camtrapdp/datapackage_different_xy.json new file mode 100644 index 00000000..4d47d527 --- /dev/null +++ b/tests/testthat/_snaps/merge_camtrapdp/datapackage_different_xy.json @@ -0,0 +1,353 @@ +{ + "resources": [ + { + "name": "deployments", + "path": "deployments.csv", + "profile": "tabular-data-resource", + "format": "csv", + "mediatype": "text/csv", + "encoding": "utf-8", + "schema": "https://raw.githubusercontent.com/tdwg/camtrap-dp/1.0.1/deployments-table-schema.json" + }, + { + "name": "media", + "path": "media.csv", + "profile": "tabular-data-resource", + "format": "csv", + "mediatype": "text/csv", + "encoding": "utf-8", + "schema": "https://raw.githubusercontent.com/tdwg/camtrap-dp/1.0.1/media-table-schema.json" + }, + { + "name": "observations", + "path": "observations.csv", + "profile": "tabular-data-resource", + "format": "csv", + "mediatype": "text/csv", + "encoding": "utf-8", + "schema": "https://raw.githubusercontent.com/tdwg/camtrap-dp/1.0.1/observations-table-schema.json" + }, + { + "name": "individuals", + "description": "Custom table/resource not part of the Camtrap DP model. Included to showcase that extending with more resources is possible.", + "data": [ + { + "id": 1, + "individualName": "Reinaert", + "scientificName": "Vulpes vulpes" + } + ] + } + ], + "profile": "https://raw.githubusercontent.com/tdwg/camtrap-dp/1.0.1/camtrap-dp-profile.json", + "name": null, + "created": "2024-11-07T16:26:01Z", + "title": null, + "contributors": [ + { + "title": "Axel Neukermans", + "email": "axel.neukermans@inbo.be", + "path": "https://orcid.org/0000-0003-0272-9180", + "role": "contributor", + "organization": "Research Institute for Nature and Forest (INBO)" + }, + { + "title": "Danny Van der beeck", + "email": "daniel.vanderbeeck@gmail.com" + }, + { + "title": "Emma Cartuyvels", + "email": "emma.cartuyvels@inbo.be", + "role": "principalInvestigator", + "organization": "Research Institute for Nature and Forest (INBO)" + }, + { + "title": "Peter Desmet", + "email": "peter.desmet@inbo.be", + "path": "https://orcid.org/0000-0002-8442-8025", + "role": "contact", + "organization": "Research Institute for Nature and Forest (INBO)" + }, + { + "title": "Research Institute for Nature and Forest (INBO)", + "path": "https://inbo.be", + "role": "rightsHolder" + }, + { + "title": "Research Institute for Nature and Forest (INBO)", + "path": "https://inbo.be", + "role": "publisher" + }, + { + "title": "Julian Evans", + "email": "jevansbio@gmail.com", + "role": "principalInvestigator", + "organization": "University of Amsterdam", + "firstName": "Julian", + "lastName": "Evans" + }, + { + "title": "Rotem Zilber", + "email": "r.kadanzilber@uva.nl", + "role": "principalInvestigator", + "organization": "University of Amsterdam", + "firstName": "Rotem", + "lastName": "Zilber" + }, + { + "title": "W. Daniel Kissling", + "email": "wdkissling@gmail.com", + "path": "https://www.danielkissling.de/", + "role": "principalInvestigator", + "organization": "University of Amsterdam", + "firstName": "W. Daniel ", + "lastName": "Kissling" + } + ], + "description": "MICA - Muskrat and coypu camera trap observations in Belgium, the Netherlands and Germany is an occurrence dataset published by the Research Institute of Nature and Forest (INBO). It is part of the LIFE project MICA, in which innovative techniques are tested for a more efficient control of muskrat and coypu populations, both invasive species. This dataset is a sample of the original dataset and serves as an example of a Camera Trap Data Package (Camtrap DP)./nCamera trap pilot 2 was a test of the difference in species detection and data accumulation between a Snyper Commander camera with a regular lens (52°) and one with a wide lens (100°). The cameras were deployed at 30 cm above the ground within the herbivore exclosure Zeeveld Noord in the Amsterdam Water Supply Dunes from 14th of August 2021 to 24th of September 2021. During this pilot, a solar panel failure caused the cameras to stop recording data from the 24th of August 2021 to the 6th of September (14 days). During annotation, only days in which both cameras were operational were annotated. This led to a total of 1,113 images over 28 days from the two cameras. A detailed description of the dataset can be found in a data paper published in the journal Data in Brief (Evans et al. 2024, https://doi.org/10.1016/j.dib.2024.110544).", + "version": "1.0", + "keywords": ["camera traps", "public awareness campaign", "flood protection", "flood control", "damage prevention", "animal damage", "pest control", "invasive alien species", "muskrat", "coypu", "dune ecosystem", "mammals", "exclosure", "camera trap", "biodiversity monitoring", "Natura 2000", "herbivore", "wildlife camera"], + "sources": [ + { + "title": "Agouti", + "path": "https://www.agouti.eu", + "email": "agouti@wur.nl", + "version": "v3.21" + }, + { + "title": "Agouti", + "path": "https://www.agouti.eu", + "email": "agouti@wur.nl", + "version": "v4" + } + ], + "licenses": [ + { + "name": "CC0-1.0", + "scope": "data" + }, + { + "scope": "media", + "path": "http://creativecommons.org/licenses/by/4.0/" + }, + { + "name": "CC-BY-4.0", + "scope": "data" + } + ], + "project": [ + { + "id": "86cabc14-d475-4439-98a7-e7b590bed60e", + "title": "Management of Invasive Coypu and muskrAt in Europe", + "acronym": "MICA", + "description": "Invasive alien species such as the coypu and muskrat pose a major threat to biodiversity and cost millions of euros annually. By feeding on rushes and reeds, these animals cause serious damage to the environment in which they live and endangered species suffer from habitat loss. The disappearance of reeds and digging in dikes represents a safety risk for humans in the lowland areas. With the LIFE project MICA (), the partners from the participating countries want to develop a transnational plan for the management of coypu and muskrat populations in Europe and aim to reduce their population. The objective of an effective population control of coypu and muskrat is to protect lowlands from flooding, to prevent crop damage and loss of biodiversity. The objective of the project is to serve as a pilot and demonstration project in which ‘best practices’ are tested and new techniques are developed for a more efficient control of muskrat and coypu populations. By involving organisations from Belgium, Germany and the Netherlands, the project also promotes international cooperation and knowledge exchange in the field of muskrat and coypu management.", + "samplingDesign": "targeted", + "path": "https://lifemica.eu", + "captureMethod": ["activityDetection", "timeLapse"], + "individualAnimals": false, + "observationLevel": ["media", "event"] + }, + { + "id": "93b1ff03-de48-470e-a997-88d7b33501ce", + "title": "Data from three camera trapping pilots in the Amsterdam Water Supply Dunes of the Netherlands", + "acronym": "ARISE-MDS", + "description": "Three pilot studies were conducted to test the autonomous deployment of wireless 4G wildlife cameras with solar panels and automated data transmission in the coastal dunes of the Amsterdam Water Supply Dunes, The Netherlands. Monitoring and management of grazing mammals such as the European rabbit (Oryctolagus cuniculus) and the European fallow deer (Dama dama) is of key interest in this nature reserve, as they slow down the rate of natural succession and alter plant species composition and vegetation structure through grazing and digging. These grazers as well as predators such as the red fox (Vulpes vulpes) have typically been monitored using traditional survey methods such as transect and areal counts, but only once or twice a year. The study was aimed to test the feasibility and methodology of running long-term autonomous monitoring networks with wildlife cameras. This involved the testing of power usage, data transmission, and data accumulation, and the robustness of cameras to herbivore damage. Furthermore, the pilots specifically tested how the detection of focal species (rabbits, deer, foxes) differs with deployment heights, camera lens types and the placement in different habitats. The pilot also provides labelled images for the development of deep learning algorithms to automatically identify species.\r\n\r\nWork was carried out as part of the development of the monitoring demonstration sites of a large-scale research infrastructure project ARISE.", + "path": "https://www.arise-biodiversity.nl/teammonitoringdemonstration", + "samplingDesign": "opportunistic", + "captureMethod": "activityDetection", + "observationLevel": "event", + "individualAnimals": false + } + ], + "coordinatePrecision": 0.001, + "spatial": { + "type": "Polygon", + "coordinates": [ + [ + [4.013, 50.699], + [5.659, 50.699], + [5.659, 52.356], + [4.013, 52.356], + [4.013, 50.699] + ] + ] + }, + "temporal": { + "start": "2020-05-30", + "end": "2022-03-18" + }, + "taxonomic": [ + { + "scientificName": "Anas platyrhynchos", + "taxonID": "https://www.checklistbank.org/dataset/COL2023/taxon/DGP6", + "taxonRank": "species", + "family": null, + "order.": null, + "vernacularNames": { + "eng": "mallard", + "nld": "wilde eend" + } + }, + { + "scientificName": "Anas strepera", + "taxonID": "https://www.checklistbank.org/dataset/COL2023/taxon/DGPL", + "taxonRank": "species", + "family": null, + "order.": null, + "vernacularNames": { + "eng": "gadwall", + "nld": "krakeend" + } + }, + { + "scientificName": "Apodemus sylvaticus", + "taxonID": "https://www.checklistbank.org/dataset/COL2023/taxon/FRJJ", + "taxonRank": "species", + "family": "Muridae", + "order.": "Rodentia", + "vernacularNames": { + "eng": "wood mouse", + "nld": "bosmuis" + } + }, + { + "scientificName": "Ardea", + "taxonID": "https://www.checklistbank.org/dataset/COL2023/taxon/32FH", + "taxonRank": "genus", + "family": null, + "order.": null, + "vernacularNames": { + "eng": "great herons", + "nld": "reigers" + } + }, + { + "scientificName": "Ardea cinerea", + "taxonID": "https://www.checklistbank.org/dataset/COL2023/taxon/GCHS", + "taxonRank": "species", + "family": null, + "order.": null, + "vernacularNames": { + "eng": "grey heron", + "nld": "blauwe reiger" + } + }, + { + "scientificName": "Aves", + "taxonID": "https://www.checklistbank.org/dataset/COL2023/taxon/V2", + "taxonRank": "class", + "family": null, + "order.": null, + "vernacularNames": { + "eng": "bird sp.", + "nld": "vogel" + } + }, + { + "scientificName": "Corvus corone", + "taxonID": "https://www.checklistbank.org/dataset/COL2023/taxon/YNHJ", + "taxonRank": "species", + "family": "Corvidae", + "order.": "Passeriformes", + "vernacularNames": { + "eng": "carrion crow", + "nld": "zwarte kraai" + } + }, + { + "scientificName": "Homo sapiens", + "taxonID": "https://www.checklistbank.org/dataset/COL2023/taxon/6MB3T", + "taxonRank": "species", + "family": "Hominidae", + "order.": "Primates", + "vernacularNames": { + "eng": "human", + "nld": "mens" + } + }, + { + "scientificName": "Martes foina", + "taxonID": "https://www.checklistbank.org/dataset/COL2023/taxon/3Y9VW", + "taxonRank": "species", + "family": null, + "order.": null, + "vernacularNames": { + "eng": "beech marten", + "nld": "steenmarter" + } + }, + { + "scientificName": "Mustela putorius", + "taxonID": "https://www.checklistbank.org/dataset/COL2023/taxon/44QYC", + "taxonRank": "species", + "family": null, + "order.": null, + "vernacularNames": { + "eng": "European polecat", + "nld": "bunzing" + } + }, + { + "scientificName": "Oryctolagus cuniculus", + "taxonID": "https://www.checklistbank.org/dataset/COL2023/taxon/74ZBP", + "taxonRank": "species", + "family": "Leporidae", + "order.": "Lagomorpha", + "vernacularNames": { + "eng": "European rabbit", + "nld": "Europees konijn" + } + }, + { + "scientificName": "Rattus norvegicus", + "taxonID": "https://www.checklistbank.org/dataset/COL2023/taxon/4RM67", + "taxonRank": "species", + "family": null, + "order.": null, + "vernacularNames": { + "eng": "brown rat", + "nld": "bruine rat" + } + }, + { + "scientificName": "Vulpes vulpes", + "taxonID": "https://www.checklistbank.org/dataset/COL2023/taxon/5BSG3", + "taxonRank": "species", + "family": "Canidae", + "order.": "Carnivora", + "vernacularNames": { + "eng": "red fox", + "nld": "vos" + } + } + ], + "relatedIdentifiers": [ + { + "relationType": "IsDerivedFrom", + "relatedIdentifier": "https://doi.org/10.15468/5tb6ze", + "resourceTypeGeneral": "Dataset", + "relatedIdentifierType": "DOI" + }, + { + "relationType": "IsSupplementTo", + "relatedIdentifier": "https://inbo.github.io/camtraptor/", + "resourceTypeGeneral": "Software", + "relatedIdentifierType": "URL" + }, + { + "relationType": "IsPublishedIn", + "relatedIdentifier": "https://doi.org/10.1016/j.dib.2024.110544", + "resourceTypeGeneral": "DataPaper", + "relatedIdentifierType": "DOI" + }, + { + "relationType": "IsDerivedFrom", + "relatedIdentifier": "7cca70f5-ef8c-4f86-85fb-8f070937d7ab", + "resourceTypeGeneral": "Data package", + "relatedIdentifierType": "id" + } + ], + "references": [ + "Evans, J.C., Zilber, R., & Kissling, W.D. (2024). Data from three camera trapping pilots in the Amsterdam Water Supply Dunes of the Netherlands. Data in Brief, 54, 110544. https://doi.org/10.1016/j.dib.2024.110544" + ] +} diff --git a/tests/testthat/_snaps/merge_camtrapdp/datapackage_identical_xy.json b/tests/testthat/_snaps/merge_camtrapdp/datapackage_identical_xy.json new file mode 100644 index 00000000..cc485412 --- /dev/null +++ b/tests/testthat/_snaps/merge_camtrapdp/datapackage_identical_xy.json @@ -0,0 +1,274 @@ +{ + "resources": [ + { + "name": "deployments", + "path": "deployments.csv", + "profile": "tabular-data-resource", + "format": "csv", + "mediatype": "text/csv", + "encoding": "utf-8", + "schema": "https://raw.githubusercontent.com/tdwg/camtrap-dp/1.0.1/deployments-table-schema.json" + }, + { + "name": "media", + "path": "media.csv", + "profile": "tabular-data-resource", + "format": "csv", + "mediatype": "text/csv", + "encoding": "utf-8", + "schema": "https://raw.githubusercontent.com/tdwg/camtrap-dp/1.0.1/media-table-schema.json" + }, + { + "name": "observations", + "path": "observations.csv", + "profile": "tabular-data-resource", + "format": "csv", + "mediatype": "text/csv", + "encoding": "utf-8", + "schema": "https://raw.githubusercontent.com/tdwg/camtrap-dp/1.0.1/observations-table-schema.json" + }, + { + "name": "x_individuals", + "description": "Custom table/resource not part of the Camtrap DP model. Included to showcase that extending with more resources is possible.", + "data": [ + { + "id": 1, + "individualName": "Reinaert", + "scientificName": "Vulpes vulpes" + } + ] + }, + { + "name": "y_individuals", + "description": "Custom table/resource not part of the Camtrap DP model. Included to showcase that extending with more resources is possible.", + "data": [ + { + "id": 1, + "individualName": "Reinaert", + "scientificName": "Vulpes vulpes" + } + ] + } + ], + "profile": "https://raw.githubusercontent.com/tdwg/camtrap-dp/1.0.1/camtrap-dp-profile.json", + "name": null, + "created": "2024-11-07T16:25:57Z", + "title": null, + "contributors": [ + { + "title": "Axel Neukermans", + "email": "axel.neukermans@inbo.be", + "path": "https://orcid.org/0000-0003-0272-9180", + "role": "contributor", + "organization": "Research Institute for Nature and Forest (INBO)" + }, + { + "title": "Danny Van der beeck", + "email": "daniel.vanderbeeck@gmail.com" + }, + { + "title": "Emma Cartuyvels", + "email": "emma.cartuyvels@inbo.be", + "role": "principalInvestigator", + "organization": "Research Institute for Nature and Forest (INBO)" + }, + { + "title": "Peter Desmet", + "email": "peter.desmet@inbo.be", + "path": "https://orcid.org/0000-0002-8442-8025", + "role": "contact", + "organization": "Research Institute for Nature and Forest (INBO)" + }, + { + "title": "Research Institute for Nature and Forest (INBO)", + "path": "https://inbo.be", + "role": "rightsHolder" + }, + { + "title": "Research Institute for Nature and Forest (INBO)", + "path": "https://inbo.be", + "role": "publisher" + } + ], + "description": "MICA - Muskrat and coypu camera trap observations in Belgium, the Netherlands and Germany is an occurrence dataset published by the Research Institute of Nature and Forest (INBO). It is part of the LIFE project MICA, in which innovative techniques are tested for a more efficient control of muskrat and coypu populations, both invasive species. This dataset is a sample of the original dataset and serves as an example of a Camera Trap Data Package (Camtrap DP)./nMICA - Muskrat and coypu camera trap observations in Belgium, the Netherlands and Germany is an occurrence dataset published by the Research Institute of Nature and Forest (INBO). It is part of the LIFE project MICA, in which innovative techniques are tested for a more efficient control of muskrat and coypu populations, both invasive species. This dataset is a sample of the original dataset and serves as an example of a Camera Trap Data Package (Camtrap DP).", + "version": "1.0", + "keywords": ["camera traps", "public awareness campaign", "flood protection", "flood control", "damage prevention", "animal damage", "pest control", "invasive alien species", "muskrat", "coypu"], + "sources": [ + { + "title": "Agouti", + "path": "https://www.agouti.eu", + "email": "agouti@wur.nl", + "version": "v3.21" + } + ], + "licenses": [ + { + "name": "CC0-1.0", + "scope": "data" + }, + { + "scope": "media", + "path": "http://creativecommons.org/licenses/by/4.0/" + } + ], + "project": [ + { + "id": "86cabc14-d475-4439-98a7-e7b590bed60e", + "title": "Management of Invasive Coypu and muskrAt in Europe", + "acronym": "MICA", + "description": "Invasive alien species such as the coypu and muskrat pose a major threat to biodiversity and cost millions of euros annually. By feeding on rushes and reeds, these animals cause serious damage to the environment in which they live and endangered species suffer from habitat loss. The disappearance of reeds and digging in dikes represents a safety risk for humans in the lowland areas. With the LIFE project MICA (), the partners from the participating countries want to develop a transnational plan for the management of coypu and muskrat populations in Europe and aim to reduce their population. The objective of an effective population control of coypu and muskrat is to protect lowlands from flooding, to prevent crop damage and loss of biodiversity. The objective of the project is to serve as a pilot and demonstration project in which ‘best practices’ are tested and new techniques are developed for a more efficient control of muskrat and coypu populations. By involving organisations from Belgium, Germany and the Netherlands, the project also promotes international cooperation and knowledge exchange in the field of muskrat and coypu management.", + "samplingDesign": "targeted", + "path": "https://lifemica.eu", + "captureMethod": ["activityDetection", "timeLapse"], + "individualAnimals": false, + "observationLevel": ["media", "event"] + }, + { + "id": "86cabc14-d475-4439-98a7-e7b590bed60e", + "title": "Management of Invasive Coypu and muskrAt in Europe", + "acronym": "MICA", + "description": "Invasive alien species such as the coypu and muskrat pose a major threat to biodiversity and cost millions of euros annually. By feeding on rushes and reeds, these animals cause serious damage to the environment in which they live and endangered species suffer from habitat loss. The disappearance of reeds and digging in dikes represents a safety risk for humans in the lowland areas. With the LIFE project MICA (), the partners from the participating countries want to develop a transnational plan for the management of coypu and muskrat populations in Europe and aim to reduce their population. The objective of an effective population control of coypu and muskrat is to protect lowlands from flooding, to prevent crop damage and loss of biodiversity. The objective of the project is to serve as a pilot and demonstration project in which ‘best practices’ are tested and new techniques are developed for a more efficient control of muskrat and coypu populations. By involving organisations from Belgium, Germany and the Netherlands, the project also promotes international cooperation and knowledge exchange in the field of muskrat and coypu management.", + "samplingDesign": "targeted", + "path": "https://lifemica.eu", + "captureMethod": ["activityDetection", "timeLapse"], + "individualAnimals": false, + "observationLevel": ["media", "event"] + } + ], + "coordinatePrecision": 0.001, + "spatial": { + "type": "Polygon", + "coordinates": [ + [ + [4.013, 50.699], + [5.659, 50.699], + [5.659, 51.496], + [4.013, 51.496], + [4.013, 50.699] + ] + ] + }, + "temporal": { + "start": "2020-05-30", + "end": "2021-04-18" + }, + "taxonomic": [ + { + "scientificName": "Anas platyrhynchos", + "taxonID": "https://www.checklistbank.org/dataset/COL2023/taxon/DGP6", + "taxonRank": "species", + "vernacularNames": { + "eng": "mallard", + "nld": "wilde eend" + } + }, + { + "scientificName": "Anas strepera", + "taxonID": "https://www.checklistbank.org/dataset/COL2023/taxon/DGPL", + "taxonRank": "species", + "vernacularNames": { + "eng": "gadwall", + "nld": "krakeend" + } + }, + { + "scientificName": "Ardea", + "taxonID": "https://www.checklistbank.org/dataset/COL2023/taxon/32FH", + "taxonRank": "genus", + "vernacularNames": { + "eng": "great herons", + "nld": "reigers" + } + }, + { + "scientificName": "Ardea cinerea", + "taxonID": "https://www.checklistbank.org/dataset/COL2023/taxon/GCHS", + "taxonRank": "species", + "vernacularNames": { + "eng": "grey heron", + "nld": "blauwe reiger" + } + }, + { + "scientificName": "Aves", + "taxonID": "https://www.checklistbank.org/dataset/COL2023/taxon/V2", + "taxonRank": "class", + "vernacularNames": { + "eng": "bird sp.", + "nld": "vogel" + } + }, + { + "scientificName": "Homo sapiens", + "taxonID": "https://www.checklistbank.org/dataset/COL2023/taxon/6MB3T", + "taxonRank": "species", + "vernacularNames": { + "eng": "human", + "nld": "mens" + } + }, + { + "scientificName": "Martes foina", + "taxonID": "https://www.checklistbank.org/dataset/COL2023/taxon/3Y9VW", + "taxonRank": "species", + "vernacularNames": { + "eng": "beech marten", + "nld": "steenmarter" + } + }, + { + "scientificName": "Mustela putorius", + "taxonID": "https://www.checklistbank.org/dataset/COL2023/taxon/44QYC", + "taxonRank": "species", + "vernacularNames": { + "eng": "European polecat", + "nld": "bunzing" + } + }, + { + "scientificName": "Rattus norvegicus", + "taxonID": "https://www.checklistbank.org/dataset/COL2023/taxon/4RM67", + "taxonRank": "species", + "vernacularNames": { + "eng": "brown rat", + "nld": "bruine rat" + } + }, + { + "scientificName": "Vulpes vulpes", + "taxonID": "https://www.checklistbank.org/dataset/COL2023/taxon/5BSG3", + "taxonRank": "species", + "vernacularNames": { + "eng": "red fox", + "nld": "vos" + } + } + ], + "relatedIdentifiers": [ + { + "relationType": "IsDerivedFrom", + "relatedIdentifier": "https://doi.org/10.15468/5tb6ze", + "resourceTypeGeneral": "Dataset", + "relatedIdentifierType": "DOI" + }, + { + "relationType": "IsSupplementTo", + "relatedIdentifier": "https://inbo.github.io/camtraptor/", + "resourceTypeGeneral": "Software", + "relatedIdentifierType": "URL" + }, + { + "relationType": "IsDerivedFrom", + "relatedIdentifier": "7cca70f5-ef8c-4f86-85fb-8f070937d7ab", + "resourceTypeGeneral": "Data package", + "relatedIdentifierType": "id" + }, + { + "relationType": "IsDerivedFrom", + "relatedIdentifier": "y", + "resourceTypeGeneral": "Data package", + "relatedIdentifierType": "id" + } + ], + "references": [] +} diff --git a/tests/testthat/test-merge_camtrapdp.R b/tests/testthat/test-merge_camtrapdp.R index 9eac4228..a4ea9093 100644 --- a/tests/testthat/test-merge_camtrapdp.R +++ b/tests/testthat/test-merge_camtrapdp.R @@ -1,15 +1,15 @@ test_that("merge_camtrapdp() returns a valid camtrapdp object", { skip_if_offline() x <- example_dataset() - y <- x x$name <- "x" + y <- x y$name <- "y" expect_no_error(check_camtrapdp(merge_camtrapdp(x, y))) }) -test_that("merge_camtrapdp() returns error on duplicate or missing dataset - name", { +test_that("merge_camtrapdp() returns error on missing/invalid/duplicate dataset + name(s)", { skip_if_offline() x <- example_dataset() @@ -46,14 +46,18 @@ test_that("merge_camtrapdp() returns error on duplicate or missing dataset expect_no_error(merge_camtrapdp(x, y)) }) -test_that("merge_camtrapdp() adds prefixes to identifiers to keep them unique", { +test_that("merge_camtrapdp() adds prefixes to identifiers in the data to keep + them unique", { skip_if_offline() + + # Datasets with overlapping deployments: a, b and b, c x <- example_dataset() %>% - filter_deployments(deploymentID %in% c("00a2c20d", "29b7d356")) - y <- example_dataset() %>% - filter_deployments(deploymentID %in% c("29b7d356", "577b543a")) + filter_deployments(deploymentID %in% c("00a2c20d", "29b7d356")) # a, b x$name <- "x" + y <- example_dataset() %>% + filter_deployments(deploymentID %in% c("29b7d356", "577b543a")) # b, c y$name <- "y" + xy <- merge_camtrapdp(x, y) # No duplicate primary keys @@ -85,13 +89,15 @@ test_that("merge_camtrapdp() adds prefixes to identifiers to keep them unique", expect_in(merged_observation_ids, observations(xy)$observationID) }) -test_that("merge_camtrapdp() adds prefixes to additional resources to keep them - unique", { +test_that("merge_camtrapdp() adds prefixes to additional resource names to keep + them unique", { skip_if_offline() + + # Datasets with overlapping resources: individuals and individuals, iris x <- example_dataset() + x$name <- "x" y <- x y <- frictionless::add_resource(y, "iris", iris) - x$name <- "x" y$name <- "y" xy <- merge_camtrapdp(x, y) @@ -104,364 +110,56 @@ test_that("merge_camtrapdp() adds prefixes to additional resources to keep them ) }) -test_that("merge_camtrapdp() returns the expected metadata ", { +test_that("merge_camtrapdp() returns the expected datapackage.json when merging + identical datasets", { skip_if_offline() + temp_dir <- tempdir() + on.exit(unlink(temp_dir, recursive = TRUE)) + + # Datasets that are identical except for name and id x <- example_dataset() - y <- example_dataset() x$name <- "x" + y <- x y$name <- "y" + y$id <- "y" xy <- merge_camtrapdp(x, y) - # Can't compare with x$licenses because remove_duplicates switches order of - # subelements - licenses <- list( - list(name = "CC0-1.0", scope = "data"), - list(scope = "media", path = "http://creativecommons.org/licenses/by/4.0/")) - - # Check metadata - expect_equal(length(xy$resources), 5) - expect_identical(xy$profile, "https://raw.githubusercontent.com/tdwg/camtrap-dp/1.0.1/camtrap-dp-profile.json") - expect_identical(xy$name, NA) - expect_identical(xy$id, NULL) - expect_identical(xy$title, NA) - expect_identical(xy$contributors, x$contributors) - expect_identical( - xy$description, - paste(x$description, y$description, sep = "/n") + # Write to file + write_camtrapdp(xy, temp_dir) + file.rename( + file.path(temp_dir, "datapackage.json"), + file.path(temp_dir, "datapackage_identical_xy.json") ) - expect_identical(xy$version, "1.0") - expect_identical(xy$keywords, x$keywords) - expect_identical(xy$image, NULL) - expect_identical(xy$homepage, NULL) - expect_identical(xy$sources, x$sources) - expect_identical(xy$licenses, licenses) - expect_identical(xy$bibliographicCitation, NULL) - expect_identical(xy$project, list(x$project, y$project)) - expect_identical(xy$coordinatePrecision, x$coordinatePrecision) - expect_identical(xy$spatial, x$spatial) - expect_identical(xy$temporal, x$temporal) - expect_identical(xy$taxonomic, x$taxonomic) - expect_identical(xy$references, x$references) - expect_identical(xy$directory, ".") - relatedIdentifiers_merged <- list( - list( - relationType = "IsDerivedFrom", - relatedIdentifier = "https://doi.org/10.15468/5tb6ze", - resourceTypeGeneral = "Dataset", - relatedIdentifierType = "DOI" - ), - list( - relationType = "IsSupplementTo", - relatedIdentifier = "https://inbo.github.io/camtraptor/", - resourceTypeGeneral = "Software", - relatedIdentifierType = "URL" - ), - list( - relationType = "IsDerivedFrom", - relatedIdentifier = "1", - resourceTypeGeneral = "Data package", - relatedIdentifierType = "id" - ), - list( - relationType = "IsDerivedFrom", - relatedIdentifier = "2", - resourceTypeGeneral = "Data package", - relatedIdentifierType = "id" - ) - ) - - expect_identical(xy$relatedIdentifiers, relatedIdentifiers_merged) - - # Check data + expect_snapshot_file(file.path(temp_dir, "datapackage_identical_xy.json")) }) -test_that("merge_camtrapdp() returns the expected metadata when merging two - different Data Packages", { - skip_if_offline() - x <- example_dataset() - - # Download second Camera Trap Data package - temp_dir <- tempdir() - on.exit(unlink(temp_dir, recursive = TRUE)) - zip_file <- file.path(temp_dir, "dataset.zip") - datapackage_file <- file.path(temp_dir, "datapackage.json") - url <- "https://ipt.nlbif.nl/archive.do?r=awd_pilot2" - download.file(url, zip_file, mode = 'wb', quiet = TRUE) - unzip(zip_file, exdir = temp_dir) - y <- read_camtrapdp(datapackage_file) - y$name <- "y" - - # Merge - xy <- merge_camtrapdp(x, y) - - # Check metadata - profile <- "https://raw.githubusercontent.com/tdwg/camtrap-dp/1.0.1/camtrap-dp-profile.json" - contributors <- list( - list( - title = "Axel Neukermans", - email = "axel.neukermans@inbo.be", - path = "https://orcid.org/0000-0003-0272-9180", - role = "contributor", - organization = "Research Institute for Nature and Forest (INBO)" - ), - list( - title = "Danny Van der beeck", - email = "daniel.vanderbeeck@gmail.com" - ), - list( - title = "Emma Cartuyvels", - email = "emma.cartuyvels@inbo.be", - role = "principalInvestigator", - organization = "Research Institute for Nature and Forest (INBO)" - ), - list( - title = "Peter Desmet", - email = "peter.desmet@inbo.be", - path = "https://orcid.org/0000-0002-8442-8025", - role = "contact", - organization = "Research Institute for Nature and Forest (INBO)" - ), - list( - title = "Research Institute for Nature and Forest (INBO)", - path = "https://inbo.be", - role = "rightsHolder" - ), - list( - title = "Research Institute for Nature and Forest (INBO)", - path = "https://inbo.be", - role = "publisher" - ), - list( - title = "Julian Evans", - email = "jevansbio@gmail.com", - role = "principalInvestigator", - organization = "University of Amsterdam", - firstName = "Julian", - lastName = "Evans" - ), - list( - title = "Rotem Zilber", - email = "r.kadanzilber@uva.nl", - role = "principalInvestigator", - organization = "University of Amsterdam", - firstName = "Rotem", - lastName = "Zilber" - ), - list( - title = "W. Daniel Kissling", - email = "wdkissling@gmail.com", - path = "https://www.danielkissling.de/", - role = "principalInvestigator", - organization = "University of Amsterdam", - firstName = "W. Daniel ", - lastName = "Kissling" - ) - ) - - description <- "MICA - Muskrat and coypu camera trap observations in Belgium, the Netherlands and Germany is an occurrence dataset published by the Research Institute of Nature and Forest (INBO). It is part of the LIFE project MICA, in which innovative techniques are tested for a more efficient control of muskrat and coypu populations, both invasive species. This dataset is a sample of the original dataset and serves as an example of a Camera Trap Data Package (Camtrap DP)./nCamera trap pilot 2 was a test of the difference in species detection and data accumulation between a Snyper Commander camera with a regular lens (52°) and one with a wide lens (100°). The cameras were deployed at 30 cm above the ground within the herbivore exclosure Zeeveld Noord in the Amsterdam Water Supply Dunes from 14th of August 2021 to 24th of September 2021. During this pilot, a solar panel failure caused the cameras to stop recording data from the 24th of August 2021 to the 6th of September (14 days). During annotation, only days in which both cameras were operational were annotated. This led to a total of 1,113 images over 28 days from the two cameras. A detailed description of the dataset can be found in a data paper published in the journal Data in Brief (Evans et al. 2024, https://doi.org/10.1016/j.dib.2024.110544)." - - sources <- list( - list( - title = "Agouti", - path = "https://www.agouti.eu", - email = "agouti@wur.nl", - version = "v3.21" - ), - list( - title = "Agouti", - path = "https://www.agouti.eu", - email = "agouti@wur.nl", - version = "v4" - ) - ) - - licenses <- list( - list(name = "CC0-1.0", scope = "data"), - list(scope = "media", path = "http://creativecommons.org/licenses/by/4.0/"), - list(name = "CC-BY-4.0", scope = "data") - ) - - coordinatePrecision <- 0.001 - - spatial <- list( - type = "Polygon", - coordinates = structure( - c( - 4.013, 5.659, 5.659, 4.013, 4.013, - 50.699, 50.699, 52.35604, 52.35604, 50.699 - ), - dim = c(1L, 5L, 2L) - ) - ) - - temporal <- list(start = "2020-05-30", end = "2022-03-18") - - taxonomic <- list( - list( - scientificName = "Anas platyrhynchos", - taxonID = "https://www.checklistbank.org/dataset/COL2023/taxon/DGP6", - taxonRank = "species", - family = NA_character_, - order. = NA_character_, - vernacularNames = list(eng = "mallard", nld = "wilde eend") - ), - list( - scientificName = "Anas strepera", - taxonID = "https://www.checklistbank.org/dataset/COL2023/taxon/DGPL", - taxonRank = "species", - family = NA_character_, - order. = NA_character_, - vernacularNames = list(eng = "gadwall", nld = "krakeend") - ), - list( - scientificName = "Apodemus sylvaticus", - taxonID = "https://www.checklistbank.org/dataset/COL2023/taxon/FRJJ", - taxonRank = "species", - family = "Muridae", - order. = "Rodentia", - vernacularNames = list(eng = "wood mouse", nld = "bosmuis") - ), - list( - scientificName = "Ardea", - taxonID = "https://www.checklistbank.org/dataset/COL2023/taxon/32FH", - taxonRank = "genus", - family = NA_character_, - order. = NA_character_, - vernacularNames = list(eng = "great herons", nld = "reigers") - ), - list( - scientificName = "Ardea cinerea", - taxonID = "https://www.checklistbank.org/dataset/COL2023/taxon/GCHS", - taxonRank = "species", - family = NA_character_, - order. = NA_character_, - vernacularNames = list(eng = "grey heron", nld = "blauwe reiger") - ), - list( - scientificName = "Aves", - taxonID = "https://www.checklistbank.org/dataset/COL2023/taxon/V2", - taxonRank = "class", - family = NA_character_, - order. = NA_character_, - vernacularNames = list(eng = "bird sp.", nld = "vogel") - ), - list( - scientificName = "Corvus corone", - taxonID = "https://www.checklistbank.org/dataset/COL2023/taxon/YNHJ", - taxonRank = "species", - family = "Corvidae", - order. = "Passeriformes", - vernacularNames = list(eng = "carrion crow", nld = "zwarte kraai") - ), - list( - scientificName = "Homo sapiens", - taxonID = "https://www.checklistbank.org/dataset/COL2023/taxon/6MB3T", - taxonRank = "species", - family = "Hominidae", - order. = "Primates", - vernacularNames = list(eng = "human", nld = "mens") - ), - list( - scientificName = "Martes foina", - taxonID = "https://www.checklistbank.org/dataset/COL2023/taxon/3Y9VW", - taxonRank = "species", - family = NA_character_, - order. = NA_character_, - vernacularNames = list(eng = "beech marten", nld = "steenmarter") - ), - list( - scientificName = "Mustela putorius", - taxonID = "https://www.checklistbank.org/dataset/COL2023/taxon/44QYC", - taxonRank = "species", - family = NA_character_, - order. = NA_character_, - vernacularNames = list(eng = "European polecat", nld = "bunzing") - ), - list( - scientificName = "Oryctolagus cuniculus", - taxonID = "https://www.checklistbank.org/dataset/COL2023/taxon/74ZBP", - taxonRank = "species", - family = "Leporidae", - order. = "Lagomorpha", - vernacularNames = list(eng = "European rabbit", nld = "Europees konijn") - ), - list( - scientificName = "Rattus norvegicus", - taxonID = "https://www.checklistbank.org/dataset/COL2023/taxon/4RM67", - taxonRank = "species", - family = NA_character_, - order. = NA_character_, - vernacularNames = list(eng = "brown rat", nld = "bruine rat") - ), - list( - scientificName = "Vulpes vulpes", - taxonID = "https://www.checklistbank.org/dataset/COL2023/taxon/5BSG3", - taxonRank = "species", - family = "Canidae", - order. = "Carnivora", - vernacularNames = list(eng = "red fox", nld = "vos") - ) - ) - - references <- list("Evans, J.C., Zilber, R., & Kissling, W.D. (2024). Data from three camera trapping pilots in the Amsterdam Water Supply Dunes of the Netherlands. Data in Brief, 54, 110544. https://doi.org/10.1016/j.dib.2024.110544") - - relatedIdentifiers_merged <- list( - list( - relationType = "IsDerivedFrom", - relatedIdentifier = "https://doi.org/10.15468/5tb6ze", - resourceTypeGeneral = "Dataset", - relatedIdentifierType = "DOI" - ), - list( - relationType = "IsSupplementTo", - relatedIdentifier = "https://inbo.github.io/camtraptor/", - resourceTypeGeneral = "Software", - relatedIdentifierType = "URL" - ), - list( - relationType = "IsPublishedIn", - relatedIdentifier = "https://doi.org/10.1016/j.dib.2024.110544", - resourceTypeGeneral = "DataPaper", - relatedIdentifierType = "DOI" - ), - list( - relationType = "IsDerivedFrom", - relatedIdentifier = "7cca70f5-ef8c-4f86-85fb-8f070937d7ab", - resourceTypeGeneral = "Data package", - relatedIdentifierType = "id" - ), - list( - relationType = "IsDerivedFrom", - relatedIdentifier = "y", - resourceTypeGeneral = "Data package", - relatedIdentifierType = "id" - ) - ) - - expect_identical(xy$resources, x$resources) - expect_identical(xy$profile, profile) - expect_identical(xy$name, NA) - expect_identical(xy$id, NULL) - expect_identical(xy$title, NA) - expect_identical(xy$contributors, contributors) - expect_identical( - xy$description, - paste(x$description, y$description, sep = "/n") - ) - expect_identical(xy$version, "1.0") - expect_identical(xy$keywords, c(x$keywords, y$keywords)) - expect_identical(xy$image, NULL) - expect_identical(xy$homepage, NULL) - expect_identical(xy$sources, sources) - expect_identical(xy$licenses, licenses) - expect_identical(xy$bibliographicCitation, NULL) - expect_identical(xy$project, list(x$project, y$project)) - expect_identical(xy$coordinatePrecision, coordinatePrecision) - expect_identical(xy$spatial, spatial) - expect_identical(xy$temporal, temporal) - expect_identical(xy$taxonomic, taxonomic) - expect_identical(xy$references, references) - expect_identical(xy$directory, ".") - expect_identical(xy$relatedIdentifiers, relatedIdentifiers_merged) +test_that("merge_camtrapdp() returns the expected datapackage.json when merging + different datasets", { + skip_if_offline() + temp_dir <- tempdir() + on.exit(unlink(temp_dir, recursive = TRUE)) + + # Datasets that are different: example_dataset + awd_pilot2 + x <- example_dataset() + x$name <- "x" + y_url <- "https://ipt.nlbif.nl/archive.do?r=awd_pilot2" + zip_file <- file.path(temp_dir, "dataset.zip") + datapackage_file <- file.path(temp_dir, "datapackage.json") + download.file(y_url, zip_file, mode = "wb") + unzip(zip_file, exdir = temp_dir) + y <- read_camtrapdp(datapackage_file) + xy <- merge_camtrapdp(x, y) + + # Write to file + temp_dir_merged <- file.path(temp_dir, "merged") + write_camtrapdp(xy, temp_dir_merged) + file.rename( + file.path(temp_dir_merged, "datapackage.json"), + file.path(temp_dir_merged, "datapackage_different_xy.json") + ) + + expect_snapshot_file( + file.path(temp_dir_merged, "datapackage_different_xy.json") + ) }) From 32a9177da460ae2f73d795a970c9f7054eff377b Mon Sep 17 00:00:00 2001 From: Peter Desmet Date: Thu, 7 Nov 2024 16:32:47 +0100 Subject: [PATCH 129/142] Test writing of merged as part of merged tests --- tests/testthat/test-merge_camtrapdp.R | 6 ++++++ tests/testthat/test-write_camtrapdp.R | 25 ------------------------- 2 files changed, 6 insertions(+), 25 deletions(-) diff --git a/tests/testthat/test-merge_camtrapdp.R b/tests/testthat/test-merge_camtrapdp.R index a4ea9093..93f307b2 100644 --- a/tests/testthat/test-merge_camtrapdp.R +++ b/tests/testthat/test-merge_camtrapdp.R @@ -132,6 +132,9 @@ test_that("merge_camtrapdp() returns the expected datapackage.json when merging ) expect_snapshot_file(file.path(temp_dir, "datapackage_identical_xy.json")) + expect_no_error( + read_camtrapdp(file.path(temp_dir, "datapackage_identical_xy.json")) + ) }) test_that("merge_camtrapdp() returns the expected datapackage.json when merging @@ -162,4 +165,7 @@ test_that("merge_camtrapdp() returns the expected datapackage.json when merging expect_snapshot_file( file.path(temp_dir_merged, "datapackage_different_xy.json") ) + expect_no_error( + read_camtrapdp(file.path(temp_dir_merged, "datapackage_different_xy.json")) + ) }) diff --git a/tests/testthat/test-write_camtrapdp.R b/tests/testthat/test-write_camtrapdp.R index cd3bd68b..d35ce671 100644 --- a/tests/testthat/test-write_camtrapdp.R +++ b/tests/testthat/test-write_camtrapdp.R @@ -29,31 +29,6 @@ test_that("write_camtrapdp() writes a (filtered) dataset that can be read", { expect_lt(nrow(observations(x_written)), nrow(observations(x))) }) -test_that("write_camtrapdp() writes a merged dataset that can be read", { - skip_if_offline() - x <- example_dataset() - - # Download second Camera Trap Data package - temp_dir <- tempdir() - on.exit(unlink(temp_dir, recursive = TRUE)) - zip_file <- file.path(temp_dir, "dataset.zip") - datapackage_file <- file.path(temp_dir, "datapackage.json") - url <- "https://ipt.nlbif.nl/archive.do?r=awd_pilot2" - download.file(url, zip_file, mode = "wb") - unzip(zip_file, exdir = temp_dir) - y <- read_camtrapdp(datapackage_file) - - # Merge - xy_merged <- merge_camtrapdp(x, y) - - # Write - write_camtrapdp(xy_merged, file.path(temp_dir, "processed"), compress = TRUE) - - expect_no_error( - read_camtrapdp(file.path(temp_dir, "processed", "datapackage.json")) - ) -}) - test_that("write_camtrapdp() writes the unaltered example dataset as is", { skip_if_offline() x <- example_dataset() From 0f9a28aac86b69a163bf45db34d2692eebb9fb0a Mon Sep 17 00:00:00 2001 From: Peter Desmet Date: Thu, 7 Nov 2024 16:33:07 +0100 Subject: [PATCH 130/142] Don't use subdirs in tempdir() --- tests/testthat/test-write_camtrapdp.R | 6 +++--- tests/testthat/test-write_dwc.R | 8 ++++---- tests/testthat/test-write_eml.R | 6 +++--- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/tests/testthat/test-write_camtrapdp.R b/tests/testthat/test-write_camtrapdp.R index d35ce671..a329a20b 100644 --- a/tests/testthat/test-write_camtrapdp.R +++ b/tests/testthat/test-write_camtrapdp.R @@ -17,7 +17,7 @@ test_that("write_camtrapdp() writes datapackage.json and CSV files to a test_that("write_camtrapdp() writes a (filtered) dataset that can be read", { skip_if_offline() x <- example_dataset() - temp_dir <- file.path(tempdir(), "package") + temp_dir <- tempdir() on.exit(unlink(temp_dir, recursive = TRUE)) # Filter deployments and write to disk write_camtrapdp(filter_deployments(x, deploymentID == "00a2c20d"), temp_dir) @@ -32,7 +32,7 @@ test_that("write_camtrapdp() writes a (filtered) dataset that can be read", { test_that("write_camtrapdp() writes the unaltered example dataset as is", { skip_if_offline() x <- example_dataset() - temp_dir <- file.path(tempdir(), "package") + temp_dir <- tempdir() on.exit(unlink(temp_dir, recursive = TRUE)) write_camtrapdp(x, temp_dir) @@ -81,7 +81,7 @@ test_that("write_camtrapdp() writes the unaltered example dataset as is", { test_that("write_camtrapdp() can write compressed files", { skip_if_offline() x <- example_dataset() - temp_dir <- file.path(tempdir(), "package") + temp_dir <- tempdir() on.exit(unlink(temp_dir, recursive = TRUE)) write_camtrapdp(x, temp_dir, compress = TRUE) diff --git a/tests/testthat/test-write_dwc.R b/tests/testthat/test-write_dwc.R index 3c985659..d09e706e 100644 --- a/tests/testthat/test-write_dwc.R +++ b/tests/testthat/test-write_dwc.R @@ -19,7 +19,7 @@ test_that("write_dwc() writes CSV and meta.xml files to a directory and test_that("write_dwc() returns the expected Darwin Core terms as columns", { skip_if_offline() x <- example_dataset() - temp_dir <- file.path(tempdir(), "dwc") + temp_dir <- tempdir() on.exit(unlink(temp_dir, recursive = TRUE)) result <- suppressMessages(write_dwc(x, temp_dir)) @@ -90,7 +90,7 @@ test_that("write_dwc() returns the expected Darwin Core mapping for the example dataset", { skip_if_offline() x <- example_dataset() - temp_dir <- file.path(tempdir(), "dwc") + temp_dir <- tempdir() on.exit(unlink(temp_dir, recursive = TRUE)) suppressMessages(write_dwc(x, temp_dir)) @@ -102,7 +102,7 @@ test_that("write_dwc() returns the expected Darwin Core mapping for the example test_that("write_dwc() returns files that comply with the info in meta.xml", { skip_if_offline() x <- example_dataset() - temp_dir <- file.path(tempdir(), "dwc") + temp_dir <- tempdir() on.exit(unlink(temp_dir, recursive = TRUE)) suppressMessages(write_dwc(x, temp_dir)) @@ -117,7 +117,7 @@ test_that("write_dwc() returns output when taxonID is missing", { optional_cols <- c("taxon.taxonID") observations(x) <- dplyr::select(observations(x), -dplyr::all_of(optional_cols)) - temp_dir <- file.path(tempdir(), "dwc") + temp_dir <- tempdir() on.exit(unlink(temp_dir, recursive = TRUE)) expect_no_error(suppressMessages(write_dwc(x, temp_dir))) diff --git a/tests/testthat/test-write_eml.R b/tests/testthat/test-write_eml.R index f40bfb49..e581946a 100644 --- a/tests/testthat/test-write_eml.R +++ b/tests/testthat/test-write_eml.R @@ -22,7 +22,7 @@ test_that("write_eml() returns the expected eml.xml file for the example dataset", { skip_if_offline() x <- example_dataset() - temp_dir <- file.path(tempdir(), "eml") + temp_dir <- tempdir() on.exit(unlink(temp_dir, recursive = TRUE)) result <- suppressMessages(write_eml(x, temp_dir)) result$packageId <- "random_uuid" # Overwrite generated UUID @@ -36,7 +36,7 @@ test_that("write_eml() returns the expected eml.xml file for the example test_that("write_eml() supports disabling the derived paragraph", { skip_if_offline() x <- example_dataset() - temp_dir <- file.path(tempdir(), "eml") + temp_dir <- tempdir() on.exit(unlink(temp_dir, recursive = TRUE)) result <- suppressMessages(write_eml(x, temp_dir, derived_paragraph = FALSE)) @@ -49,7 +49,7 @@ test_that("write_eml() sets contact/metadata provider to first creator if none x <- example_dataset() # Remove contributor with role == contact x$contributors[[4]] <- NULL - temp_dir <- file.path(tempdir(), "eml") + temp_dir <- tempdir() on.exit(unlink(temp_dir, recursive = TRUE)) result <- suppressMessages(write_eml(x, temp_dir)) From ece86df21e206dc64dd997874f8ea9795059cce0 Mon Sep 17 00:00:00 2001 From: Peter Desmet Date: Thu, 7 Nov 2024 16:37:15 +0100 Subject: [PATCH 131/142] Correct typo --- R/utils-merge.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/utils-merge.R b/R/utils-merge.R index ce5290d9..7b4745a2 100644 --- a/R/utils-merge.R +++ b/R/utils-merge.R @@ -27,7 +27,7 @@ merge_vectors <- function(a, b, prefixes) { #' Merge resources #' -#' Merges the resources of Camera Trap Data Package `x` and with the additional +#' Merges the resources of Camera Trap Data Package `x` with the additional #' resources of `y`. #' Resource names that occur in both `x` and `y` get a prefix. #' @inheritParams merge_camtrapdp From c386affb7609116f66835bf2d02a9c6488211b26 Mon Sep 17 00:00:00 2001 From: Peter Desmet Date: Thu, 7 Nov 2024 16:47:37 +0100 Subject: [PATCH 132/142] Don't keep created in snapshot --- R/merge_camtrapdp.R | 2 +- .../merge_camtrapdp/datapackage_different_xy.json | 1 - .../merge_camtrapdp/datapackage_identical_xy.json | 1 - tests/testthat/test-merge_camtrapdp.R | 13 +++++++------ 4 files changed, 8 insertions(+), 9 deletions(-) diff --git a/R/merge_camtrapdp.R b/R/merge_camtrapdp.R index 503830a8..3923b7bb 100644 --- a/R/merge_camtrapdp.R +++ b/R/merge_camtrapdp.R @@ -97,7 +97,7 @@ merge_camtrapdp <- function(x, y) { observations(xy_merged) <- merge_observations(x, y, prefixes) # Merge/update metadata - xy_merged$name <- NULL + xy_merged$name <- NA xy_merged$id <- NULL xy_merged$created <- format(Sys.time(), "%Y-%m-%dT%H:%M:%SZ") xy_merged$title <- NA diff --git a/tests/testthat/_snaps/merge_camtrapdp/datapackage_different_xy.json b/tests/testthat/_snaps/merge_camtrapdp/datapackage_different_xy.json index 4d47d527..da7ca147 100644 --- a/tests/testthat/_snaps/merge_camtrapdp/datapackage_different_xy.json +++ b/tests/testthat/_snaps/merge_camtrapdp/datapackage_different_xy.json @@ -41,7 +41,6 @@ ], "profile": "https://raw.githubusercontent.com/tdwg/camtrap-dp/1.0.1/camtrap-dp-profile.json", "name": null, - "created": "2024-11-07T16:26:01Z", "title": null, "contributors": [ { diff --git a/tests/testthat/_snaps/merge_camtrapdp/datapackage_identical_xy.json b/tests/testthat/_snaps/merge_camtrapdp/datapackage_identical_xy.json index cc485412..64934c7a 100644 --- a/tests/testthat/_snaps/merge_camtrapdp/datapackage_identical_xy.json +++ b/tests/testthat/_snaps/merge_camtrapdp/datapackage_identical_xy.json @@ -52,7 +52,6 @@ ], "profile": "https://raw.githubusercontent.com/tdwg/camtrap-dp/1.0.1/camtrap-dp-profile.json", "name": null, - "created": "2024-11-07T16:25:57Z", "title": null, "contributors": [ { diff --git a/tests/testthat/test-merge_camtrapdp.R b/tests/testthat/test-merge_camtrapdp.R index 93f307b2..9aae4cb6 100644 --- a/tests/testthat/test-merge_camtrapdp.R +++ b/tests/testthat/test-merge_camtrapdp.R @@ -50,14 +50,13 @@ test_that("merge_camtrapdp() adds prefixes to identifiers in the data to keep them unique", { skip_if_offline() - # Datasets with overlapping deployments: a, b and b, c + # Merge datasets with overlapping deployments: a, b and b, c x <- example_dataset() %>% filter_deployments(deploymentID %in% c("00a2c20d", "29b7d356")) # a, b x$name <- "x" y <- example_dataset() %>% filter_deployments(deploymentID %in% c("29b7d356", "577b543a")) # b, c y$name <- "y" - xy <- merge_camtrapdp(x, y) # No duplicate primary keys @@ -93,7 +92,7 @@ test_that("merge_camtrapdp() adds prefixes to additional resource names to keep them unique", { skip_if_offline() - # Datasets with overlapping resources: individuals and individuals, iris + # Merge datasets with overlapping resources: individuals and individuals, iris x <- example_dataset() x$name <- "x" y <- x @@ -116,13 +115,14 @@ test_that("merge_camtrapdp() returns the expected datapackage.json when merging temp_dir <- tempdir() on.exit(unlink(temp_dir, recursive = TRUE)) - # Datasets that are identical except for name and id + # Merge datasets that are identical except for name and id x <- example_dataset() x$name <- "x" y <- x y$name <- "y" y$id <- "y" xy <- merge_camtrapdp(x, y) + xy$created <- NULL # Write to file write_camtrapdp(xy, temp_dir) @@ -143,16 +143,17 @@ test_that("merge_camtrapdp() returns the expected datapackage.json when merging temp_dir <- tempdir() on.exit(unlink(temp_dir, recursive = TRUE)) - # Datasets that are different: example_dataset + awd_pilot2 + # Merge datasets that are different: example_dataset + awd_pilot2 x <- example_dataset() x$name <- "x" y_url <- "https://ipt.nlbif.nl/archive.do?r=awd_pilot2" zip_file <- file.path(temp_dir, "dataset.zip") datapackage_file <- file.path(temp_dir, "datapackage.json") - download.file(y_url, zip_file, mode = "wb") + download.file(y_url, zip_file, mode = "wb", quiet = TRUE) unzip(zip_file, exdir = temp_dir) y <- read_camtrapdp(datapackage_file) xy <- merge_camtrapdp(x, y) + xy$created <- NULL # Write to file temp_dir_merged <- file.path(temp_dir, "merged") From 050b099a8120489f05f3d68c713856374cfe0553 Mon Sep 17 00:00:00 2001 From: Peter Desmet Date: Wed, 20 Nov 2024 16:41:38 +0100 Subject: [PATCH 133/142] Change variable name to xy --- R/merge_camtrapdp.R | 53 +++++++++++++++++++++++---------------------- 1 file changed, 27 insertions(+), 26 deletions(-) diff --git a/R/merge_camtrapdp.R b/R/merge_camtrapdp.R index 3923b7bb..c73f25b0 100644 --- a/R/merge_camtrapdp.R +++ b/R/merge_camtrapdp.R @@ -85,34 +85,36 @@ merge_camtrapdp <- function(x, y) { } prefixes <- c(x$name, y$name) - # Create xy_merged from x - xy_merged <- x + # Create xy from x + xy <- x # Merge resources - xy_merged$resources <- merge_resources(x, y, prefixes) + xy$resources <- merge_resources(x, y, prefixes) # Merge data - deployments(xy_merged) <- merge_deployments(x, y, prefixes) - media(xy_merged) <- merge_media(x, y, prefixes) - observations(xy_merged) <- merge_observations(x, y, prefixes) + deployments(xy) <- merge_deployments(x, y, prefixes) + media(xy) <- merge_media(x, y, prefixes) + observations(xy) <- merge_observations(x, y, prefixes) # Merge/update metadata - xy_merged$name <- NA - xy_merged$id <- NULL - xy_merged$created <- format(Sys.time(), "%Y-%m-%dT%H:%M:%SZ") - xy_merged$title <- NA - xy_merged$contributors <- remove_duplicates(c(x$contributors, y$contributors)) - xy_merged$description <- paste(x$description, y$description, sep = "/n") - xy_merged$version <- "1.0" - xy_merged$keywords <- unique(c(x$keywords, y$keywords)) - xy_merged$image <- NULL - xy_merged$homepage <- NULL - xy_merged$sources <- remove_duplicates(c(x$sources, y$sources)) - xy_merged$licenses <- remove_duplicates(c(x$licenses, y$licenses)) - xy_merged$project <- list(x$project, y$project) - xy_merged$bibliographicCitation <- NULL - xy_merged$coordinatePrecision <- + xy$name <- NA + xy$id <- NULL + xy$created <- format(Sys.time(), "%Y-%m-%dT%H:%M:%SZ") + xy$title <- NA + xy$contributors <- remove_duplicates(c(x$contributors, y$contributors)) + xy$description <- paste(x$description, y$description, sep = "/n") + xy$version <- "1.0" + xy$keywords <- unique(c(x$keywords, y$keywords)) + xy$image <- NULL + xy$homepage <- NULL + xy$sources <- remove_duplicates(c(x$sources, y$sources)) + xy$licenses <- remove_duplicates(c(x$licenses, y$licenses)) + xy$project <- list(x$project, y$project) + xy$bibliographicCitation <- NULL + xy$coordinatePrecision <- max(x$coordinatePrecision, y$coordinatePrecision, na.rm = TRUE) + xy$references <- unique(c(x$references, y$references)) + xy$directory <- "." if (!is.null(x$id)) { relatedIdentifiers_x <- list( @@ -139,14 +141,13 @@ merge_camtrapdp <- function(x, y) { c(x$relatedIdentifiers, y$relatedIdentifiers, new_relatedIdentifiers) ) - xy_merged$references <- unique(c(x$references, y$references)) - xy_merged$directory <- "." - xy_merged <- - xy_merged %>% + # Update scopes + xy <- + xy %>% update_spatial() %>% update_temporal() %>% update_taxonomic() - return(xy_merged) + return(xy) } From ade305b73ed9001b63b5e41db5f5b4a81a494c3c Mon Sep 17 00:00:00 2001 From: Peter Desmet Date: Wed, 20 Nov 2024 16:42:30 +0100 Subject: [PATCH 134/142] Set name and title to NULL --- R/merge_camtrapdp.R | 25 ++++++++++--------- man/merge_camtrapdp.Rd | 21 ++++++++-------- .../datapackage_different_xy.json | 2 -- .../datapackage_identical_xy.json | 2 -- 4 files changed, 24 insertions(+), 26 deletions(-) diff --git a/R/merge_camtrapdp.R b/R/merge_camtrapdp.R index c73f25b0..0883ba16 100644 --- a/R/merge_camtrapdp.R +++ b/R/merge_camtrapdp.R @@ -27,26 +27,27 @@ #' - Additional resources are retained, with the resource name kept unique. #' #' Metadata properties are merged as follows: -#' - **name**: Set to `NA`. +#' - **name**: Removed. #' - **id**: Removed. #' - **created**: Set to current timestamp. -#' - **title**: Set to `NA`. -#' - **contributors**: A combination is made and duplicates are removed. -#' - **description**: A combination is made. +#' - **title**: Removed. +#' - **contributors**: Combined, with duplicates removed. +#' - **description**: Combined as two paragraphs. #' - **version**: Set to `1.0`. -#' - **keywords**: A combination is made and duplicates are removed. +#' - **keywords**: Combined, with duplicates removed. #' - **image**: Removed. #' - **homepage**: Removed. -#' - **sources**: A combination is made and duplicates are removed. -#' - **licenses**: A combination is made and duplicates are removed. +#' - **sources**: Combined, with duplicates removed. +#' - **licenses**: Combined, with duplicates removed. #' - **bibliographicCitation**: Removed. #' - **project**: List of the projects. #' - **coordinatePrecision**: Set to the least precise `coordinatePrecision`. #' - **spatial**: Reset based on the new deployments. #' - **temporal**: Reset based on the new deployments. -#' - **taxonomic**: A combination is made and duplicates are removed. -#' - **relatedIdentifiers**: A combination is made and duplicates are removed. -#' - **references**: A combination is made and duplicates are removed. +#' - **taxonomic**: Combined, with duplicates removed. +#' - **relatedIdentifiers**: Combined, with duplicates removed. +#' - **references**: Combined, with duplicates removed. +#' - Custom properties of `x` are also retained. #' @examples #' x <- example_dataset() %>% #' filter_deployments(deploymentID %in% c("00a2c20d", "29b7d356")) @@ -97,11 +98,11 @@ merge_camtrapdp <- function(x, y) { observations(xy) <- merge_observations(x, y, prefixes) # Merge/update metadata - xy$name <- NA + xy$name <- NULL xy$id <- NULL xy$created <- format(Sys.time(), "%Y-%m-%dT%H:%M:%SZ") - xy$title <- NA xy$contributors <- remove_duplicates(c(x$contributors, y$contributors)) + xy$title <- NULL xy$description <- paste(x$description, y$description, sep = "/n") xy$version <- "1.0" xy$keywords <- unique(c(x$keywords, y$keywords)) diff --git a/man/merge_camtrapdp.Rd b/man/merge_camtrapdp.Rd index 026c939f..fabb3ad6 100644 --- a/man/merge_camtrapdp.Rd +++ b/man/merge_camtrapdp.Rd @@ -42,26 +42,27 @@ and \code{eventID} kept unique. Metadata properties are merged as follows: \itemize{ -\item \strong{name}: Set to \code{NA}. +\item \strong{name}: Removed. \item \strong{id}: Removed. \item \strong{created}: Set to current timestamp. -\item \strong{title}: Set to \code{NA}. -\item \strong{contributors}: A combination is made and duplicates are removed. -\item \strong{description}: A combination is made. +\item \strong{title}: Removed. +\item \strong{contributors}: Combined, with duplicates removed. +\item \strong{description}: Combined as two paragraphs. \item \strong{version}: Set to \code{1.0}. -\item \strong{keywords}: A combination is made and duplicates are removed. +\item \strong{keywords}: Combined, with duplicates removed. \item \strong{image}: Removed. \item \strong{homepage}: Removed. -\item \strong{sources}: A combination is made and duplicates are removed. -\item \strong{licenses}: A combination is made and duplicates are removed. +\item \strong{sources}: Combined, with duplicates removed. +\item \strong{licenses}: Combined, with duplicates removed. \item \strong{bibliographicCitation}: Removed. \item \strong{project}: List of the projects. \item \strong{coordinatePrecision}: Set to the least precise \code{coordinatePrecision}. \item \strong{spatial}: Reset based on the new deployments. \item \strong{temporal}: Reset based on the new deployments. -\item \strong{taxonomic}: A combination is made and duplicates are removed. -\item \strong{relatedIdentifiers}: A combination is made and duplicates are removed. -\item \strong{references}: A combination is made and duplicates are removed. +\item \strong{taxonomic}: Combined, with duplicates removed. +\item \strong{relatedIdentifiers}: Combined, with duplicates removed. +\item \strong{references}: Combined, with duplicates removed. +\item Custom properties of \code{x} are also retained. } } diff --git a/tests/testthat/_snaps/merge_camtrapdp/datapackage_different_xy.json b/tests/testthat/_snaps/merge_camtrapdp/datapackage_different_xy.json index da7ca147..5bed4b70 100644 --- a/tests/testthat/_snaps/merge_camtrapdp/datapackage_different_xy.json +++ b/tests/testthat/_snaps/merge_camtrapdp/datapackage_different_xy.json @@ -40,8 +40,6 @@ } ], "profile": "https://raw.githubusercontent.com/tdwg/camtrap-dp/1.0.1/camtrap-dp-profile.json", - "name": null, - "title": null, "contributors": [ { "title": "Axel Neukermans", diff --git a/tests/testthat/_snaps/merge_camtrapdp/datapackage_identical_xy.json b/tests/testthat/_snaps/merge_camtrapdp/datapackage_identical_xy.json index 64934c7a..f43eb8a1 100644 --- a/tests/testthat/_snaps/merge_camtrapdp/datapackage_identical_xy.json +++ b/tests/testthat/_snaps/merge_camtrapdp/datapackage_identical_xy.json @@ -51,8 +51,6 @@ } ], "profile": "https://raw.githubusercontent.com/tdwg/camtrap-dp/1.0.1/camtrap-dp-profile.json", - "name": null, - "title": null, "contributors": [ { "title": "Axel Neukermans", From cec0e7948bc074ca456f4d6857f742e14b321057 Mon Sep 17 00:00:00 2001 From: Peter Desmet Date: Wed, 20 Nov 2024 16:45:16 +0100 Subject: [PATCH 135/142] Use simple unique() to detect duplicates + remove helpers --- R/merge_camtrapdp.R | 7 +- R/utils.R | 163 ------------------ .../datapackage_different_xy.json | 14 +- .../datapackage_identical_xy.json | 4 +- 4 files changed, 14 insertions(+), 174 deletions(-) diff --git a/R/merge_camtrapdp.R b/R/merge_camtrapdp.R index 0883ba16..487f7021 100644 --- a/R/merge_camtrapdp.R +++ b/R/merge_camtrapdp.R @@ -101,19 +101,20 @@ merge_camtrapdp <- function(x, y) { xy$name <- NULL xy$id <- NULL xy$created <- format(Sys.time(), "%Y-%m-%dT%H:%M:%SZ") - xy$contributors <- remove_duplicates(c(x$contributors, y$contributors)) xy$title <- NULL + xy$contributors <- unique(c(x$contributors, y$contributors)) xy$description <- paste(x$description, y$description, sep = "/n") xy$version <- "1.0" xy$keywords <- unique(c(x$keywords, y$keywords)) xy$image <- NULL xy$homepage <- NULL - xy$sources <- remove_duplicates(c(x$sources, y$sources)) - xy$licenses <- remove_duplicates(c(x$licenses, y$licenses)) xy$project <- list(x$project, y$project) + xy$sources <- unique(c(x$sources, y$sources)) + xy$licenses <- unique(c(x$licenses, y$licenses)) xy$bibliographicCitation <- NULL xy$coordinatePrecision <- max(x$coordinatePrecision, y$coordinatePrecision, na.rm = TRUE) + xy$relatedIdentifiers <- unique(c(x$relatedIdentifiers, y$relatedIdentifiers)) xy$references <- unique(c(x$references, y$references)) xy$directory <- "." diff --git a/R/utils.R b/R/utils.R index 9d1cb246..6e985a93 100644 --- a/R/utils.R +++ b/R/utils.R @@ -49,169 +49,6 @@ additional_resources <- function(x) { resource_names[!resource_names %in% camtrapdp_resource_names] } -#' Normalize list elements -#' -#' Converts each list element to a named vector with consistent handling of -#' missing values (NA), using determined `unique_names`. -#' -#' @param data_list list to be normalized. -#' @param unique_names the names that the list must have. -#' @return named vector with all `unique_names` present. -#' @family helper functions -#' @noRd -#' @examples -#' data_list <- list( -#' title = "Peter Desmet", -#' email = "peter.desmet@inbo.be", -#' organization = "Research Institute for Nature and Forest (INBO)" -#' ) -#' unique_names <- c("title", "email", "path", "role", "organization") -#' normalize_list(data_list, unique_names) -normalize_list <- function(data_list, unique_names) { - vector <- purrr::map_vec( - unique_names, - ~ ifelse(!is.null(data_list[[.x]]), data_list[[.x]], NA) - ) - names(vector) <- unique_names - return(vector) -} - -#' Check if one element is equal to or a subset of another and vice versa -#' -#' @param element1,element2 elements to compare. -#' @return logical. -#' @family helper functions -#' @noRd -#' @examples -#' element1 <- list( -#' title = "Peter Desmet", -#' email = "peter.desmet@inbo.be", -#' organization = "Research Institute for Nature and Forest (INBO)" -#' ) -#' element2 <- list( -#' title = "Peter Desmet", -#' email = "peter.desmet@inbo.be", -#' path = "https://orcid.org/0000-0002-8442-8025", -#' role = "principalInvestigator", -#' organization = "Research Institute for Nature and Forest (INBO)" -#' ) -#' is_subset(element1, element2) -is_subset <- function(element1, element2) { - all( - purrr::map_vec(names(element1), function(field) { - if (is.na(element1[[field]])) { - TRUE - } else if (is.na(element2[[field]])) { - TRUE - } else { - element1[[field]] == element2[[field]] - } - }) - ) -} - -#' Update a list of unique elements -#' -#' Updates a list of unique elements by adding a new element if it is not a -#' subset of any existing element in the list. It also removes any elements that -#' are subsets of the new element. -#' -#' @param unique_data A list of elements. Each element must be a vector or -#' list. -#' @param current_element A vector or list representing the current element to -#' be added to the list. -#' @return `unique_data`, a list of unique elements updated with the current -#' element, ensuring no element is a subset of another. -#' @family helper functions -#' @noRd -#' @examples -#' unique_data <- list(c(1, 2, 3), c(4, 5), c(1, 2, 3, 4, 5)) -#' current_element <- c(2, 3) -#' update_unique(unique_data, current_element) -update_unique <- function(unique_data, current_element) { - # Check if current element is already a subset of any element in unique_data - is_already_present <- - any( - purrr::map_lgl(unique_data, ~ is_subset(current_element, .x)) - ) - if (!is_already_present) { - # Remove subsets from unique_data - subsets_to_remove <- - purrr::map_lgl(unique_data, ~ is_subset(.x, current_element)) - unique_data <- - unique_data[!subsets_to_remove] %>% - c(list(current_element)) - } - return(unique_data) -} - -#' Remove duplicates and subsets -#' -#' Removes duplicate and subset elements from a list of lists. Elements are -#' considered subsets if all their non-NA fields match. -#' -#' @param data_list List of lists, where each inner list represents an element -#' with named fields. -#' @return List of lists with duplicates and subsets removed. -#' @family helper functions -#' @noRd -#' @examples -#' data_list <- list( -#' list( -#' title = "Axel Neukermans", -#' email = "axel.neukermans@inbo.be", -#' path = "https://orcid.org/0000-0003-0272-9180", -#' role = "contributor", -#' organization = "Research Institute for Nature and Forest (INBO)" -#' ), -#' list( -#' title = "Peter Desmet", -#' email = "peter.desmet@inbo.be", -#' path = "https://orcid.org/0000-0002-8442-8025", -#' role = "principalInvestigator", -#' organization = "Research Institute for Nature and Forest (INBO)" -#' ), -#' list( -#' title = "Research Institute for Nature and Forest (INBO)", -#' path = "https://inbo.be", -#' role = "rightsHolder" -#' ), -#' list( -#' title = "Peter Desmet", -#' email = "peter.desmet@inbo.be", -#' organization = "Research Institute for Nature and Forest (INBO)" -#' ), -#' list( -#' title = "Research Institute for Nature and Forest (INBO)", -#' path = "https://inbo.be", -#' role = "rightsHolder" -#' ) -#' ) -#' remove_duplicates(data_list) -remove_duplicates <- function(data_list) { - # Find all unique field names - unique_names <- - purrr::map(data_list, names) %>% - unlist() %>% - unique() - - # Normalize all elements - normalized_data <- - purrr::map(data_list, ~ normalize_list(.x, unique_names)) - - # Reduce the list to unique elements using update_unique() - unique_data <- Reduce(update_unique, normalized_data, init = list()) - - # Convert back to original list format and remove NA's - unique_data_list <- - purrr::map(unique_data, function(x) { - x <- as.list(x) - x[!sapply(x, is.na)] - }) - - return(unique_data_list) -} - #' Create list of contributors in EML format #' #' @param contributor_list List of contributors diff --git a/tests/testthat/_snaps/merge_camtrapdp/datapackage_different_xy.json b/tests/testthat/_snaps/merge_camtrapdp/datapackage_different_xy.json index 5bed4b70..1c16e00d 100644 --- a/tests/testthat/_snaps/merge_camtrapdp/datapackage_different_xy.json +++ b/tests/testthat/_snaps/merge_camtrapdp/datapackage_different_xy.json @@ -78,25 +78,25 @@ { "title": "Julian Evans", "email": "jevansbio@gmail.com", - "role": "principalInvestigator", "organization": "University of Amsterdam", + "role": "principalInvestigator", "firstName": "Julian", "lastName": "Evans" }, { "title": "Rotem Zilber", "email": "r.kadanzilber@uva.nl", - "role": "principalInvestigator", "organization": "University of Amsterdam", + "role": "principalInvestigator", "firstName": "Rotem", "lastName": "Zilber" }, { "title": "W. Daniel Kissling", - "email": "wdkissling@gmail.com", "path": "https://www.danielkissling.de/", - "role": "principalInvestigator", + "email": "wdkissling@gmail.com", "organization": "University of Amsterdam", + "role": "principalInvestigator", "firstName": "W. Daniel ", "lastName": "Kissling" } @@ -124,8 +124,8 @@ "scope": "data" }, { - "scope": "media", - "path": "http://creativecommons.org/licenses/by/4.0/" + "path": "http://creativecommons.org/licenses/by/4.0/", + "scope": "media" }, { "name": "CC-BY-4.0", @@ -154,6 +154,8 @@ "captureMethod": "activityDetection", "observationLevel": "event", "individualAnimals": false + "name": "CC-BY-4.0", + "scope": "media" } ], "coordinatePrecision": 0.001, diff --git a/tests/testthat/_snaps/merge_camtrapdp/datapackage_identical_xy.json b/tests/testthat/_snaps/merge_camtrapdp/datapackage_identical_xy.json index f43eb8a1..7151a2ae 100644 --- a/tests/testthat/_snaps/merge_camtrapdp/datapackage_identical_xy.json +++ b/tests/testthat/_snaps/merge_camtrapdp/datapackage_identical_xy.json @@ -104,8 +104,6 @@ "scope": "data" }, { - "scope": "media", - "path": "http://creativecommons.org/licenses/by/4.0/" } ], "project": [ @@ -130,6 +128,8 @@ "captureMethod": ["activityDetection", "timeLapse"], "individualAnimals": false, "observationLevel": ["media", "event"] + "path": "http://creativecommons.org/licenses/by/4.0/", + "scope": "media" } ], "coordinatePrecision": 0.001, From ae7e4617a288b9c580e3849a3b7dfb93f434df28 Mon Sep 17 00:00:00 2001 From: Peter Desmet Date: Wed, 20 Nov 2024 16:46:25 +0100 Subject: [PATCH 136/142] Update test comments --- tests/testthat/test-merge_camtrapdp.R | 45 +++++++++++++-------------- 1 file changed, 22 insertions(+), 23 deletions(-) diff --git a/tests/testthat/test-merge_camtrapdp.R b/tests/testthat/test-merge_camtrapdp.R index 9aae4cb6..dd97ecff 100644 --- a/tests/testthat/test-merge_camtrapdp.R +++ b/tests/testthat/test-merge_camtrapdp.R @@ -46,6 +46,27 @@ test_that("merge_camtrapdp() returns error on missing/invalid/duplicate dataset expect_no_error(merge_camtrapdp(x, y)) }) +test_that("merge_camtrapdp() adds prefixes to additional resource names to keep + them unique", { + skip_if_offline() + + # Merge datasets with overlapping resources: individuals and individuals, iris + x <- example_dataset() + x$name <- "x" + y <- x + y <- frictionless::add_resource(y, "iris", iris) + y$name <- "y" + xy <- merge_camtrapdp(x, y) + + expect_identical( + frictionless::resources(xy), + c( + "deployments", "media", "observations", "x_individuals", "y_individuals", + "iris" + ) + ) +}) + test_that("merge_camtrapdp() adds prefixes to identifiers in the data to keep them unique", { skip_if_offline() @@ -88,39 +109,17 @@ test_that("merge_camtrapdp() adds prefixes to identifiers in the data to keep expect_in(merged_observation_ids, observations(xy)$observationID) }) -test_that("merge_camtrapdp() adds prefixes to additional resource names to keep - them unique", { - skip_if_offline() - - # Merge datasets with overlapping resources: individuals and individuals, iris - x <- example_dataset() - x$name <- "x" - y <- x - y <- frictionless::add_resource(y, "iris", iris) - y$name <- "y" - xy <- merge_camtrapdp(x, y) - - expect_identical( - frictionless::resources(xy), - c( - "deployments", "media", "observations", "x_individuals", "y_individuals", - "iris" - ) - ) -}) - test_that("merge_camtrapdp() returns the expected datapackage.json when merging identical datasets", { skip_if_offline() temp_dir <- tempdir() on.exit(unlink(temp_dir, recursive = TRUE)) - # Merge datasets that are identical except for name and id + # Merge datasets that are identical except for name x <- example_dataset() x$name <- "x" y <- x y$name <- "y" - y$id <- "y" xy <- merge_camtrapdp(x, y) xy$created <- NULL From 2af4b626d6ecb4b7bdb741e5441791d0758aa330 Mon Sep 17 00:00:00 2001 From: Peter Desmet Date: Wed, 20 Nov 2024 16:46:54 +0100 Subject: [PATCH 137/142] Import rlang for %||% --- NAMESPACE | 1 + R/camtrapdp-package.R | 1 + 2 files changed, 2 insertions(+) diff --git a/NAMESPACE b/NAMESPACE index 98c7e65d..a7d40d3d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -24,6 +24,7 @@ export(version) export(write_camtrapdp) export(write_dwc) export(write_eml) +import(rlang) importFrom(dplyr,"%>%") importFrom(dplyr,.data) importFrom(memoise,memoise) diff --git a/R/camtrapdp-package.R b/R/camtrapdp-package.R index e282e3ee..8555b226 100644 --- a/R/camtrapdp-package.R +++ b/R/camtrapdp-package.R @@ -3,5 +3,6 @@ ## usethis namespace: start #' @importFrom dplyr %>% .data +#' @import rlang ## usethis namespace: end NULL From e06c49078f4f337201461b0cd746b1eb2f3024dc Mon Sep 17 00:00:00 2001 From: Peter Desmet Date: Wed, 20 Nov 2024 16:48:31 +0100 Subject: [PATCH 138/142] Combine project info, rather than having 2 Otherwise the info is not valid --- R/merge_camtrapdp.R | 23 +++++++++++-- man/merge_camtrapdp.Rd | 10 +++++- .../datapackage_different_xy.json | 31 +++++------------ .../datapackage_identical_xy.json | 33 +++++-------------- 4 files changed, 48 insertions(+), 49 deletions(-) diff --git a/R/merge_camtrapdp.R b/R/merge_camtrapdp.R index 487f7021..d50be19b 100644 --- a/R/merge_camtrapdp.R +++ b/R/merge_camtrapdp.R @@ -40,7 +40,15 @@ #' - **sources**: Combined, with duplicates removed. #' - **licenses**: Combined, with duplicates removed. #' - **bibliographicCitation**: Removed. -#' - **project**: List of the projects. +#' - **project$id**: Removed. +#' - **project$title**: Combined. +#' - **project$acronym**: Removed. +#' - **project$description**: Combined as two paragraphs. +#' - **project$path**: Removed. +#' - **project$samplingDesign**: Sampling design of `x`. +#' - **project$captureMethod**: Combined, with duplicates removed. +#' - **project$individuals**: `TRUE` if one of the datasets has `TRUE`. +#' - **project$observationLevel**: Combined, with duplicates removed. #' - **coordinatePrecision**: Set to the least precise `coordinatePrecision`. #' - **spatial**: Reset based on the new deployments. #' - **temporal**: Reset based on the new deployments. @@ -108,10 +116,21 @@ merge_camtrapdp <- function(x, y) { xy$keywords <- unique(c(x$keywords, y$keywords)) xy$image <- NULL xy$homepage <- NULL - xy$project <- list(x$project, y$project) xy$sources <- unique(c(x$sources, y$sources)) xy$licenses <- unique(c(x$licenses, y$licenses)) xy$bibliographicCitation <- NULL + xy$project$id <- NULL + xy$project$title <- paste(x$project$title, y$project$title, sep = " / ") + xy$project$acronym <- NULL + xy$project$description <- + paste(x$project$description, y$project$description, sep = "/n") + xy$project$path <- NULL + xy$project$samplingDesign <- x$project$samplingDesign # Second one ignored + xy$project$captureMethod <- + unique(c(x$project$captureMethod, y$project$captureMethod)) + xy$project$individuals <- any(x$project$individuals, y$project$individiuals) + xy$project$observationLevel <- + unique(c(x$project$observationLevel, y$project$observationLevel)) xy$coordinatePrecision <- max(x$coordinatePrecision, y$coordinatePrecision, na.rm = TRUE) xy$relatedIdentifiers <- unique(c(x$relatedIdentifiers, y$relatedIdentifiers)) diff --git a/man/merge_camtrapdp.Rd b/man/merge_camtrapdp.Rd index fabb3ad6..07e7b908 100644 --- a/man/merge_camtrapdp.Rd +++ b/man/merge_camtrapdp.Rd @@ -55,7 +55,15 @@ Metadata properties are merged as follows: \item \strong{sources}: Combined, with duplicates removed. \item \strong{licenses}: Combined, with duplicates removed. \item \strong{bibliographicCitation}: Removed. -\item \strong{project}: List of the projects. +\item \strong{project$id}: Removed. +\item \strong{project$title}: Combined. +\item \strong{project$acronym}: Removed. +\item \strong{project$description}: Combined as two paragraphs. +\item \strong{project$path}: Removed. +\item \strong{project$samplingDesign}: Sampling design of \code{x}. +\item \strong{project$captureMethod}: Combined, with duplicates removed. +\item \strong{project$individuals}: \code{TRUE} if one of the datasets has \code{TRUE}. +\item \strong{project$observationLevel}: Combined, with duplicates removed. \item \strong{coordinatePrecision}: Set to the least precise \code{coordinatePrecision}. \item \strong{spatial}: Reset based on the new deployments. \item \strong{temporal}: Reset based on the new deployments. diff --git a/tests/testthat/_snaps/merge_camtrapdp/datapackage_different_xy.json b/tests/testthat/_snaps/merge_camtrapdp/datapackage_different_xy.json index 1c16e00d..15bc8bc7 100644 --- a/tests/testthat/_snaps/merge_camtrapdp/datapackage_different_xy.json +++ b/tests/testthat/_snaps/merge_camtrapdp/datapackage_different_xy.json @@ -130,34 +130,21 @@ { "name": "CC-BY-4.0", "scope": "data" - } - ], - "project": [ - { - "id": "86cabc14-d475-4439-98a7-e7b590bed60e", - "title": "Management of Invasive Coypu and muskrAt in Europe", - "acronym": "MICA", - "description": "Invasive alien species such as the coypu and muskrat pose a major threat to biodiversity and cost millions of euros annually. By feeding on rushes and reeds, these animals cause serious damage to the environment in which they live and endangered species suffer from habitat loss. The disappearance of reeds and digging in dikes represents a safety risk for humans in the lowland areas. With the LIFE project MICA (), the partners from the participating countries want to develop a transnational plan for the management of coypu and muskrat populations in Europe and aim to reduce their population. The objective of an effective population control of coypu and muskrat is to protect lowlands from flooding, to prevent crop damage and loss of biodiversity. The objective of the project is to serve as a pilot and demonstration project in which ‘best practices’ are tested and new techniques are developed for a more efficient control of muskrat and coypu populations. By involving organisations from Belgium, Germany and the Netherlands, the project also promotes international cooperation and knowledge exchange in the field of muskrat and coypu management.", - "samplingDesign": "targeted", - "path": "https://lifemica.eu", - "captureMethod": ["activityDetection", "timeLapse"], - "individualAnimals": false, - "observationLevel": ["media", "event"] }, { - "id": "93b1ff03-de48-470e-a997-88d7b33501ce", - "title": "Data from three camera trapping pilots in the Amsterdam Water Supply Dunes of the Netherlands", - "acronym": "ARISE-MDS", - "description": "Three pilot studies were conducted to test the autonomous deployment of wireless 4G wildlife cameras with solar panels and automated data transmission in the coastal dunes of the Amsterdam Water Supply Dunes, The Netherlands. Monitoring and management of grazing mammals such as the European rabbit (Oryctolagus cuniculus) and the European fallow deer (Dama dama) is of key interest in this nature reserve, as they slow down the rate of natural succession and alter plant species composition and vegetation structure through grazing and digging. These grazers as well as predators such as the red fox (Vulpes vulpes) have typically been monitored using traditional survey methods such as transect and areal counts, but only once or twice a year. The study was aimed to test the feasibility and methodology of running long-term autonomous monitoring networks with wildlife cameras. This involved the testing of power usage, data transmission, and data accumulation, and the robustness of cameras to herbivore damage. Furthermore, the pilots specifically tested how the detection of focal species (rabbits, deer, foxes) differs with deployment heights, camera lens types and the placement in different habitats. The pilot also provides labelled images for the development of deep learning algorithms to automatically identify species.\r\n\r\nWork was carried out as part of the development of the monitoring demonstration sites of a large-scale research infrastructure project ARISE.", - "path": "https://www.arise-biodiversity.nl/teammonitoringdemonstration", - "samplingDesign": "opportunistic", - "captureMethod": "activityDetection", - "observationLevel": "event", - "individualAnimals": false "name": "CC-BY-4.0", "scope": "media" } ], + "project": { + "title": "Management of Invasive Coypu and muskrAt in Europe / Data from three camera trapping pilots in the Amsterdam Water Supply Dunes of the Netherlands", + "description": "Invasive alien species such as the coypu and muskrat pose a major threat to biodiversity and cost millions of euros annually. By feeding on rushes and reeds, these animals cause serious damage to the environment in which they live and endangered species suffer from habitat loss. The disappearance of reeds and digging in dikes represents a safety risk for humans in the lowland areas. With the LIFE project MICA (), the partners from the participating countries want to develop a transnational plan for the management of coypu and muskrat populations in Europe and aim to reduce their population. The objective of an effective population control of coypu and muskrat is to protect lowlands from flooding, to prevent crop damage and loss of biodiversity. The objective of the project is to serve as a pilot and demonstration project in which ‘best practices’ are tested and new techniques are developed for a more efficient control of muskrat and coypu populations. By involving organisations from Belgium, Germany and the Netherlands, the project also promotes international cooperation and knowledge exchange in the field of muskrat and coypu management./nThree pilot studies were conducted to test the autonomous deployment of wireless 4G wildlife cameras with solar panels and automated data transmission in the coastal dunes of the Amsterdam Water Supply Dunes, The Netherlands. Monitoring and management of grazing mammals such as the European rabbit (Oryctolagus cuniculus) and the European fallow deer (Dama dama) is of key interest in this nature reserve, as they slow down the rate of natural succession and alter plant species composition and vegetation structure through grazing and digging. These grazers as well as predators such as the red fox (Vulpes vulpes) have typically been monitored using traditional survey methods such as transect and areal counts, but only once or twice a year. The study was aimed to test the feasibility and methodology of running long-term autonomous monitoring networks with wildlife cameras. This involved the testing of power usage, data transmission, and data accumulation, and the robustness of cameras to herbivore damage. Furthermore, the pilots specifically tested how the detection of focal species (rabbits, deer, foxes) differs with deployment heights, camera lens types and the placement in different habitats. The pilot also provides labelled images for the development of deep learning algorithms to automatically identify species.\r\n\r\nWork was carried out as part of the development of the monitoring demonstration sites of a large-scale research infrastructure project ARISE.", + "samplingDesign": "targeted", + "captureMethod": ["activityDetection", "timeLapse"], + "individualAnimals": false, + "observationLevel": ["media", "event"], + "individuals": false + }, "coordinatePrecision": 0.001, "spatial": { "type": "Polygon", diff --git a/tests/testthat/_snaps/merge_camtrapdp/datapackage_identical_xy.json b/tests/testthat/_snaps/merge_camtrapdp/datapackage_identical_xy.json index 7151a2ae..d93e72e4 100644 --- a/tests/testthat/_snaps/merge_camtrapdp/datapackage_identical_xy.json +++ b/tests/testthat/_snaps/merge_camtrapdp/datapackage_identical_xy.json @@ -104,34 +104,19 @@ "scope": "data" }, { - } - ], - "project": [ - { - "id": "86cabc14-d475-4439-98a7-e7b590bed60e", - "title": "Management of Invasive Coypu and muskrAt in Europe", - "acronym": "MICA", - "description": "Invasive alien species such as the coypu and muskrat pose a major threat to biodiversity and cost millions of euros annually. By feeding on rushes and reeds, these animals cause serious damage to the environment in which they live and endangered species suffer from habitat loss. The disappearance of reeds and digging in dikes represents a safety risk for humans in the lowland areas. With the LIFE project MICA (), the partners from the participating countries want to develop a transnational plan for the management of coypu and muskrat populations in Europe and aim to reduce their population. The objective of an effective population control of coypu and muskrat is to protect lowlands from flooding, to prevent crop damage and loss of biodiversity. The objective of the project is to serve as a pilot and demonstration project in which ‘best practices’ are tested and new techniques are developed for a more efficient control of muskrat and coypu populations. By involving organisations from Belgium, Germany and the Netherlands, the project also promotes international cooperation and knowledge exchange in the field of muskrat and coypu management.", - "samplingDesign": "targeted", - "path": "https://lifemica.eu", - "captureMethod": ["activityDetection", "timeLapse"], - "individualAnimals": false, - "observationLevel": ["media", "event"] - }, - { - "id": "86cabc14-d475-4439-98a7-e7b590bed60e", - "title": "Management of Invasive Coypu and muskrAt in Europe", - "acronym": "MICA", - "description": "Invasive alien species such as the coypu and muskrat pose a major threat to biodiversity and cost millions of euros annually. By feeding on rushes and reeds, these animals cause serious damage to the environment in which they live and endangered species suffer from habitat loss. The disappearance of reeds and digging in dikes represents a safety risk for humans in the lowland areas. With the LIFE project MICA (), the partners from the participating countries want to develop a transnational plan for the management of coypu and muskrat populations in Europe and aim to reduce their population. The objective of an effective population control of coypu and muskrat is to protect lowlands from flooding, to prevent crop damage and loss of biodiversity. The objective of the project is to serve as a pilot and demonstration project in which ‘best practices’ are tested and new techniques are developed for a more efficient control of muskrat and coypu populations. By involving organisations from Belgium, Germany and the Netherlands, the project also promotes international cooperation and knowledge exchange in the field of muskrat and coypu management.", - "samplingDesign": "targeted", - "path": "https://lifemica.eu", - "captureMethod": ["activityDetection", "timeLapse"], - "individualAnimals": false, - "observationLevel": ["media", "event"] "path": "http://creativecommons.org/licenses/by/4.0/", "scope": "media" } ], + "project": { + "title": "Management of Invasive Coypu and muskrAt in Europe / Management of Invasive Coypu and muskrAt in Europe", + "description": "Invasive alien species such as the coypu and muskrat pose a major threat to biodiversity and cost millions of euros annually. By feeding on rushes and reeds, these animals cause serious damage to the environment in which they live and endangered species suffer from habitat loss. The disappearance of reeds and digging in dikes represents a safety risk for humans in the lowland areas. With the LIFE project MICA (), the partners from the participating countries want to develop a transnational plan for the management of coypu and muskrat populations in Europe and aim to reduce their population. The objective of an effective population control of coypu and muskrat is to protect lowlands from flooding, to prevent crop damage and loss of biodiversity. The objective of the project is to serve as a pilot and demonstration project in which ‘best practices’ are tested and new techniques are developed for a more efficient control of muskrat and coypu populations. By involving organisations from Belgium, Germany and the Netherlands, the project also promotes international cooperation and knowledge exchange in the field of muskrat and coypu management./nInvasive alien species such as the coypu and muskrat pose a major threat to biodiversity and cost millions of euros annually. By feeding on rushes and reeds, these animals cause serious damage to the environment in which they live and endangered species suffer from habitat loss. The disappearance of reeds and digging in dikes represents a safety risk for humans in the lowland areas. With the LIFE project MICA (), the partners from the participating countries want to develop a transnational plan for the management of coypu and muskrat populations in Europe and aim to reduce their population. The objective of an effective population control of coypu and muskrat is to protect lowlands from flooding, to prevent crop damage and loss of biodiversity. The objective of the project is to serve as a pilot and demonstration project in which ‘best practices’ are tested and new techniques are developed for a more efficient control of muskrat and coypu populations. By involving organisations from Belgium, Germany and the Netherlands, the project also promotes international cooperation and knowledge exchange in the field of muskrat and coypu management.", + "samplingDesign": "targeted", + "captureMethod": ["activityDetection", "timeLapse"], + "individualAnimals": false, + "observationLevel": ["media", "event"], + "individuals": false + }, "coordinatePrecision": 0.001, "spatial": { "type": "Polygon", From 712ee32ea05fefe6a82e3f8041a121b5663ae11b Mon Sep 17 00:00:00 2001 From: Peter Desmet Date: Wed, 20 Nov 2024 16:49:09 +0100 Subject: [PATCH 139/142] Add DOIs as properly-defined relatedIdentifiers --- R/merge_camtrapdp.R | 39 +++++++------------ .../datapackage_different_xy.json | 6 --- .../datapackage_identical_xy.json | 12 ------ tests/testthat/test-merge_camtrapdp.R | 33 ++++++++++++++++ 4 files changed, 48 insertions(+), 42 deletions(-) diff --git a/R/merge_camtrapdp.R b/R/merge_camtrapdp.R index d50be19b..a4762e5e 100644 --- a/R/merge_camtrapdp.R +++ b/R/merge_camtrapdp.R @@ -137,31 +137,22 @@ merge_camtrapdp <- function(x, y) { xy$references <- unique(c(x$references, y$references)) xy$directory <- "." - if (!is.null(x$id)) { - relatedIdentifiers_x <- list( - relationType = "IsDerivedFrom", - relatedIdentifier = as.character(x$id), - resourceTypeGeneral = "Data package", - relatedIdentifierType = "id" - ) - } else { - relatedIdentifiers_x <- list() - } - if (!is.null(y$id)) { - relatedIdentifiers_y <- list( - relationType = "IsDerivedFrom", - relatedIdentifier = as.character(y$id), - resourceTypeGeneral = "Data package", - relatedIdentifierType = "id" - ) - } else { - relatedIdentifiers_y <- list() - } - new_relatedIdentifiers <- list(relatedIdentifiers_x, relatedIdentifiers_y) - xy_merged$relatedIdentifiers <- remove_duplicates( - c(x$relatedIdentifiers, y$relatedIdentifiers, new_relatedIdentifiers) - ) + # Add package$id to related identifiers if it is a DOI + add_related_id <- function(id, related_ids) { + if (grepl("doi", id %||% "")) { + new_related_id <- list( + relationType = "isDerivedFrom", + relatedIdentifier = id, + resourceTypeGeneral = "Dataset", + relatedIdentifierType = "DOI" + ) + related_ids <- c(related_ids, list(new_related_id)) + } + return(related_ids) + } + xy$relatedIdentifiers <- add_related_id(x$id, xy$relatedIdentifiers) + xy$relatedIdentifiers <- add_related_id(y$id, xy$relatedIdentifiers) # Update scopes xy <- diff --git a/tests/testthat/_snaps/merge_camtrapdp/datapackage_different_xy.json b/tests/testthat/_snaps/merge_camtrapdp/datapackage_different_xy.json index 15bc8bc7..f1b9cbbf 100644 --- a/tests/testthat/_snaps/merge_camtrapdp/datapackage_different_xy.json +++ b/tests/testthat/_snaps/merge_camtrapdp/datapackage_different_xy.json @@ -325,12 +325,6 @@ "relatedIdentifier": "https://doi.org/10.1016/j.dib.2024.110544", "resourceTypeGeneral": "DataPaper", "relatedIdentifierType": "DOI" - }, - { - "relationType": "IsDerivedFrom", - "relatedIdentifier": "7cca70f5-ef8c-4f86-85fb-8f070937d7ab", - "resourceTypeGeneral": "Data package", - "relatedIdentifierType": "id" } ], "references": [ diff --git a/tests/testthat/_snaps/merge_camtrapdp/datapackage_identical_xy.json b/tests/testthat/_snaps/merge_camtrapdp/datapackage_identical_xy.json index d93e72e4..ae3c458f 100644 --- a/tests/testthat/_snaps/merge_camtrapdp/datapackage_identical_xy.json +++ b/tests/testthat/_snaps/merge_camtrapdp/datapackage_identical_xy.json @@ -238,18 +238,6 @@ "relatedIdentifier": "https://inbo.github.io/camtraptor/", "resourceTypeGeneral": "Software", "relatedIdentifierType": "URL" - }, - { - "relationType": "IsDerivedFrom", - "relatedIdentifier": "7cca70f5-ef8c-4f86-85fb-8f070937d7ab", - "resourceTypeGeneral": "Data package", - "relatedIdentifierType": "id" - }, - { - "relationType": "IsDerivedFrom", - "relatedIdentifier": "y", - "resourceTypeGeneral": "Data package", - "relatedIdentifierType": "id" } ], "references": [] diff --git a/tests/testthat/test-merge_camtrapdp.R b/tests/testthat/test-merge_camtrapdp.R index dd97ecff..b0075148 100644 --- a/tests/testthat/test-merge_camtrapdp.R +++ b/tests/testthat/test-merge_camtrapdp.R @@ -169,3 +169,36 @@ test_that("merge_camtrapdp() returns the expected datapackage.json when merging read_camtrapdp(file.path(temp_dir_merged, "datapackage_different_xy.json")) ) }) + +test_that("merge_camtrapdp() adds the DOI of the original datasets as related + identifiers", { + skip_if_offline() + + # Merge datasets that are identical except for name and DOI + x <- example_dataset() + x$name <- "x" + x$id <- "https://doi.org/x" + y <- x + y$name <- "y" + y$id <- "http://doi.org/y" + xy <- merge_camtrapdp(x, y) + + expect_identical( + xy$relatedIdentifiers[[3]], # 1 and 2 were present in x and y (identical) + list( + relationType = "isDerivedFrom", + relatedIdentifier = "https://doi.org/x", + resourceTypeGeneral = "Dataset", + relatedIdentifierType = "DOI" + ) + ) + expect_identical( + xy$relatedIdentifiers[[4]], + list( + relationType = "isDerivedFrom", + relatedIdentifier = "http://doi.org/y", + resourceTypeGeneral = "Dataset", + relatedIdentifierType = "DOI" + ) + ) +}) From 4c998497da7a2d3b4bba77fe5e134727149b90ad Mon Sep 17 00:00:00 2001 From: Peter Desmet Date: Thu, 21 Nov 2024 09:44:40 +0100 Subject: [PATCH 140/142] Fix example --- R/merge_camtrapdp.R | 4 ++-- man/merge_camtrapdp.Rd | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/R/merge_camtrapdp.R b/R/merge_camtrapdp.R index a4762e5e..026d8959 100644 --- a/R/merge_camtrapdp.R +++ b/R/merge_camtrapdp.R @@ -61,8 +61,8 @@ #' filter_deployments(deploymentID %in% c("00a2c20d", "29b7d356")) #' y <- example_dataset() %>% #' filter_deployments(deploymentID %in% c("577b543a", "62c200a9")) -#' x$id <- "x" -#' y$id <- "y" +#' x$name <- "x" +#' y$name <- "y" #' merge_camtrapdp(x, y) merge_camtrapdp <- function(x, y) { check_camtrapdp(x) diff --git a/man/merge_camtrapdp.Rd b/man/merge_camtrapdp.Rd index 07e7b908..7d157ab2 100644 --- a/man/merge_camtrapdp.Rd +++ b/man/merge_camtrapdp.Rd @@ -79,8 +79,8 @@ x <- example_dataset() \%>\% filter_deployments(deploymentID \%in\% c("00a2c20d", "29b7d356")) y <- example_dataset() \%>\% filter_deployments(deploymentID \%in\% c("577b543a", "62c200a9")) -x$id <- "x" -y$id <- "y" +x$name <- "x" +y$name <- "y" merge_camtrapdp(x, y) } \seealso{ From 72f73107eafc1f69461c9a863cf4f38353938cc6 Mon Sep 17 00:00:00 2001 From: Peter Desmet Date: Thu, 21 Nov 2024 10:15:23 +0100 Subject: [PATCH 141/142] Use chuck for required properties Co-Authored-By: Pieter Huybrechts <48065851+PietrH@users.noreply.github.com> --- R/utils-merge.R | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/R/utils-merge.R b/R/utils-merge.R index 7b4745a2..ef7c678a 100644 --- a/R/utils-merge.R +++ b/R/utils-merge.R @@ -69,8 +69,8 @@ merge_deployments <- function(x, y, prefixes) { dplyr::bind_rows(x_deployments, y_deployments) %>% dplyr::mutate( deploymentID = merge_vectors( - purrr::pluck(x_deployments, "deploymentID"), - purrr::pluck(y_deployments, "deploymentID"), + purrr::chuck(x_deployments, "deploymentID"), + purrr::chuck(y_deployments, "deploymentID"), prefixes ) ) @@ -92,13 +92,13 @@ merge_media <- function(x, y, prefixes) { dplyr::bind_rows(x_media, y_media) %>% dplyr::mutate( mediaID = merge_vectors( - purrr::pluck(x_media, "mediaID"), - purrr::pluck(y_media, "mediaID"), + purrr::chuck(x_media, "mediaID"), + purrr::chuck(y_media, "mediaID"), prefixes ), deploymentID = merge_vectors( - purrr::pluck(x_media, "deploymentID"), - purrr::pluck(y_media, "deploymentID"), + purrr::chuck(x_media, "deploymentID"), + purrr::chuck(y_media, "deploymentID"), prefixes ), eventID = merge_vectors( @@ -125,13 +125,13 @@ merge_observations <- function(x, y, prefixes) { dplyr::bind_rows(x_observations, y_observations) %>% dplyr::mutate( observationID = merge_vectors( - purrr::pluck(x_observations, "observationID"), - purrr::pluck(y_observations, "observationID"), + purrr::chuck(x_observations, "observationID"), + purrr::chuck(y_observations, "observationID"), prefixes ), deploymentID = merge_vectors( - purrr::pluck(x_observations, "deploymentID"), - purrr::pluck(y_observations, "deploymentID"), + purrr::chuck(x_observations, "deploymentID"), + purrr::chuck(y_observations, "deploymentID"), prefixes ), mediaID = merge_vectors( From 722b71faf4391139b721c832d72ac036a5f3faf5 Mon Sep 17 00:00:00 2001 From: Peter Desmet Date: Thu, 21 Nov 2024 15:22:33 +0100 Subject: [PATCH 142/142] Update doc --- R/utils.R | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/R/utils.R b/R/utils.R index 6e985a93..f4625a0a 100644 --- a/R/utils.R +++ b/R/utils.R @@ -72,16 +72,16 @@ create_eml_contributors <- function(contributor_list) { #' Replace NULL values recursively #' -#' Replaces NULL values with NA by recursively iterating through each element of -#' the input list. +#' Replaces `NULL` values with `NA` by recursively iterating through each +#' element of the input list. #' -#' @param x A nested list. -#' @return A nested list identical to the input x, but with all NULL values -#' replaced by NA. +#' @param list A nested list. +#' @return `x`, but with all `NULL` values replaced. +#' `NA`. #' @family helper functions #' @noRd -replace_null_recursive <- function(x) { - purrr::map(x, function(element) { +replace_null_recursive <- function(list) { + purrr::map(list, function(element) { if (is.list(element) && !is.null(element)) { replace_null_recursive(element) } else {