From 86a4a16161e726a5fff6ce7cfb789d49d9a3ab89 Mon Sep 17 00:00:00 2001 From: cpauvert Date: Fri, 3 May 2024 16:52:26 +0200 Subject: [PATCH 01/27] draft first {target} factories for {maldipickr} --- dev/maldipickr-workflow-with-targets.Rmd | 143 +++++++++++++++++++++++ 1 file changed, 143 insertions(+) create mode 100644 dev/maldipickr-workflow-with-targets.Rmd diff --git a/dev/maldipickr-workflow-with-targets.Rmd b/dev/maldipickr-workflow-with-targets.Rmd new file mode 100644 index 0000000..b105581 --- /dev/null +++ b/dev/maldipickr-workflow-with-targets.Rmd @@ -0,0 +1,143 @@ +--- +title: "maldipickr Workflow with {targets}" +output: html_document +editor_options: + chunk_output_type: console +--- + +```{r development, include=FALSE} +library(testthat) +``` + +```{r development-load} +# Load already included functions if relevant +pkgload::load_all(export_all = FALSE) +``` + + +Simultaneous bacterial isolation efforts require rapid dereplication of bacteria, if not their identification, to reduce the redundancy of isolates. + +To this end, the workhorse functions of [`{maldipickr}`](https://github.com/ClavelLab/maldipickr) +help import and then analyse MALDI-TOF spectra to dereplicate and cherry-pick mass spectrometry spectra. +The vignettes ["Import data from Bruker MALDI Biotyper"](https://clavellab.github.io/maldipickr/articles/import-data-from-bruker-maldi-biotyper.html) and ["Dereplicate Bruker MALDI Biotyper spectra"](https://clavellab.github.io/maldipickr/articles/dereplicate-bruker-maldi-biotyper-spectra.html) explain these functions in details if need be. + + +This vignette showcase functions to facilitate reproducible and trustworthy workflow development `{maldipickr}` (using [`{targets}`](https://docs.ropensci.org/targets/)). 
We invite readers unfamiliar with `{targets}` to read the short and well-written walkthrough ["How to use {targets}"](https://books.ropensci.org/targets/walkthrough.html) to understand how `{targets}` help analysts "coordinate the pieces of computationally demanding analysis projects" and "skips costly runtime for tasks that are already up to date" (source: ). + + +These helpers in `{maldipickr}` are called `{targets}` factories, which means they return target objects or lists of target objects. + + + +# tar_import_and_process_spectra + +```{r function-tar_import_and_process_spectra} +#' Title +#' +#' Description +#' +#' @param name +#' @param raw_spectra_directories +#' @param tolerance +#' +#' @return +#' +#' @export +tar_import_and_process_spectra <- function( + name, + raw_spectra_directories, + tolerance, + format = targets::tar_option_get("format")) { + rlang::check_installed(c("targets", "tarchetypes"), + reason = "to facilitate {maldipickr} workflow development" + ) + targets::tar_assert_chr(name) + targets::tar_assert_path(raw_spectra_directories) + targets::tar_assert_dbl(tolerance) + + name_plates <- paste0(name, "_plates") + name_spectra_raw <- paste0(name, "_spectra_raw") + name_checks <- paste0(name, "_checks") + name_spectra_stats <- paste0(name, "_spectra_stats") + name_valid_spectra <- paste0(name, "_valid_spectra") + name_processed <- paste0(name, "_processed") + + sym_plates <- as.symbol(name_plates) + sym_spectra_raw <- as.symbol(name_spectra_raw) + sym_checks <- as.symbol(name_checks) + sym_spectra_stats <- as.symbol(name_spectra_stats) + sym_valid_spectra <- as.symbol(name_valid_spectra) + + list( + tarchetypes::tar_files_input_raw(name_plates, + raw_spectra_directories, + format = "file" + ), + targets::tar_target_raw(name_spectra_raw, + command = substitute(suppressWarnings(import_biotyper_spectra(sym_plates)), + env = list(sym_plates = sym_plates) + ), + pattern = substitute(map(sym_plates), + env = list(sym_plates = sym_plates) + ), + 
format = format + ), + targets::tar_target_raw(name_checks, + command = substitute(check_spectra(sym_spectra_raw, tolerance), + env = list(tolerance = tolerance, sym_spectra_raw = sym_spectra_raw) + ), + pattern = substitute(map(sym_spectra_raw), env = list(sym_spectra_raw = sym_spectra_raw)), + format = format + ), + targets::tar_target_raw(name_spectra_stats, + command = substitute( + gather_spectra_stats(sym_checks) %>% + dplyr::mutate(maldi_plate = sym_plates), + env = list(sym_checks = sym_checks, sym_plates = sym_plates) + ), + pattern = substitute(map(sym_checks, sym_plates), + env = list(sym_checks = sym_checks, sym_plates = sym_plates) + ), + iteration = "vector", format = format + ), + # Filter-out non empty spectra and unusual spectra + targets::tar_target_raw(name_valid_spectra, + command = substitute(remove_spectra(sym_spectra_raw, sym_checks), + env = list(sym_spectra_raw = sym_spectra_raw, sym_checks = sym_checks) + ), + pattern = substitute(map(sym_spectra_raw, sym_checks), + env = list(sym_spectra_raw = sym_spectra_raw, sym_checks = sym_checks) + ), + format = format + ), + targets::tar_target_raw(name_processed, + command = substitute(process_spectra(sym_valid_spectra), + env = list(sym_valid_spectra = sym_valid_spectra) + ), + pattern = substitute(map(sym_valid_spectra), + env = list(sym_valid_spectra = sym_valid_spectra) + ), + format = format + ) + ) +} +``` + +```{r example-tar_import_and_process_spectra} +tar_import_and_process_spectra() +``` + +```{r tests-tar_import_and_process_spectra} +test_that("tar_import_and_process_spectra works", { + expect_true(inherits(tar_import_and_process_spectra, "function")) +}) +``` + + +```{r development-inflate, eval=FALSE} +# Run but keep eval=FALSE to avoid infinite loop +# Execute in the console directly +fusen::inflate(flat_file = "dev/maldipickr-workflow-with-targets.Rmd", + vignette_name = "maldipickr Workflow with {targets}") +``` + From 55575eef2dfa969429ad61a114355f50e3a08f99 Mon Sep 17 00:00:00 
2001 From: cpauvert Date: Fri, 3 May 2024 16:53:42 +0200 Subject: [PATCH 02/27] add {targets} and {tarchetypes} as optional dependencies if one uses the {target} factories --- DESCRIPTION | 2 + dev/0-dev_history.Rmd | 5 +- renv.lock | 165 +++++++++++++++++++++++++++++++++++++++--- 3 files changed, 161 insertions(+), 11 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index b30a490..65ddfbd 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -43,6 +43,8 @@ Suggests: knitr, rmarkdown, spelling, + tarchetypes (>= 0.9.0), + targets (>= 1.7.0), testthat VignetteBuilder: knitr diff --git a/dev/0-dev_history.Rmd b/dev/0-dev_history.Rmd index cac68ce..00b4a6f 100644 --- a/dev/0-dev_history.Rmd +++ b/dev/0-dev_history.Rmd @@ -166,7 +166,7 @@ usethis::use_import_from( usethis::use_import_from("tidyselect", "starts_with") # To solve "no visible binding variable error from devtools:check # https://dplyr.tidyverse.org/articles/programming.html#eliminating-r-cmd-check-notes -usethis::use_import_from("rlang", ".data") +usethis::use_import_from("rlang", ".data","check_installed") usethis::use_import_from( "MALDIquant", c( @@ -180,6 +180,9 @@ usethis::use_import_from( usethis::use_import_from("readBrukerFlexData", "readBrukerFlexDir") usethis::use_import_from("stats", "median") usethis::use_import_from("utils", c("read.delim", "read.csv")) + +usethis::use_package("targets", type = "Suggests", min_version = "1.7.0") +usethis::use_package("tarchetypes", type = "Suggests", min_version = "0.9.0") ``` # Add a logo diff --git a/renv.lock b/renv.lock index f33a689..040bd00 100644 --- a/renv.lock +++ b/renv.lock @@ -32,6 +32,22 @@ ], "Hash": "49d887aead86e8a4096282ea0e8fa591" }, + "Matrix": { + "Package": "Matrix", + "Version": "1.5-4.1", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "graphics", + "grid", + "lattice", + "methods", + "stats", + "utils" + ], + "Hash": "38082d362d317745fb932e13956dccbb" + }, "R6": { "Package": "R6", "Version": "2.5.1", @@ -105,6 
+121,16 @@ ], "Hash": "543776ae6848fde2f48ff3816d0628bc" }, + "base64url": { + "Package": "base64url", + "Version": "1.4", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "backports" + ], + "Hash": "0c54cf3a08cc0e550fbd64ad33166143" + }, "bit": { "Package": "bit", "Version": "4.0.5", @@ -220,6 +246,16 @@ ], "Hash": "3f038e5ac7f41d4ac41ce658c85e3042" }, + "codetools": { + "Package": "codetools", + "Version": "0.2-19", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R" + ], + "Hash": "c089a619a7fae175d149d89164f8c7d8" + }, "commonmark": { "Package": "commonmark", "Version": "1.9.0", @@ -239,13 +275,13 @@ }, "cpp11": { "Package": "cpp11", - "Version": "0.4.6", + "Version": "0.4.7", "Source": "Repository", "Repository": "RSPM", "Requirements": [ "R" ], - "Hash": "707fae4bbf73697ec8d85f9d7076c061" + "Hash": "5a295d7d963cc5035284dcdbaf334f4e" }, "crayon": { "Package": "crayon", @@ -283,6 +319,17 @@ ], "Hash": "9123f3ef96a2c1a93927d828b2fe7d4c" }, + "data.table": { + "Package": "data.table", + "Version": "1.14.8", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "methods" + ], + "Hash": "b4c06e554f33344e044ccd7fdca750a9" + }, "desc": { "Package": "desc", "Version": "1.4.2", @@ -683,6 +730,29 @@ ], "Hash": "e957e989ea17f937964f0d46b0f0bca0" }, + "igraph": { + "Package": "igraph", + "Version": "2.0.3", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "Matrix", + "R", + "cli", + "cpp11", + "grDevices", + "graphics", + "lifecycle", + "magrittr", + "methods", + "pkgconfig", + "rlang", + "stats", + "utils", + "vctrs" + ], + "Hash": "c3b7d801d722e26e4cd888e042bf9af5" + }, "ini": { "Package": "ini", "Version": "0.3.1", @@ -712,7 +782,7 @@ }, "knitr": { "Package": "knitr", - "Version": "1.45", + "Version": "1.46", "Source": "Repository", "Repository": "RSPM", "Requirements": [ @@ -724,7 +794,7 @@ "xfun", "yaml" ], - "Hash": "1ec462871063897135c1bcbe0fc8f07d" + "Hash": 
"6e008ab1d696a5283c79765fa7b56b47" }, "later": { "Package": "later", @@ -737,6 +807,21 @@ ], "Hash": "40401c9cf2bc2259dfe83311c9384710" }, + "lattice": { + "Package": "lattice", + "Version": "0.21-8", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "grDevices", + "graphics", + "grid", + "stats", + "utils" + ], + "Hash": "0b8a6d63c8770f02a8b5635f3c431e6b" + }, "lifecycle": { "Package": "lifecycle", "Version": "1.0.4", @@ -1138,14 +1223,14 @@ }, "rlang": { "Package": "rlang", - "Version": "1.1.2", + "Version": "1.1.3", "Source": "Repository", "Repository": "RSPM", "Requirements": [ "R", "utils" ], - "Hash": "50a6dbdc522936ca35afc5e2082ea91b" + "Hash": "42548638fae05fd9a9b5f3f437fbbbe2" }, "rmarkdown": { "Package": "rmarkdown", @@ -1240,6 +1325,16 @@ ], "Hash": "6bd4d33b50ff927191ec9acbf52fd056" }, + "secretbase": { + "Package": "secretbase", + "Version": "0.5.0", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R" + ], + "Hash": "c476c18b22b21849c9f4d29b3cd720a5" + }, "sessioninfo": { "Package": "sessioninfo", "Version": "1.2.2", @@ -1358,6 +1453,55 @@ ], "Hash": "15b594369e70b975ba9f064295983499" }, + "tarchetypes": { + "Package": "tarchetypes", + "Version": "0.9.0", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "dplyr", + "fs", + "parallel", + "rlang", + "secretbase", + "targets", + "tibble", + "tidyselect", + "utils", + "vctrs", + "withr" + ], + "Hash": "094a74cdaa048b727d52aebf4c80ed7a" + }, + "targets": { + "Package": "targets", + "Version": "1.7.0", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "R6", + "base64url", + "callr", + "cli", + "codetools", + "data.table", + "igraph", + "knitr", + "ps", + "rlang", + "secretbase", + "stats", + "tibble", + "tidyselect", + "tools", + "utils", + "vctrs", + "yaml" + ], + "Hash": "27ee47827a89430be0d38b78c0cb8fb7" + }, "testthat": { "Package": "testthat", "Version": "3.2.1", @@ -1443,7 +1587,7 @@ }, 
"tidyselect": { "Package": "tidyselect", - "Version": "1.2.0", + "Version": "1.2.1", "Source": "Repository", "Repository": "RSPM", "Requirements": [ @@ -1455,7 +1599,7 @@ "vctrs", "withr" ], - "Hash": "79540e5fcd9e0435af547d885f184fd5" + "Hash": "829f27b9c4919c16b593794a6344d6c0" }, "tinytex": { "Package": "tinytex", @@ -1612,14 +1756,15 @@ }, "xfun": { "Package": "xfun", - "Version": "0.40", + "Version": "0.43", "Source": "Repository", "Repository": "RSPM", "Requirements": [ + "grDevices", "stats", "tools" ], - "Hash": "be07d23211245fc7d4209f54c4e4ffc8" + "Hash": "ab6371d8653ce5f2f9290f4ec7b42a8e" }, "xml2": { "Package": "xml2", From 2d1454375ccde5f16b9391ab4627d70d8a63de22 Mon Sep 17 00:00:00 2001 From: cpauvert Date: Fri, 3 May 2024 17:37:47 +0200 Subject: [PATCH 03/27] ignore the guidelines for contributing during pkg build --- .Rbuildignore | 1 + dev/0-dev_history.Rmd | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.Rbuildignore b/.Rbuildignore index 6790fd0..f6c9814 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -19,3 +19,4 @@ ^CRAN-SUBMISSION$ ^revdep$ ^CODE_OF_CONDUCT\.md$ +^CONTRIBUTING\.md$ diff --git a/dev/0-dev_history.Rmd b/dev/0-dev_history.Rmd index 00b4a6f..a8ee976 100644 --- a/dev/0-dev_history.Rmd +++ b/dev/0-dev_history.Rmd @@ -195,6 +195,6 @@ usethis::use_logo("logo-maldipickr.svg")# later to fix #43 # Ignore files during package build ```{r build_ignore, eval=FALSE} -usethis::use_build_ignore(c("_pkgdown.yml", "docs", "pkgdown", "processed", "codecov.yml")) +usethis::use_build_ignore(c("_pkgdown.yml", "docs", "pkgdown", "processed", "codecov.yml", "CONTRIBUTING.md")) ``` From 5a97e9ff642ce208ccc483b245fa827a01707b10 Mon Sep 17 00:00:00 2001 From: cpauvert Date: Fri, 3 May 2024 18:00:18 +0200 Subject: [PATCH 04/27] forgotten line removal after compilation --- man/maldipickr-package.Rd | 2 -- 1 file changed, 2 deletions(-) diff --git a/man/maldipickr-package.Rd b/man/maldipickr-package.Rd index df8a49e..78439b4 100644 --- 
a/man/maldipickr-package.Rd +++ b/man/maldipickr-package.Rd @@ -6,8 +6,6 @@ \alias{maldipickr-package} \title{maldipickr: Dereplicate and Cherry-Pick Mass Spectrometry Spectra} \description{ -\if{html}{\figure{logo.png}{options: style='float: right' alt='logo' width='120'}} - Convenient wrapper functions for the analysis of matrix-assisted laser desorption/ionization-time-of-flight (MALDI-TOF) spectra data in order to select only representative spectra (also called cherry-pick). The package covers the preprocessing and dereplication steps (based on Strejcek, Smrhova, Junkova and Uhlik (2018) \doi{10.3389/fmicb.2018.01294}) needed to cluster MALDI-TOF spectra before the final cherry-picking step. It enables the easy exclusion of spectra and/or clusters to accommodate complex cherry-picking strategies. Alternatively, cherry-picking using taxonomic identification MALDI-TOF data is made easy with functions to import inconsistently formatted reports. } \seealso{ From b5692bd5634ee4fabcc2ff50208b16740d90f3d0 Mon Sep 17 00:00:00 2001 From: cpauvert Date: Fri, 3 May 2024 18:13:04 +0200 Subject: [PATCH 05/27] add inflated changes for import target factory BUT issue: targets and tarchetypes have been moved to Imports instead of Suggests --- DESCRIPTION | 4 +- NAMESPACE | 1 + R/tar_import_and_process_spectra.R | 112 ++++++++++++++++++ dev/config_fusen.yaml | 13 ++ dev/maldipickr-workflow-with-targets.Rmd | 43 +++++-- man/tar_import_and_process_spectra.Rd | 53 +++++++++ .../test-tar_import_and_process_spectra.R | 5 + .../maldipickr-workflow-with-targets.Rmd | 67 +++++++++++ 8 files changed, 284 insertions(+), 14 deletions(-) create mode 100644 R/tar_import_and_process_spectra.R create mode 100644 man/tar_import_and_process_spectra.Rd create mode 100644 tests/testthat/test-tar_import_and_process_spectra.R create mode 100644 vignettes/maldipickr-workflow-with-targets.Rmd diff --git a/DESCRIPTION b/DESCRIPTION index 65ddfbd..fe6a5ca 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ 
-34,6 +34,8 @@ Imports: readBrukerFlexData, rlang, stats, + tarchetypes (>= 0.9.0), + targets (>= 1.7.0), tibble, tidyr, tidyselect, @@ -43,8 +45,6 @@ Suggests: knitr, rmarkdown, spelling, - tarchetypes (>= 0.9.0), - targets (>= 1.7.0), testthat VignetteBuilder: knitr diff --git a/NAMESPACE b/NAMESPACE index 472712a..1dbf5fe 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -15,6 +15,7 @@ export(read_biotyper_report) export(read_many_biotyper_reports) export(remove_spectra) export(set_reference_spectra) +export(tar_import_and_process_spectra) importFrom(MALDIquant,binPeaks) importFrom(MALDIquant,calibrateIntensity) importFrom(MALDIquant,createMassSpectrum) diff --git a/R/tar_import_and_process_spectra.R b/R/tar_import_and_process_spectra.R new file mode 100644 index 0000000..7b36c91 --- /dev/null +++ b/R/tar_import_and_process_spectra.R @@ -0,0 +1,112 @@ +# WARNING - Generated by {fusen} from dev/maldipickr-workflow-with-targets.Rmd: do not edit by hand + +#' Import and process checked spectra using targets +#' +#' +#' Given a vector of paths to `acqus` and `acqu` MALDI Biotyper directories, this targets +#' factory facilitates the steps from raw spectra to quality-checked processed spectra. +#' +#' @param name A character indicating the prefix of all targets created by the factory. +#' For instance, `name = anaerobe` will create the target `anaerobe_spectra_raw` among others. +#' @param raw_spectra_directories A vector of paths to directories containing MALDI Biotyper spectra files. This is similar to the `biotyper_directory` parameter from [import_biotyper_spectra], but as a character vector. +#' @inheritParams check_spectra +#' @inheritParams targets::tar_target_raw +#' +#' @return A target object +#' +#' @export +#' @examples +#' if (Sys.getenv("TAR_LONG_EXAMPLES") == "true") { +#' targets::tar_dir({ # tar_dir() runs code from a temporary directory. 
+#' targets::tar_script({ +#' library(maldipickr) +#' list( +#' tar_import_and_process_spectra( +#' name = "example", +#' raw_spectra_directories = system.file( +#' "toy-species-spectra", +#' package = "maldipickr"), +#' tolerance = 1 +#' ) +#' )},ask = FALSE) +#' targets::tar_make() +#' }) +#' } +tar_import_and_process_spectra <- function( + name, + raw_spectra_directories, + tolerance, + format = targets::tar_option_get("format")) { + rlang::check_installed(c("targets", "tarchetypes"), + reason = "to facilitate {maldipickr} workflow development" + ) + targets::tar_assert_chr(name) + targets::tar_assert_path(raw_spectra_directories) + targets::tar_assert_dbl(tolerance) + + name_plates <- paste0(name, "_plates") + name_spectra_raw <- paste0(name, "_spectra_raw") + name_checks <- paste0(name, "_checks") + name_spectra_stats <- paste0(name, "_spectra_stats") + name_valid_spectra <- paste0(name, "_valid_spectra") + name_processed <- paste0(name, "_processed") + + sym_plates <- as.symbol(name_plates) + sym_spectra_raw <- as.symbol(name_spectra_raw) + sym_checks <- as.symbol(name_checks) + sym_spectra_stats <- as.symbol(name_spectra_stats) + sym_valid_spectra <- as.symbol(name_valid_spectra) + + list( + tarchetypes::tar_files_input_raw(name_plates, + raw_spectra_directories, + format = "file" + ), + targets::tar_target_raw(name_spectra_raw, + command = substitute(suppressWarnings(import_biotyper_spectra(sym_plates)), + env = list(sym_plates = sym_plates) + ), + pattern = substitute(map(sym_plates), + env = list(sym_plates = sym_plates) + ), + format = format + ), + targets::tar_target_raw(name_checks, + command = substitute(check_spectra(sym_spectra_raw, tolerance), + env = list(tolerance = tolerance, sym_spectra_raw = sym_spectra_raw) + ), + pattern = substitute(map(sym_spectra_raw), env = list(sym_spectra_raw = sym_spectra_raw)), + format = format + ), + targets::tar_target_raw(name_spectra_stats, + command = substitute( + gather_spectra_stats(sym_checks) %>% + 
dplyr::mutate(maldi_plate = sym_plates), + env = list(sym_checks = sym_checks, sym_plates = sym_plates) + ), + pattern = substitute(map(sym_checks, sym_plates), + env = list(sym_checks = sym_checks, sym_plates = sym_plates) + ), + iteration = "vector", format = format + ), + # Filter-out non empty spectra and unusual spectra + targets::tar_target_raw(name_valid_spectra, + command = substitute(remove_spectra(sym_spectra_raw, sym_checks), + env = list(sym_spectra_raw = sym_spectra_raw, sym_checks = sym_checks) + ), + pattern = substitute(map(sym_spectra_raw, sym_checks), + env = list(sym_spectra_raw = sym_spectra_raw, sym_checks = sym_checks) + ), + format = format + ), + targets::tar_target_raw(name_processed, + command = substitute(process_spectra(sym_valid_spectra), + env = list(sym_valid_spectra = sym_valid_spectra) + ), + pattern = substitute(map(sym_valid_spectra), + env = list(sym_valid_spectra = sym_valid_spectra) + ), + format = format + ) + ) +} diff --git a/dev/config_fusen.yaml b/dev/config_fusen.yaml index 53d3421..5d23198 100644 --- a/dev/config_fusen.yaml +++ b/dev/config_fusen.yaml @@ -79,3 +79,16 @@ keep: - tests/testthat/test-remove_spectra_logical.R - tests/testthat/test-remove_spectra.R vignettes: [] +maldipickr-workflow-with-targets.Rmd: + path: dev/maldipickr-workflow-with-targets.Rmd + state: active + R: R/tar_import_and_process_spectra.R + tests: tests/testthat/test-tar_import_and_process_spectra.R + vignettes: vignettes/maldipickr-workflow-with-targets.Rmd + inflate: + flat_file: dev/maldipickr-workflow-with-targets.Rmd + vignette_name: maldipickr Workflow with targets + open_vignette: true + check: true + document: true + overwrite: 'yes' diff --git a/dev/maldipickr-workflow-with-targets.Rmd b/dev/maldipickr-workflow-with-targets.Rmd index b105581..c0cf2e5 100644 --- a/dev/maldipickr-workflow-with-targets.Rmd +++ b/dev/maldipickr-workflow-with-targets.Rmd @@ -1,5 +1,5 @@ --- -title: "maldipickr Workflow with {targets}" +title: "maldipickr 
Workflow with targets" output: html_document editor_options: chunk_output_type: console @@ -18,29 +18,33 @@ pkgload::load_all(export_all = FALSE) Simultaneous bacterial isolation efforts require rapid dereplication of bacteria, if not their identification, to reduce the redundancy of isolates. To this end, the workhorse functions of [`{maldipickr}`](https://github.com/ClavelLab/maldipickr) -help import and then analyse MALDI-TOF spectra to dereplicate and cherry-pick mass spectrometry spectra. +help import and then analyze MALDI-TOF spectra to dereplicate and cherry-pick mass spectrometry spectra. The vignettes ["Import data from Bruker MALDI Biotyper"](https://clavellab.github.io/maldipickr/articles/import-data-from-bruker-maldi-biotyper.html) and ["Dereplicate Bruker MALDI Biotyper spectra"](https://clavellab.github.io/maldipickr/articles/dereplicate-bruker-maldi-biotyper-spectra.html) explain these functions in details if need be. -This vignette showcase functions to facilitate reproducible and trustworthy workflow development `{maldipickr}` (using [`{targets}`](https://docs.ropensci.org/targets/)). We invite readers unfamiliar with `{targets}` to read the short and well-written walkthrough ["How to use {targets}"](https://books.ropensci.org/targets/walkthrough.html) to understand how `{targets}` help analysts "coordinate the pieces of computationally demanding analysis projects" and "skips costly runtime for tasks that are already up to date" (source: ). +This vignette showcases functions to facilitate reproducible and trustworthy workflow development with `{maldipickr}` (using [`{targets}`](https://docs.ropensci.org/targets/)). 
We invite readers unfamiliar with {targets} to read the short and well-written walk-through ["How to use {targets}"](https://books.ropensci.org/targets/walkthrough.html) to understand how {targets} helps analysts "coordinate the pieces of computationally demanding analysis projects" and "skips costly runtime for tasks that are already up to date" (source: <https://docs.ropensci.org/targets/>). + + -These helpers in `{maldipickr}` are called `{targets}` factories, which means they return target objects or lists of target objects. +These helpers in `{maldipickr}` are called {targets} factories, which means they return target objects or lists of target objects. # tar_import_and_process_spectra ```{r function-tar_import_and_process_spectra} -#' Title +#' Import and process checked spectra using targets #' -#' Description #' -#' @param name -#' @param raw_spectra_directories -#' @param tolerance +#' Given a vector of paths to `acqus` and `acqu` MALDI Biotyper directories, this targets +#' factory facilitates the steps from raw spectra to quality-checked processed spectra. +#' +#' @param name A character indicating the prefix of all targets created by the factory. +#' For instance, `name = anaerobe` will create the target `anaerobe_spectra_raw` among others. +#' @param raw_spectra_directories A vector of paths to directories containing MALDI Biotyper spectra files. This is similar to the `biotyper_directory` parameter from [import_biotyper_spectra], but as a character vector. +#' @inheritParams check_spectra +#' @inheritParams targets::tar_target_raw #' -#' @return +#' @return A target object #' #' @export tar_import_and_process_spectra <- function( @@ -124,7 +128,22 @@ tar_import_and_process_spectra <- function( ``` ```{r example-tar_import_and_process_spectra} -tar_import_and_process_spectra() +if (Sys.getenv("TAR_LONG_EXAMPLES") == "true") { + targets::tar_dir({ # tar_dir() runs code from a temporary directory. 
+ targets::tar_script({ + library(maldipickr) + list( + tar_import_and_process_spectra( + name = "example", + raw_spectra_directories = system.file( + "toy-species-spectra", + package = "maldipickr"), + tolerance = 1 + ) + )},ask = FALSE) + targets::tar_make() + }) +} ``` ```{r tests-tar_import_and_process_spectra} @@ -138,6 +157,6 @@ test_that("tar_import_and_process_spectra works", { # Run but keep eval=FALSE to avoid infinite loop # Execute in the console directly fusen::inflate(flat_file = "dev/maldipickr-workflow-with-targets.Rmd", - vignette_name = "maldipickr Workflow with {targets}") + vignette_name = "maldipickr Workflow with targets") ``` diff --git a/man/tar_import_and_process_spectra.Rd b/man/tar_import_and_process_spectra.Rd new file mode 100644 index 0000000..f571888 --- /dev/null +++ b/man/tar_import_and_process_spectra.Rd @@ -0,0 +1,53 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tar_import_and_process_spectra.R +\name{tar_import_and_process_spectra} +\alias{tar_import_and_process_spectra} +\title{Import and process checked spectra using targets} +\usage{ +tar_import_and_process_spectra( + name, + raw_spectra_directories, + tolerance, + format = targets::tar_option_get("format") +) +} +\arguments{ +\item{name}{A character indicating the prefix of all targets created by the factory. +For instance, \code{name = anaerobe} will create the target \code{anaerobe_spectra_raw} among others.} + +\item{raw_spectra_directories}{A vector of paths to directories containing MALDI Biotyper spectra files. This is similar to the \code{biotyper_directory} parameter from \link{import_biotyper_spectra}, but as a character vector.} + +\item{tolerance}{A numeric indicating the accepted tolerance to the spectra length. +The default value is the machine numerical precision and is close to 1.5e-8.} + +\item{format}{Optional storage format for the target's return value. 
+With the exception of \code{format = "file"}, each target +gets a file in \verb{_targets/objects}, and each format is a different +way to save and load this file. See the "Storage formats" section +for a detailed list of possible data storage formats.} +} +\value{ +A target object +} +\description{ +Given a vector of paths to \code{acqus} and \code{acqu} MALDI Biotyper directories, this targets +factory facilitates the steps from raw spectra to quality-checked processed spectra. +} +\examples{ +if (Sys.getenv("TAR_LONG_EXAMPLES") == "true") { + targets::tar_dir({ # tar_dir() runs code from a temporary directory. + targets::tar_script({ + library(maldipickr) + list( + tar_import_and_process_spectra( + name = "example", + raw_spectra_directories = system.file( + "toy-species-spectra", + package = "maldipickr"), + tolerance = 1 + ) + )},ask = FALSE) + targets::tar_make() + }) +} +} diff --git a/tests/testthat/test-tar_import_and_process_spectra.R b/tests/testthat/test-tar_import_and_process_spectra.R new file mode 100644 index 0000000..612b7c9 --- /dev/null +++ b/tests/testthat/test-tar_import_and_process_spectra.R @@ -0,0 +1,5 @@ +# WARNING - Generated by {fusen} from dev/maldipickr-workflow-with-targets.Rmd: do not edit by hand + +test_that("tar_import_and_process_spectra works", { + expect_true(inherits(tar_import_and_process_spectra, "function")) +}) diff --git a/vignettes/maldipickr-workflow-with-targets.Rmd b/vignettes/maldipickr-workflow-with-targets.Rmd new file mode 100644 index 0000000..5aa9030 --- /dev/null +++ b/vignettes/maldipickr-workflow-with-targets.Rmd @@ -0,0 +1,67 @@ +--- +title: "maldipickr Workflow with targets" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{maldipickr-workflow-with-targets} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +```{r setup} +library(maldipickr) +``` + + + +Simultaneous bacterial isolation 
efforts require rapid dereplication of bacteria, if not their identification, to reduce the redundancy of isolates. + +To this end, the workhorse functions of [`{maldipickr}`](https://github.com/ClavelLab/maldipickr) +help import and then analyze MALDI-TOF spectra to dereplicate and cherry-pick mass spectrometry spectra. +The vignettes ["Import data from Bruker MALDI Biotyper"](https://clavellab.github.io/maldipickr/articles/import-data-from-bruker-maldi-biotyper.html) and ["Dereplicate Bruker MALDI Biotyper spectra"](https://clavellab.github.io/maldipickr/articles/dereplicate-bruker-maldi-biotyper-spectra.html) explain these functions in details if need be. + + +This vignette showcases functions to facilitate reproducible and trustworthy workflow development with `{maldipickr}` (using [`{targets}`](https://docs.ropensci.org/targets/)). We invite readers unfamiliar with {targets} to read the short and well-written walk-through ["How to use {targets}"](https://books.ropensci.org/targets/walkthrough.html) to understand how {targets} helps analysts "coordinate the pieces of computationally demanding analysis projects" and "skips costly runtime for tasks that are already up to date" (source: <https://docs.ropensci.org/targets/>). + + +These helpers in `{maldipickr}` are called {targets} factories, which means they return target objects or lists of target objects. + + + + +# tar_import_and_process_spectra + + + + + +```{r example-tar_import_and_process_spectra} +if (Sys.getenv("TAR_LONG_EXAMPLES") == "true") { + targets::tar_dir({ # tar_dir() runs code from a temporary directory. 
+ targets::tar_script({ + library(maldipickr) + list( + tar_import_and_process_spectra( + name = "example", + raw_spectra_directories = system.file( + "toy-species-spectra", + package = "maldipickr"), + tolerance = 1 + ) + )},ask = FALSE) + targets::tar_make() + }) +} +``` + + + + + + From cb62ce9bd7c93df9a3a020a576c07b9e876ddfc8 Mon Sep 17 00:00:00 2001 From: cpauvert Date: Fri, 3 May 2024 18:15:22 +0200 Subject: [PATCH 06/27] fix spelling issues --- README.Rmd | 2 +- README.md | 2 +- inst/WORDLIST | 8 ++++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/README.Rmd b/README.Rmd index d436aa3..9ca76f2 100644 --- a/README.Rmd +++ b/README.Rmd @@ -135,7 +135,7 @@ Please note that the [`{maldipickr}`](https://github.com/ClavelLab/maldipickr) p ## Credits -### Acknowledgements +### Acknowledgments This R package is developed for spectra data generated by the Bruker MALDI Biotyper device. The [`{maldipickr}`](https://github.com/ClavelLab/maldipickr) package is built from a suite of Rmarkdown files using the [`{fusen}`](https://thinkr-open.github.io/fusen/) package by Rochette S (2023). It relies on: diff --git a/README.md b/README.md index 4c96865..269ff51 100644 --- a/README.md +++ b/README.md @@ -171,7 +171,7 @@ By contributing to this project, you agree to abide by its terms. ## Credits -### Acknowledgements +### Acknowledgments This R package is developed for spectra data generated by the Bruker MALDI Biotyper device. 
The diff --git a/inst/WORDLIST b/inst/WORDLIST index 727b50f..a6ebf50 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -1,5 +1,4 @@ Aerts -BioTyper Bioinformatics Biotyper Bruker @@ -18,6 +17,7 @@ Desorption Dumolin Gibb Golay +Hyperlegible Jeugt Junkova MALDI @@ -34,7 +34,6 @@ Savitzky Schellaert Smrhova Spectrometry -StackOverflow Strejcek Strejeck Strimmer @@ -46,15 +45,16 @@ Verheyde Wieme acqu al +biotyper bruker codebase codecov +coolors csv dereplicate dereplication desorption doi -ekstroem et fmicb funder @@ -62,10 +62,10 @@ fusen kDa magrittr maldi -modularity natively quickstart rRNA +reprex scalable spectrometry tibble From 4fe03d84db3e15342f17c2a684f63786d9036826 Mon Sep 17 00:00:00 2001 From: cpauvert Date: Fri, 3 May 2024 18:23:06 +0200 Subject: [PATCH 07/27] fix having targets and tarchetypes as optional dependencies in Suggests --- DESCRIPTION | 4 ++-- dev/0-dev_history.Rmd | 5 +++-- dev/config_attachment.yaml | 8 ++++++-- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index fe6a5ca..65ddfbd 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -34,8 +34,6 @@ Imports: readBrukerFlexData, rlang, stats, - tarchetypes (>= 0.9.0), - targets (>= 1.7.0), tibble, tidyr, tidyselect, @@ -45,6 +43,8 @@ Suggests: knitr, rmarkdown, spelling, + tarchetypes (>= 0.9.0), + targets (>= 1.7.0), testthat VignetteBuilder: knitr diff --git a/dev/0-dev_history.Rmd b/dev/0-dev_history.Rmd index a8ee976..b7b4c4a 100644 --- a/dev/0-dev_history.Rmd +++ b/dev/0-dev_history.Rmd @@ -107,7 +107,8 @@ What is special about using `README.Rmd` instead of just `README.md`? 
You can in pkgload::load_all() # Generate documentation and deal with dependencies -attachment::att_amend_desc() +# Ran once after issue with pkgs moved from Suggests to Imports +attachment::att_amend_desc(extra.suggests = c("targets","tarchetypes"),pkg_ignore = c("targets","tarchetypes"), normalize = TRUE, update.config = TRUE) # Check the package devtools::check() @@ -180,7 +181,7 @@ usethis::use_import_from( usethis::use_import_from("readBrukerFlexData", "readBrukerFlexDir") usethis::use_import_from("stats", "median") usethis::use_import_from("utils", c("read.delim", "read.csv")) - +# See attachment::att_amend_desc above, because fusen moves the config below to Imports during inflate usethis::use_package("targets", type = "Suggests", min_version = "1.7.0") usethis::use_package("tarchetypes", type = "Suggests", min_version = "0.9.0") ``` diff --git a/dev/config_attachment.yaml b/dev/config_attachment.yaml index 46e24ec..26e25f6 100644 --- a/dev/config_attachment.yaml +++ b/dev/config_attachment.yaml @@ -3,8 +3,12 @@ path.d: DESCRIPTION dir.r: R dir.v: vignettes dir.t: tests -extra.suggests: ~ -pkg_ignore: ~ +extra.suggests: + - targets + - tarchetypes +pkg_ignore: + - targets + - tarchetypes document: yes normalize: yes inside_rmd: no From 28a0489e66f946c2f7923986c5a69278c60319ef Mon Sep 17 00:00:00 2001 From: cpauvert Date: Fri, 3 May 2024 19:11:40 +0200 Subject: [PATCH 08/27] draft gather_spectra_stats() function --- _pkgdown.yml | 1 + dev/flat_utils.Rmd | 57 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+) diff --git a/_pkgdown.yml b/_pkgdown.yml index ad2035a..3f253db 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -40,6 +40,7 @@ reference: - title: "Miscellaneous" contents: - is_well_on_edge + - gather_spectra_stats - get_spectra_names news: diff --git a/dev/flat_utils.Rmd b/dev/flat_utils.Rmd index 58e71e3..90cd893 100644 --- a/dev/flat_utils.Rmd +++ b/dev/flat_utils.Rmd @@ -625,7 +625,64 @@ test_that("remove_spectra fails 
with wrong input on raw spectra", { }) ``` +# gather_spectra_stats +```{r function-gather_spectra_stats} +#' Aggregate spectra quality-check statistics +#' +#' +#' @param check_vectors A list of logical vectors from [check_spectra] +#' +#' @return A tibble of one row with the following 5 columns of integers: +#' * `n_spectra`: total number of raw spectra. +#' * `n_valid_spectra`: total number of spectra passing all quality checks +#' * `is_empty`, `is_outlier_length` and `is_not_regular`: total of spectra flagged with these irregularities. +#' +#' @seealso [check_spectra] +#' @export +gather_spectra_stats <- function(check_vectors){ + # check_vectors from maldipickr::check_spectra + # src: https://stackoverflow.com/a/51140480/21085566 + aggregated_checks <- Reduce(`|`, check_vectors) + check_stats <- vapply(check_vectors, sum, FUN.VALUE = integer(1)) %>% + tibble::as_tibble_row() + tibble::tibble( + "n_spectra" = length(aggregated_checks), + "n_valid_spectra" = n_spectra - sum(aggregated_checks) + ) %>% + dplyr::bind_cols(check_stats) %>% + return() +} +``` + +```{r example-gather_spectra_stats} +# Get an example directory of six Bruker MALDI Biotyper spectra +directory_biotyper_spectra <- system.file( + "toy-species-spectra", + package = "maldipickr" +) +# Import the six spectra +spectra_list <- import_biotyper_spectra(directory_biotyper_spectra) +# Display the list of checks, with FALSE where no anomaly is detected +checks <- check_spectra(spectra_list) +# Aggregate the statistics of quality-checked spectra +gather_spectra_stats(checks) +``` + +```{r tests-gather_spectra_stats} +test_that("gather_spectra_stats works", { + expect_equal(check_spectra( + c(MALDIquant::createMassSpectrum(1:3, 1:3), MALDIquant::createMassSpectrum(11:13, 11:13)) + ) |> gather_spectra_stats(), structure(list( + n_spectra = 2L, n_valid_spectra = 2L, is_empty = 0L, + is_outlier_length = 0L, is_not_regular = 0L + ), class = c( + "tbl_df", + "tbl", "data.frame" + ), row.names = c(NA, -1L))) 
+}) +``` + ```{r development-inflate, eval=FALSE} # Run but keep eval=FALSE to avoid infinite loop From e5dc66fa9d08f96cc10f907618e78ad71d067cce Mon Sep 17 00:00:00 2001 From: cpauvert Date: Fri, 3 May 2024 19:14:17 +0200 Subject: [PATCH 09/27] add test for structure and fix no visible binding issue --- dev/flat_utils.Rmd | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/dev/flat_utils.Rmd b/dev/flat_utils.Rmd index 90cd893..2857cb0 100644 --- a/dev/flat_utils.Rmd +++ b/dev/flat_utils.Rmd @@ -641,6 +641,16 @@ test_that("remove_spectra fails with wrong input on raw spectra", { #' @seealso [check_spectra] #' @export gather_spectra_stats <- function(check_vectors){ + equal_length<-unique(lengths(check_vectors)) + if(typeof(check_vectors) != "list" | + length(equal_length) != 1 | + names(check_vectors) != c("is_empty", "is_outlier_length", "is_not_regular") + ){ + stop( + "Unexpected format for checks_vectors. Are you sure this is the output of maldicpickr::check_spectra()?" 
+ ) + } + # check_vectors from maldipickr::check_spectra # src: https://stackoverflow.com/a/51140480/21085566 aggregated_checks <- Reduce(`|`, check_vectors) @@ -648,7 +658,7 @@ gather_spectra_stats <- function(check_vectors){ tibble::as_tibble_row() tibble::tibble( "n_spectra" = length(aggregated_checks), - "n_valid_spectra" = n_spectra - sum(aggregated_checks) + "n_valid_spectra" = .data$n_spectra - sum(aggregated_checks) ) %>% dplyr::bind_cols(check_stats) %>% return() From 0017bd164e5acdb93988d05ef8719c7894a23ffc Mon Sep 17 00:00:00 2001 From: cpauvert Date: Fri, 3 May 2024 19:34:34 +0200 Subject: [PATCH 10/27] inflate the now tested gather_spectra_stats function this fixes #45 --- NAMESPACE | 1 + R/gather_spectra_stats.R | 54 ++++++++++++++++++++++ dev/config_fusen.yaml | 2 + dev/flat_utils.Rmd | 31 +++++++++---- man/gather_spectra_stats.Rd | 38 +++++++++++++++ tests/testthat/test-gather_spectra_stats.R | 23 +++++++++ 6 files changed, 141 insertions(+), 8 deletions(-) create mode 100644 R/gather_spectra_stats.R create mode 100644 man/gather_spectra_stats.Rd create mode 100644 tests/testthat/test-gather_spectra_stats.R diff --git a/NAMESPACE b/NAMESPACE index 1dbf5fe..b0a3eeb 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,6 +4,7 @@ export("%>%") export(check_spectra) export(delineate_with_identification) export(delineate_with_similarity) +export(gather_spectra_stats) export(get_spectra_names) export(import_biotyper_spectra) export(import_spede_clusters) diff --git a/R/gather_spectra_stats.R b/R/gather_spectra_stats.R new file mode 100644 index 0000000..105b26b --- /dev/null +++ b/R/gather_spectra_stats.R @@ -0,0 +1,54 @@ +# WARNING - Generated by {fusen} from dev/flat_utils.Rmd: do not edit by hand + +#' Aggregate spectra quality-check statistics +#' +#' +#' @param check_vectors A list of logical vectors from [check_spectra] +#' +#' @return A tibble of one row with the following 5 columns of integers: +#' * `n_spectra`: total number of raw spectra. 
+#' * `n_valid_spectra`: total number of spectra passing all quality checks +#' * `is_empty`, `is_outlier_length` and `is_not_regular`: total of spectra flagged with these irregularities. +#' +#' @seealso [check_spectra] +#' @export +#' @examples +#' # Get an example directory of six Bruker MALDI Biotyper spectra +#' directory_biotyper_spectra <- system.file( +#' "toy-species-spectra", +#' package = "maldipickr" +#' ) +#' # Import the six spectra +#' spectra_list <- import_biotyper_spectra(directory_biotyper_spectra) +#' # Display the list of checks, with FALSE where no anomaly is detected +#' checks <- check_spectra(spectra_list) +#' # Aggregate the statistics of quality-checked spectra +#' gather_spectra_stats(checks) +gather_spectra_stats <- function(check_vectors) { + if (typeof(check_vectors) != "list" || + is.null(names(check_vectors))) { + stop( + "check_vectors is not a named list. See maldipickr::check_spectra() help page for a correct format." + ) + } + equal_length <- unique(lengths(check_vectors)) + if (length(equal_length) != 1 || + any(names(check_vectors) != c("is_empty", "is_outlier_length", "is_not_regular")) + ) { + stop( + "Unexpected format for checks_vectors. Are you sure this is the output of maldipickr::check_spectra()?" 
+ ) + } + + # check_vectors from maldipickr::check_spectra + # src: https://stackoverflow.com/a/51140480/21085566 + aggregated_checks <- Reduce(`|`, check_vectors) + check_stats <- vapply(check_vectors, sum, FUN.VALUE = integer(1)) %>% + tibble::as_tibble_row() + tibble::tibble( + "n_spectra" = length(aggregated_checks), + "n_valid_spectra" = .data$n_spectra - sum(aggregated_checks) + ) %>% + dplyr::bind_cols(check_stats) %>% + return() +} diff --git a/dev/config_fusen.yaml b/dev/config_fusen.yaml index 5d23198..abe6578 100644 --- a/dev/config_fusen.yaml +++ b/dev/config_fusen.yaml @@ -71,11 +71,13 @@ keep: path: keep state: active R: + - R/gather_spectra_stats.R - R/remove_spectra_logical.R - R/remove_spectra.R - R/maldipickr-package.R - R/utils-pipe.R tests: + - tests/testthat/test-gather_spectra_stats.R - tests/testthat/test-remove_spectra_logical.R - tests/testthat/test-remove_spectra.R vignettes: [] diff --git a/dev/flat_utils.Rmd b/dev/flat_utils.Rmd index 2857cb0..46ee3ea 100644 --- a/dev/flat_utils.Rmd +++ b/dev/flat_utils.Rmd @@ -640,17 +640,22 @@ test_that("remove_spectra fails with wrong input on raw spectra", { #' #' @seealso [check_spectra] #' @export -gather_spectra_stats <- function(check_vectors){ - equal_length<-unique(lengths(check_vectors)) - if(typeof(check_vectors) != "list" | - length(equal_length) != 1 | - names(check_vectors) != c("is_empty", "is_outlier_length", "is_not_regular") - ){ +gather_spectra_stats <- function(check_vectors) { + if (typeof(check_vectors) != "list" || + is.null(names(check_vectors))) { stop( - "Unexpected format for checks_vectors. Are you sure this is the output of maldicpickr::check_spectra()?" + "check_vectors is not a named list. See maldipickr::check_spectra() help page for a correct format." 
) } - + equal_length <- unique(lengths(check_vectors)) + if (length(equal_length) != 1 || + any(names(check_vectors) != c("is_empty", "is_outlier_length", "is_not_regular")) + ) { + stop( + "Unexpected format for checks_vectors. Are you sure this is the output of maldipickr::check_spectra()?" + ) + } + # check_vectors from maldipickr::check_spectra # src: https://stackoverflow.com/a/51140480/21085566 aggregated_checks <- Reduce(`|`, check_vectors) @@ -691,6 +696,16 @@ test_that("gather_spectra_stats works", { "tbl", "data.frame" ), row.names = c(NA, -1L))) }) +test_that("gather_spectra_stats fails",{ + expect_error( + gather_spectra_stats("spectra"), + "check_vectors is not a named list" + ) + expect_error( + gather_spectra_stats(list("foo" = c(TRUE,TRUE),"bar" = c(TRUE))), + "Unexpected format for checks_vectors. Are you sure this is the output of maldipickr::check_spectra()?" + ) +}) ``` diff --git a/man/gather_spectra_stats.Rd b/man/gather_spectra_stats.Rd new file mode 100644 index 0000000..bbc9e07 --- /dev/null +++ b/man/gather_spectra_stats.Rd @@ -0,0 +1,38 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gather_spectra_stats.R +\name{gather_spectra_stats} +\alias{gather_spectra_stats} +\title{Aggregate spectra quality-check statistics} +\usage{ +gather_spectra_stats(check_vectors) +} +\arguments{ +\item{check_vectors}{A list of logical vectors from \link{check_spectra}} +} +\value{ +A tibble of one row with the following 5 columns of integers: +\itemize{ +\item \code{n_spectra}: total number of raw spectra. +\item \code{n_valid_spectra}: total number of spectra passing all quality checks +\item \code{is_empty}, \code{is_outlier_length} and \code{is_not_regular}: total of spectra flagged with these irregularities. 
+} +} +\description{ +Aggregate spectra quality-check statistics +} +\examples{ +# Get an example directory of six Bruker MALDI Biotyper spectra +directory_biotyper_spectra <- system.file( + "toy-species-spectra", + package = "maldipickr" +) +# Import the six spectra +spectra_list <- import_biotyper_spectra(directory_biotyper_spectra) +# Display the list of checks, with FALSE where no anomaly is detected +checks <- check_spectra(spectra_list) +# Aggregate the statistics of quality-checked spectra +gather_spectra_stats(checks) +} +\seealso{ +\link{check_spectra} +} diff --git a/tests/testthat/test-gather_spectra_stats.R b/tests/testthat/test-gather_spectra_stats.R new file mode 100644 index 0000000..3786ef3 --- /dev/null +++ b/tests/testthat/test-gather_spectra_stats.R @@ -0,0 +1,23 @@ +# WARNING - Generated by {fusen} from dev/flat_utils.Rmd: do not edit by hand + +test_that("gather_spectra_stats works", { + expect_equal(check_spectra( + c(MALDIquant::createMassSpectrum(1:3, 1:3), MALDIquant::createMassSpectrum(11:13, 11:13)) + ) |> gather_spectra_stats(), structure(list( + n_spectra = 2L, n_valid_spectra = 2L, is_empty = 0L, + is_outlier_length = 0L, is_not_regular = 0L + ), class = c( + "tbl_df", + "tbl", "data.frame" + ), row.names = c(NA, -1L))) +}) +test_that("gather_spectra_stats fails",{ + expect_error( + gather_spectra_stats("spectra"), + "check_vectors is not a named list" + ) + expect_error( + gather_spectra_stats(list("foo" = c(TRUE,TRUE),"bar" = c(TRUE))), + "Unexpected format for checks_vectors. Are you sure this is the output of maldipickr::check_spectra()?" 
+ ) +}) From 131caf2d930ae046ed0e7e36a989b9e3ea826fcc Mon Sep 17 00:00:00 2001 From: cpauvert Date: Fri, 3 May 2024 19:54:20 +0200 Subject: [PATCH 11/27] fix typo in target factory example --- R/tar_import_and_process_spectra.R | 2 +- dev/maldipickr-workflow-with-targets.Rmd | 2 +- man/tar_import_and_process_spectra.Rd | 2 +- vignettes/maldipickr-workflow-with-targets.Rmd | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/R/tar_import_and_process_spectra.R b/R/tar_import_and_process_spectra.R index 7b36c91..7d309ef 100644 --- a/R/tar_import_and_process_spectra.R +++ b/R/tar_import_and_process_spectra.R @@ -21,7 +21,7 @@ #' targets::tar_script({ #' library(maldipickr) #' list( -#' tar_spectra_import( +#' tar_import_and_process_spectra( #' name = "example", #' raw_spectra_directories = system.file( #' "toy-species-spectra", diff --git a/dev/maldipickr-workflow-with-targets.Rmd b/dev/maldipickr-workflow-with-targets.Rmd index c0cf2e5..b2cf072 100644 --- a/dev/maldipickr-workflow-with-targets.Rmd +++ b/dev/maldipickr-workflow-with-targets.Rmd @@ -133,7 +133,7 @@ if (Sys.getenv("TAR_LONG_EXAMPLES") == "true") { targets::tar_script({ library(maldipickr) list( - tar_spectra_import( + tar_import_and_process_spectra( name = "example", raw_spectra_directories = system.file( "toy-species-spectra", diff --git a/man/tar_import_and_process_spectra.Rd b/man/tar_import_and_process_spectra.Rd index f571888..686403a 100644 --- a/man/tar_import_and_process_spectra.Rd +++ b/man/tar_import_and_process_spectra.Rd @@ -39,7 +39,7 @@ if (Sys.getenv("TAR_LONG_EXAMPLES") == "true") { targets::tar_script({ library(maldipickr) list( - tar_spectra_import( + tar_import_and_process_spectra( name = "example", raw_spectra_directories = system.file( "toy-species-spectra", diff --git a/vignettes/maldipickr-workflow-with-targets.Rmd b/vignettes/maldipickr-workflow-with-targets.Rmd index 5aa9030..41297e9 100644 --- a/vignettes/maldipickr-workflow-with-targets.Rmd +++ 
b/vignettes/maldipickr-workflow-with-targets.Rmd @@ -47,7 +47,7 @@ if (Sys.getenv("TAR_LONG_EXAMPLES") == "true") { targets::tar_script({ library(maldipickr) list( - tar_spectra_import( + tar_import_and_process_spectra( name = "example", raw_spectra_directories = system.file( "toy-species-spectra", From 1257a2aff35f5a1359992f31f8cfc4272947d859 Mon Sep 17 00:00:00 2001 From: cpauvert Date: Fri, 3 May 2024 22:57:10 +0200 Subject: [PATCH 12/27] add section for {targets} functions fix typo --- _pkgdown.yml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/_pkgdown.yml b/_pkgdown.yml index 3f253db..94d1910 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -32,11 +32,16 @@ reference: - starts_with("delineate_") - title: "Cherry-pick" - desc: "Function to pinpoint and label specific spectra within clusters" + desc: "Functions to pinpoint and label specific spectra within clusters" contents: - pick_spectra - set_reference_spectra + - title: "Workflow" + desc: "Functions (i.e., targets factories) to facilitate {targets} workflow development for {maldipickr}" + contents: + - starts_with("tar_") + - title: "Miscellaneous" contents: - is_well_on_edge From 7f2d55fc73f54bfb3f1069cff065be27f89aae6a Mon Sep 17 00:00:00 2001 From: cpauvert Date: Fri, 3 May 2024 23:45:27 +0200 Subject: [PATCH 13/27] add TODOs and better explanations of target objects --- dev/maldipickr-workflow-with-targets.Rmd | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/dev/maldipickr-workflow-with-targets.Rmd b/dev/maldipickr-workflow-with-targets.Rmd index b2cf072..098f840 100644 --- a/dev/maldipickr-workflow-with-targets.Rmd +++ b/dev/maldipickr-workflow-with-targets.Rmd @@ -38,13 +38,26 @@ These helpers in `{maldipickr}` are called {targets} factories, which means they #' Given a vector of paths to `acqus` and `acqu` MALDI Biotyper directories, this targets #' factory facilitates the steps from raw spectra to quality-checked processed spectra. 
#' +#' # TODO: add link to [targets::tar_target] +#' #' @param name A character indicating the prefix of all targets created by the factory. #' For instance, `name = anaerobe` will create the target `anaerobe_spectra_raw` among others. #' @param raw_spectra_directories A vector of paths to directories containing MALDI Biotyper spectra files. This is similar to the `biotyper_directory` parameter from [import_biotyper_spectra], but as a character vector. #' @inheritParams check_spectra -#' @inheritParams targets::tar_target_raw +#' @inheritParams targets::tar_target_raw # TODO: add others parameters to inherit +#' +#' # TODO: restrain format to only rds or qs but for qs warn with rlang::check_installed #' -#' @return A target object +#' @return A list of target objects whose names use the `name` argument as a prefix: +#' `*_plates_files` (e.g., `anaerobe_plates_files`): +#' `*_plates` (e.g., `anaerobe_plates`): +#' `*_spectra_raw` (e.g., `anaerobe_spectra_raw`): +#' `*_checks` (e.g., `anaerobe_checks`): +#' `*_valid_spectra` (e.g., `anaerobe_valid_spectra`): +#' `*_spectra_stats` (e.g., `anaerobe_spectra_stats`): +#' `*_processed` (e.g., `anaerobe_processed`): +#' +#' @note Once the workflow is checked (with [targets::tar_manifest] or [targets::tar_visnetwork]) and run (with [targets::tar_make]), all the target objects returned can be accessed using [targets::tar_read]] (e.g., `targets::tar_read(anaerobe_spectra_stats)`). 
#' #' @export tar_import_and_process_spectra <- function( @@ -134,7 +147,7 @@ if (Sys.getenv("TAR_LONG_EXAMPLES") == "true") { library(maldipickr) list( tar_import_and_process_spectra( - name = "example", + name = "anaerobe", raw_spectra_directories = system.file( "toy-species-spectra", package = "maldipickr"), From 12e522011935dd634267e8f6320288be98068d6d Mon Sep 17 00:00:00 2001 From: cpauvert Date: Mon, 6 May 2024 09:05:21 +0200 Subject: [PATCH 14/27] add explicit section title for factory and link to docs --- dev/maldipickr-workflow-with-targets.Rmd | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/dev/maldipickr-workflow-with-targets.Rmd b/dev/maldipickr-workflow-with-targets.Rmd index 098f840..cd81649 100644 --- a/dev/maldipickr-workflow-with-targets.Rmd +++ b/dev/maldipickr-workflow-with-targets.Rmd @@ -29,16 +29,16 @@ These helpers in `{maldipickr}` are called {targets} factories, which means they -# tar_import_and_process_spectra +# Target factory to import and process spectra ```{r function-tar_import_and_process_spectra} #' Import and process checked spectra using targets #' #' -#' Given a vector of paths to `acqus` and `acqu` MALDI Biotyper directories, this targets -#' factory facilitates the steps from raw spectra to quality-checked processed spectra. -#' -#' # TODO: add link to [targets::tar_target] +#' Given a vector of paths to MALDI Biotyper directories containing `acqus` and +#' `acqu`, this target factory facilitates the steps from raw spectra to +#' quality-checked processed spectra. See [targets::tar_target] for more +#' information about what are target objects. #' #' @param name A character indicating the prefix of all targets created by the factory. #' For instance, `name = anaerobe` will create the target `anaerobe_spectra_raw` among others. 
From a830afd3b1f653d8e79165d8602a6d989e66744f Mon Sep 17 00:00:00 2001 From: cpauvert Date: Mon, 6 May 2024 09:07:33 +0200 Subject: [PATCH 15/27] precise TODO for inherited options --- dev/maldipickr-workflow-with-targets.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/maldipickr-workflow-with-targets.Rmd b/dev/maldipickr-workflow-with-targets.Rmd index cd81649..e56e22d 100644 --- a/dev/maldipickr-workflow-with-targets.Rmd +++ b/dev/maldipickr-workflow-with-targets.Rmd @@ -44,7 +44,7 @@ These helpers in `{maldipickr}` are called {targets} factories, which means they #' For instance, `name = anaerobe` will create the target `anaerobe_spectra_raw` among others. #' @param raw_spectra_directories A vector of paths to directories containing MALDI Biotyper spectra files. This is similar to the `biotyper_directory` parameter from [import_biotyper_spectra], but as a character vector. #' @inheritParams check_spectra -#' @inheritParams targets::tar_target_raw # TODO: add others parameters to inherit +#' @inheritParams targets::tar_target_raw # TODO: add others parameters to inherit, maybe with ... to targets::tar_target()? 
#' #' # TODO: restrain format to only rds or qs but for qs warn with rlang::check_installed #' From f94d897cd24e32556673f8b51d3281dda0048602 Mon Sep 17 00:00:00 2001 From: cpauvert Date: Mon, 6 May 2024 09:59:31 +0200 Subject: [PATCH 16/27] describe the targets objects obtained with the factory plus a bit of inflated changes --- R/tar_import_and_process_spectra.R | 22 ++++++++++++++----- dev/maldipickr-workflow-with-targets.Rmd | 13 +++++------ inst/WORDLIST | 2 ++ man/tar_import_and_process_spectra.Rd | 19 ++++++++++++---- .../maldipickr-workflow-with-targets.Rmd | 4 ++-- 5 files changed, 42 insertions(+), 18 deletions(-) diff --git a/R/tar_import_and_process_spectra.R b/R/tar_import_and_process_spectra.R index 7d309ef..39cec2a 100644 --- a/R/tar_import_and_process_spectra.R +++ b/R/tar_import_and_process_spectra.R @@ -3,16 +3,28 @@ #' Import and process checked spectra using targets #' #' -#' Given a vector of paths to `acqus` and `acqu` MALDI Biotyper directories, this targets -#' factory facilitates the steps from raw spectra to quality-checked processed spectra. +#' Given a vector of paths to MALDI Biotyper directories containing `acqus` and +#' `acqu`, this target factory facilitates the steps from raw spectra to +#' quality-checked processed spectra. See [targets::tar_target] for more +#' information about what are target objects. #' #' @param name A character indicating the prefix of all targets created by the factory. #' For instance, `name = anaerobe` will create the target `anaerobe_spectra_raw` among others. #' @param raw_spectra_directories A vector of paths to directories containing MALDI Biotyper spectra files. This is similar to the `biotyper_directory` parameter from [import_biotyper_spectra], but as a character vector. #' @inheritParams check_spectra -#' @inheritParams targets::tar_target_raw +#' @inheritParams targets::tar_target_raw # TODO: add others parameters to inherit, maybe with ... to targets::tar_target()? 
#' -#' @return A target object +#' # TODO: restrain format to only rds or qs but for qs warn with rlang::check_installed +#' +#' @return A list of target objects whose names use the `name` argument as a prefix: +#' `*_plates_files` (e.g., `anaerobe_plates_files`) and `*_plates` (e.g., `anaerobe_plates`): are unnamed and named lists of input paths provided by `raw_spectra_directories`, respectively, as produced by [tarchetypes::tar_files_input]. +#' `*_spectra_raw` (e.g., `anaerobe_spectra_raw`): is a list-of-list of imported spectra objects produced by [import_biotyper_spectra]. +#' `*_checks` (e.g., `anaerobe_checks`): is a list-of-list of logical vectors produced by [check_spectra]. +#' `*_valid_spectra` (e.g., `anaerobe_valid_spectra`): is a list-of-list of subset of quality-checked spectra produced by [remove_spectra]. +#' `*_spectra_stats` (e.g., `anaerobe_spectra_stats`): is a tibble of statistics from the quality-check produced by [gather_spectra_stats] with a row for each input paths from `_plates_files`. +#' `*_processed` (e.g., `anaerobe_processed`): is a list-of-list of processed spectra and associated peaks produced by [process_spectra]. +#' +#' @note Once the workflow is checked (with [targets::tar_manifest] or [targets::tar_visnetwork]) and run (with [targets::tar_make]), all the target objects returned can be accessed using [targets::tar_read]] (e.g., `targets::tar_read(anaerobe_spectra_stats)`). 
#' #' @export #' @examples @@ -22,7 +34,7 @@ #' library(maldipickr) #' list( #' tar_import_and_process_spectra( -#' name = "example", +#' name = "anaerobe", #' raw_spectra_directories = system.file( #' "toy-species-spectra", #' package = "maldipickr"), diff --git a/dev/maldipickr-workflow-with-targets.Rmd b/dev/maldipickr-workflow-with-targets.Rmd index e56e22d..73f7979 100644 --- a/dev/maldipickr-workflow-with-targets.Rmd +++ b/dev/maldipickr-workflow-with-targets.Rmd @@ -49,13 +49,12 @@ These helpers in `{maldipickr}` are called {targets} factories, which means they #' # TODO: restrain format to only rds or qs but for qs warn with rlang::check_installed #' #' @return A list of target objects whose names use the `name` argument as a prefix: -#' `*_plates_files` (e.g., `anaerobe_plates_files`): -#' `*_plates` (e.g., `anaerobe_plates`): -#' `*_spectra_raw` (e.g., `anaerobe_spectra_raw`): -#' `*_checks` (e.g., `anaerobe_checks`): -#' `*_valid_spectra` (e.g., `anaerobe_valid_spectra`): -#' `*_spectra_stats` (e.g., `anaerobe_spectra_stats`): -#' `*_processed` (e.g., `anaerobe_processed`): +#' `*_plates_files` (e.g., `anaerobe_plates_files`) and `*_plates` (e.g., `anaerobe_plates`): are unnamed and named lists of input paths provided by `raw_spectra_directories`, respectively, as produced by [tarchetypes::tar_files_input]. +#' `*_spectra_raw` (e.g., `anaerobe_spectra_raw`): is a list-of-list of imported spectra objects produced by [import_biotyper_spectra]. +#' `*_checks` (e.g., `anaerobe_checks`): is a list-of-list of logical vectors produced by [check_spectra]. +#' `*_valid_spectra` (e.g., `anaerobe_valid_spectra`): is a list-of-list of subset of quality-checked spectra produced by [remove_spectra]. +#' `*_spectra_stats` (e.g., `anaerobe_spectra_stats`): is a tibble of statistics from the quality-check produced by [gather_spectra_stats] with a row for each input paths from `_plates_files`. 
+#' `*_processed` (e.g., `anaerobe_processed`): is a list-of-list of processed spectra and associated peaks produced by [process_spectra]. #' #' @note Once the workflow is checked (with [targets::tar_manifest] or [targets::tar_visnetwork]) and run (with [targets::tar_make]), all the target objects returned can be accessed using [targets::tar_read]] (e.g., `targets::tar_read(anaerobe_spectra_stats)`). #' diff --git a/inst/WORDLIST b/inst/WORDLIST index a6ebf50..065a740 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -68,5 +68,7 @@ rRNA reprex scalable spectrometry +tarchetypes tibble +visnetwork à diff --git a/man/tar_import_and_process_spectra.Rd b/man/tar_import_and_process_spectra.Rd index 686403a..98d825f 100644 --- a/man/tar_import_and_process_spectra.Rd +++ b/man/tar_import_and_process_spectra.Rd @@ -27,11 +27,22 @@ way to save and load this file. See the "Storage formats" section for a detailed list of possible data storage formats.} } \value{ -A target object +A list of target objects whose names use the \code{name} argument as a prefix: +\verb{*_plates_files} (e.g., \code{anaerobe_plates_files}) and \verb{*_plates} (e.g., \code{anaerobe_plates}): are unnamed and named lists of input paths provided by \code{raw_spectra_directories}, respectively, as produced by \link[tarchetypes:tar_files_input]{tarchetypes::tar_files_input}. +\verb{*_spectra_raw} (e.g., \code{anaerobe_spectra_raw}): is a list-of-list of imported spectra objects produced by \link{import_biotyper_spectra}. +\verb{*_checks} (e.g., \code{anaerobe_checks}): is a list-of-list of logical vectors produced by \link{check_spectra}. +\verb{*_valid_spectra} (e.g., \code{anaerobe_valid_spectra}): is a list-of-list of subset of quality-checked spectra produced by \link{remove_spectra}. +\verb{*_spectra_stats} (e.g., \code{anaerobe_spectra_stats}): is a tibble of statistics from the quality-check produced by \link{gather_spectra_stats} with a row for each input paths from \verb{_plates_files}. 
+\verb{*_processed} (e.g., \code{anaerobe_processed}): is a list-of-list of processed spectra and associated peaks produced by \link{process_spectra}. } \description{ -Given a vector of paths to \code{acqus} and \code{acqu} MALDI Biotyper directories, this targets -factory facilitates the steps from raw spectra to quality-checked processed spectra. +Given a vector of paths to MALDI Biotyper directories containing \code{acqus} and +\code{acqu}, this target factory facilitates the steps from raw spectra to +quality-checked processed spectra. See \link[targets:tar_target]{targets::tar_target} for more +information about what are target objects. +} +\note{ +Once the workflow is checked (with \link[targets:tar_manifest]{targets::tar_manifest} or \link[targets:tar_visnetwork]{targets::tar_visnetwork}) and run (with \link[targets:tar_make]{targets::tar_make}), all the target objects returned can be accessed using \link[targets:tar_read]{targets::tar_read}] (e.g., \code{targets::tar_read(anaerobe_spectra_stats)}). 
} \examples{ if (Sys.getenv("TAR_LONG_EXAMPLES") == "true") { @@ -40,7 +51,7 @@ if (Sys.getenv("TAR_LONG_EXAMPLES") == "true") { library(maldipickr) list( tar_import_and_process_spectra( - name = "example", + name = "anaerobe", raw_spectra_directories = system.file( "toy-species-spectra", package = "maldipickr"), diff --git a/vignettes/maldipickr-workflow-with-targets.Rmd b/vignettes/maldipickr-workflow-with-targets.Rmd index 41297e9..d4da5d4 100644 --- a/vignettes/maldipickr-workflow-with-targets.Rmd +++ b/vignettes/maldipickr-workflow-with-targets.Rmd @@ -35,7 +35,7 @@ These helpers in `{maldipickr}` are called {targets} factories, which means they -# tar_import_and_process_spectra +# Target factory to import and process spectra @@ -48,7 +48,7 @@ if (Sys.getenv("TAR_LONG_EXAMPLES") == "true") { library(maldipickr) list( tar_import_and_process_spectra( - name = "example", + name = "anaerobe", raw_spectra_directories = system.file( "toy-species-spectra", package = "maldipickr"), From bfb1623862673fd688dceeb112d0290c2d5f0791 Mon Sep 17 00:00:00 2001 From: cpauvert Date: Mon, 6 May 2024 13:20:40 +0200 Subject: [PATCH 17/27] make the call to factory cleaner using symbol to prefix targets names --- R/tar_import_and_process_spectra.R | 6 +++--- dev/maldipickr-workflow-with-targets.Rmd | 6 +++--- man/tar_import_and_process_spectra.Rd | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/R/tar_import_and_process_spectra.R b/R/tar_import_and_process_spectra.R index 39cec2a..2e2c9ec 100644 --- a/R/tar_import_and_process_spectra.R +++ b/R/tar_import_and_process_spectra.R @@ -8,8 +8,8 @@ #' quality-checked processed spectra. See [targets::tar_target] for more #' information about what are target objects. #' -#' @param name A character indicating the prefix of all targets created by the factory. -#' For instance, `name = anaerobe` will create the target `anaerobe_spectra_raw` among others. 
+#' @param name A symbol indicating the prefix of all targets created by the factory. +#' For instance, calling `tar_import_and_process_spectra(anaerobe, ...)` will create the target `anaerobe_spectra_raw` among others (see the Value section). #' @param raw_spectra_directories A vector of paths to directories containing MALDI Biotyper spectra files. This is similar to the `biotyper_directory` parameter from [import_biotyper_spectra], but as a character vector. #' @inheritParams check_spectra #' @inheritParams targets::tar_target_raw # TODO: add others parameters to inherit, maybe with ... to targets::tar_target()? @@ -52,7 +52,7 @@ tar_import_and_process_spectra <- function( rlang::check_installed(c("targets", "tarchetypes"), reason = "to facilitate {maldipickr} workflow development" ) - targets::tar_assert_chr(name) + name <- targets::tar_deparse_language(substitute(name)) targets::tar_assert_path(raw_spectra_directories) targets::tar_assert_dbl(tolerance) diff --git a/dev/maldipickr-workflow-with-targets.Rmd b/dev/maldipickr-workflow-with-targets.Rmd index 73f7979..2fb8ac3 100644 --- a/dev/maldipickr-workflow-with-targets.Rmd +++ b/dev/maldipickr-workflow-with-targets.Rmd @@ -40,8 +40,8 @@ These helpers in `{maldipickr}` are called {targets} factories, which means they #' quality-checked processed spectra. See [targets::tar_target] for more #' information about what are target objects. #' -#' @param name A character indicating the prefix of all targets created by the factory. -#' For instance, `name = anaerobe` will create the target `anaerobe_spectra_raw` among others. +#' @param name A symbol indicating the prefix of all targets created by the factory. +#' For instance, calling `tar_import_and_process_spectra(anaerobe, ...)` will create the target `anaerobe_spectra_raw` among others (see the Value section). #' @param raw_spectra_directories A vector of paths to directories containing MALDI Biotyper spectra files. 
This is similar to the `biotyper_directory` parameter from [import_biotyper_spectra], but as a character vector. #' @inheritParams check_spectra #' @inheritParams targets::tar_target_raw # TODO: add others parameters to inherit, maybe with ... to targets::tar_target()? @@ -67,7 +67,7 @@ tar_import_and_process_spectra <- function( rlang::check_installed(c("targets", "tarchetypes"), reason = "to facilitate {maldipickr} workflow development" ) - targets::tar_assert_chr(name) + name <- targets::tar_deparse_language(substitute(name)) targets::tar_assert_path(raw_spectra_directories) targets::tar_assert_dbl(tolerance) diff --git a/man/tar_import_and_process_spectra.Rd b/man/tar_import_and_process_spectra.Rd index 98d825f..c901619 100644 --- a/man/tar_import_and_process_spectra.Rd +++ b/man/tar_import_and_process_spectra.Rd @@ -12,8 +12,8 @@ tar_import_and_process_spectra( ) } \arguments{ -\item{name}{A character indicating the prefix of all targets created by the factory. -For instance, \code{name = anaerobe} will create the target \code{anaerobe_spectra_raw} among others.} +\item{name}{A symbol indicating the prefix of all targets created by the factory. +For instance, calling \code{tar_import_and_process_spectra(anaerobe, ...)} will create the target \code{anaerobe_spectra_raw} among others (see the Value section).} \item{raw_spectra_directories}{A vector of paths to directories containing MALDI Biotyper spectra files. 
This is similar to the \code{biotyper_directory} parameter from \link{import_biotyper_spectra}, but as a character vector.} From 35aecfea0cdd55f30f70e6e4edc2e36ec32af54a Mon Sep 17 00:00:00 2001 From: cpauvert Date: Wed, 8 May 2024 10:40:10 +0200 Subject: [PATCH 18/27] add {coop} to Suggests packages --- DESCRIPTION | 1 + NAMESPACE | 1 + dev/0-dev_history.Rmd | 2 +- dev/config_attachment.yaml | 2 ++ 4 files changed, 5 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index b0eea4b..dd70180 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -41,6 +41,7 @@ Imports: tools, utils Suggests: + coop, knitr, rmarkdown, spelling, diff --git a/NAMESPACE b/NAMESPACE index b0a3eeb..174c152 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -17,6 +17,7 @@ export(read_many_biotyper_reports) export(remove_spectra) export(set_reference_spectra) export(tar_import_and_process_spectra) +export(tar_pick_with_similarity) importFrom(MALDIquant,binPeaks) importFrom(MALDIquant,calibrateIntensity) importFrom(MALDIquant,createMassSpectrum) diff --git a/dev/0-dev_history.Rmd b/dev/0-dev_history.Rmd index b7b4c4a..c4fbff8 100644 --- a/dev/0-dev_history.Rmd +++ b/dev/0-dev_history.Rmd @@ -108,7 +108,7 @@ pkgload::load_all() # Generate documentation and deal with dependencies # Ran once after issue with pkgs moved from Suggests to Imports -attachment::att_amend_desc(extra.suggests = c("targets","tarchetypes"),pkg_ignore = c("targets","tarchetypes"), normalize = TRUE, update.config = TRUE) +attachment::att_amend_desc(extra.suggests = c("targets","tarchetypes", "coop"),pkg_ignore = c("targets","tarchetypes", "coop"), normalize = TRUE, update.config = TRUE) # Check the package devtools::check() diff --git a/dev/config_attachment.yaml b/dev/config_attachment.yaml index 26e25f6..db4ccc3 100644 --- a/dev/config_attachment.yaml +++ b/dev/config_attachment.yaml @@ -6,9 +6,11 @@ dir.t: tests extra.suggests: - targets - tarchetypes + - coop pkg_ignore: - targets - tarchetypes + - coop document: yes 
normalize: yes inside_rmd: no From 23a821ffed6fe7913b36f3071a872a7ceec273f6 Mon Sep 17 00:00:00 2001 From: cpauvert Date: Wed, 8 May 2024 10:43:13 +0200 Subject: [PATCH 19/27] initiate the target factory for delineation --- R/tar_pick_with_similarity.R | 65 ++++++++++++++++ dev/config_fusen.yaml | 2 + dev/maldipickr-workflow-with-targets.Rmd | 76 +++++++++++++++++++ man/tar_pick_with_similarity.Rd | 32 ++++++++ .../testthat/test-tar_pick_with_similarity.R | 5 ++ .../maldipickr-workflow-with-targets.Rmd | 14 ++++ 6 files changed, 194 insertions(+) create mode 100644 R/tar_pick_with_similarity.R create mode 100644 man/tar_pick_with_similarity.Rd create mode 100644 tests/testthat/test-tar_pick_with_similarity.R diff --git a/R/tar_pick_with_similarity.R b/R/tar_pick_with_similarity.R new file mode 100644 index 0000000..9e7b970 --- /dev/null +++ b/R/tar_pick_with_similarity.R @@ -0,0 +1,65 @@ +# WARNING - Generated by {fusen} from dev/maldipickr-workflow-with-targets.Rmd: do not edit by hand + +#' Delineate clusters of spectra to be picked using targets +#' +#' Description +#' +#' @param name A symbol indicating the prefix of all targets created by the factory. +#' For instance, calling `tar_pick_with_similarity(anaerobe, ...)` will create +#' the target `anaerobe_sim_interpolated` among others (see the Value section). +#' @param targets_spectra A list of targets produced by [tar_import_and_process_spectra] that should contains one or more targets named `*_processed`. +#' @param threshold A numeric value indicating the minimal cosine similarity between two spectra. +#' @param ... Arguments passed to [pick_spectra] +#' +#' @return A list of target objects whose names use the `name` argument as a prefix: +#' * `*_fm_interpolated` (e.g., `anaerobe_fm_interpolated`): a matrix produced by [merge_processed_spectra]. +#' * `*_sim_interpolated` (e.g., `anaerobe_sim_interpolated`): a symetric cosine similarity matrix produced by [coop::tcosine]. 
+#' +#' @export +#' @examples +#' tar_pick_with_similarity() +tar_pick_with_similarity <- function( + name, + targets_spectra, + threshold, ...) { + rlang::check_installed(c("targets", "tarchetypes", "coop"), + reason = "to facilitate {maldipickr} workflow development" + ) + name <- targets::tar_deparse_language(substitute(name)) + targets::tar_assert_dbl(threshold) + targets::tar_assert_list(targets_spectra) + targets_spectra <- unlist(list(targets_spectra), recursive = TRUE) + + + # It was tricky to apply the symbol transformation to a list whilst constructing + # a list structure that could be used correctly by merge_processed_spectra() + # Thankfully, @wlandau suggested an awesome solution to a similar problem + # https://github.com/ropensci/targets/discussions/461#discussioncomment-709984 + # + # which will create list(fast_processed, slow_processed) from + # targets_spectra = c(fast_target_factory, slow_target_factory) + name_processed <- tarchetypes::tar_select_names(targets_spectra, targets::ends_with("_processed")) + processed_expr <- as.call(c(as.symbol("list"), lapply(name_processed, as.symbol))) + + + name_fm <- paste0(name, "_fm_interpolated") + name_sim <- paste0(name, "_sim_interpolated") + + + sym_fm <- as.symbol(name_fm) + sym_sim <- as.symbol(name_sim) + list( + targets::tar_target_raw( + name = name_fm, + command = substitute(merge_processed_spectra(processed_spectra), + env = list(processed_spectra = processed_expr) + ) + ), + targets::tar_target_raw( + name = name_sim, + command = substitute(coop::tcosine(fm_interpolated), + env = list(fm_interpolated = sym_fm) + ) + ) + ) +} diff --git a/dev/config_fusen.yaml b/dev/config_fusen.yaml index abe6578..def8a66 100644 --- a/dev/config_fusen.yaml +++ b/dev/config_fusen.yaml @@ -71,12 +71,14 @@ keep: path: keep state: active R: + - R/tar_pick_with_similarity.R - R/gather_spectra_stats.R - R/remove_spectra_logical.R - R/remove_spectra.R - R/maldipickr-package.R - R/utils-pipe.R tests: + - 
tests/testthat/test-tar_pick_with_similarity.R - tests/testthat/test-gather_spectra_stats.R - tests/testthat/test-remove_spectra_logical.R - tests/testthat/test-remove_spectra.R diff --git a/dev/maldipickr-workflow-with-targets.Rmd b/dev/maldipickr-workflow-with-targets.Rmd index 2fb8ac3..64cc824 100644 --- a/dev/maldipickr-workflow-with-targets.Rmd +++ b/dev/maldipickr-workflow-with-targets.Rmd @@ -164,6 +164,82 @@ test_that("tar_import_and_process_spectra works", { }) ``` +# Target factory to pick representative spectra from clusters of MALDI-TOF spectra + +```{r function-tar_pick_with_similarity} +#' Delineate clusters of spectra to be picked using targets +#' +#' Description +#' +#' @param name A symbol indicating the prefix of all targets created by the factory. +#' For instance, calling `tar_pick_with_similarity(anaerobe, ...)` will create +#' the target `anaerobe_sim_interpolated` among others (see the Value section). +#' @param targets_spectra A list of targets produced by [tar_import_and_process_spectra] that should contains one or more targets named `*_processed`. +#' @param threshold A numeric value indicating the minimal cosine similarity between two spectra. +#' @param ... Arguments passed to [pick_spectra] +#' +#' @return A list of target objects whose names use the `name` argument as a prefix: +#' * `*_fm_interpolated` (e.g., `anaerobe_fm_interpolated`): a matrix produced by [merge_processed_spectra]. +#' * `*_sim_interpolated` (e.g., `anaerobe_sim_interpolated`): a symetric cosine similarity matrix produced by [coop::tcosine]. +#' +#' @export +tar_pick_with_similarity <- function( + name, + targets_spectra, + threshold, ...) 
{ + rlang::check_installed(c("targets", "tarchetypes", "coop"), + reason = "to facilitate {maldipickr} workflow development" + ) + name <- targets::tar_deparse_language(substitute(name)) + targets::tar_assert_dbl(threshold) + targets::tar_assert_list(targets_spectra) + targets_spectra <- unlist(list(targets_spectra), recursive = TRUE) + + + # It was tricky to apply the symbol transformation to a list whilst constructing + # a list structure that could be used correctly by merge_processed_spectra() + # Thankfully, @wlandau suggested an awesome solution to a similar problem + # https://github.com/ropensci/targets/discussions/461#discussioncomment-709984 + # + # which will create list(fast_processed, slow_processed) from + # targets_spectra = c(fast_target_factory, slow_target_factory) + name_processed <- tarchetypes::tar_select_names(targets_spectra, targets::ends_with("_processed")) + processed_expr <- as.call(c(as.symbol("list"), lapply(name_processed, as.symbol))) + + + name_fm <- paste0(name, "_fm_interpolated") + name_sim <- paste0(name, "_sim_interpolated") + + + sym_fm <- as.symbol(name_fm) + sym_sim <- as.symbol(name_sim) + list( + targets::tar_target_raw( + name = name_fm, + command = substitute(merge_processed_spectra(processed_spectra), + env = list(processed_spectra = processed_expr) + ) + ), + targets::tar_target_raw( + name = name_sim, + command = substitute(coop::tcosine(fm_interpolated), + env = list(fm_interpolated = sym_fm) + ) + ) + ) +} +``` + +```{r example-tar_pick_with_similarity, eval=FALSE} +tar_pick_with_similarity() +``` + +```{r tests-tar_pick_with_similarity} +test_that("tar_pick_with_similarity works", { + expect_true(inherits(tar_pick_with_similarity, "function")) +}) +``` + ```{r development-inflate, eval=FALSE} # Run but keep eval=FALSE to avoid infinite loop diff --git a/man/tar_pick_with_similarity.Rd b/man/tar_pick_with_similarity.Rd new file mode 100644 index 0000000..8b7188e --- /dev/null +++ b/man/tar_pick_with_similarity.Rd @@ 
-0,0 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tar_pick_with_similarity.R +\name{tar_pick_with_similarity} +\alias{tar_pick_with_similarity} +\title{Delineate clusters of spectra to be picked using targets} +\usage{ +tar_pick_with_similarity(name, targets_spectra, threshold, ...) +} +\arguments{ +\item{name}{A symbol indicating the prefix of all targets created by the factory. +For instance, calling \code{tar_pick_with_similarity(anaerobe, ...)} will create +the target \code{anaerobe_sim_interpolated} among others (see the Value section).} + +\item{targets_spectra}{A list of targets produced by \link{tar_import_and_process_spectra} that should contains one or more targets named \verb{*_processed}.} + +\item{threshold}{A numeric value indicating the minimal cosine similarity between two spectra.} + +\item{...}{Arguments passed to \link{pick_spectra}} +} +\value{ +A list of target objects whose names use the \code{name} argument as a prefix: +\itemize{ +\item \verb{*_fm_interpolated} (e.g., \code{anaerobe_fm_interpolated}): a matrix produced by \link{merge_processed_spectra}. +\item \verb{*_sim_interpolated} (e.g., \code{anaerobe_sim_interpolated}): a symetric cosine similarity matrix produced by \link[coop:cosine]{coop::tcosine}. 
+} +} +\description{ +Description +} +\examples{ +tar_pick_with_similarity() +} diff --git a/tests/testthat/test-tar_pick_with_similarity.R b/tests/testthat/test-tar_pick_with_similarity.R new file mode 100644 index 0000000..3d90d71 --- /dev/null +++ b/tests/testthat/test-tar_pick_with_similarity.R @@ -0,0 +1,5 @@ +# WARNING - Generated by {fusen} from dev/maldipickr-workflow-with-targets.Rmd: do not edit by hand + +test_that("tar_pick_with_similarity works", { + expect_true(inherits(tar_pick_with_similarity, "function")) +}) diff --git a/vignettes/maldipickr-workflow-with-targets.Rmd b/vignettes/maldipickr-workflow-with-targets.Rmd index d4da5d4..a9ad160 100644 --- a/vignettes/maldipickr-workflow-with-targets.Rmd +++ b/vignettes/maldipickr-workflow-with-targets.Rmd @@ -64,4 +64,18 @@ if (Sys.getenv("TAR_LONG_EXAMPLES") == "true") { +# Target factory to pick representative spectra from clusters of MALDI-TOF spectra + + + + + +```{r example-tar_pick_with_similarity, eval = FALSE} +tar_pick_with_similarity() +``` + + + + + From 7e4584c71418d6ee60d45f59724186ba5fea50b6 Mon Sep 17 00:00:00 2001 From: cpauvert Date: Thu, 9 May 2024 18:15:23 +0200 Subject: [PATCH 20/27] extend the factory up to the picking step --- R/tar_pick_with_similarity.R | 51 ++++++++++++++++++- dev/maldipickr-workflow-with-targets.Rmd | 51 ++++++++++++++++++- man/tar_pick_with_similarity.Rd | 11 +++- .../maldipickr-workflow-with-targets.Rmd | 2 +- 4 files changed, 108 insertions(+), 7 deletions(-) diff --git a/R/tar_pick_with_similarity.R b/R/tar_pick_with_similarity.R index 9e7b970..0134e7f 100644 --- a/R/tar_pick_with_similarity.R +++ b/R/tar_pick_with_similarity.R @@ -2,7 +2,10 @@ #' Delineate clusters of spectra to be picked using targets #' -#' Description +#' Given upstream targets of processed spectra (from [tar_import_and_process_spectra]) +#' this target factory facilitates the steps from quality-checked +#' processed spectra to clusters of spectra. 
See [targets::tar_target] for more +#' information about what are target objects. #' #' @param name A symbol indicating the prefix of all targets created by the factory. #' For instance, calling `tar_pick_with_similarity(anaerobe, ...)` will create @@ -14,10 +17,14 @@ #' @return A list of target objects whose names use the `name` argument as a prefix: #' * `*_fm_interpolated` (e.g., `anaerobe_fm_interpolated`): a matrix produced by [merge_processed_spectra]. #' * `*_sim_interpolated` (e.g., `anaerobe_sim_interpolated`): a symetric cosine similarity matrix produced by [coop::tcosine]. +#' * `*_df_interpolated` (e.g., `anaerobe_df_interpolated`): a tibble with the membership (i.e., which cluster label) each spectra belongs to produced by [delineate_with_similarity]. +#' * `*_processed_metadata` (e.g., `anaerobe_processed_metadata`): a tibble of aggregated technical metadata for each spectra. +#' * `*_clusters` (e.g., `anaerobe_clusters`): a tibble indicating with the previous metadata and which spectra was chosen as reference produced by [set_reference_spectra]. +#' * `*_picked` (e.g., `anaerobe_picked`): a tibble containing all the previous metadata but more importantly which spectra should be picked produced by [pick_spectra]. 
#' #' @export #' @examples -#' tar_pick_with_similarity() +#'\dontrun{tar_pick_with_similarity()} tar_pick_with_similarity <- function( name, targets_spectra, @@ -44,10 +51,18 @@ tar_pick_with_similarity <- function( name_fm <- paste0(name, "_fm_interpolated") name_sim <- paste0(name, "_sim_interpolated") + name_df <- paste0(name, "_df_interpolated") + name_processed_metadata <- paste0(name, "_processed_metadata") + name_clusters <- paste0(name, "_clusters") + name_picked <- paste0(name, "_picked") sym_fm <- as.symbol(name_fm) sym_sim <- as.symbol(name_sim) + sym_df <- as.symbol(name_df) + sym_processed_metadata <- as.symbol(name_processed_metadata) + sym_clusters <- as.symbol(name_clusters) + list( targets::tar_target_raw( name = name_fm, @@ -60,6 +75,38 @@ tar_pick_with_similarity <- function( command = substitute(coop::tcosine(fm_interpolated), env = list(fm_interpolated = sym_fm) ) + ), + targets::tar_target_raw( + name = name_df, + command = substitute(delineate_with_similarity( + sim_matrix = sim_interpolated, + threshold = threshold, + method = "complete"), + env = list(sim_interpolated = sym_sim, threshold = threshold) + ) + ), + targets::tar_target_raw( + name = name_processed_metadata, + command = substitute( + dplyr::bind_rows( + lapply(processed_spectra, `[[`, "metadata")), + env = list(processed_spectra = processed_expr) + ), + iteration = "list" + ), + targets::tar_target_raw( + name = name_clusters, + command = substitute( + set_reference_spectra(df_interpolated, processed_metadata), + env = list(df_interpolated = sym_df, processed_metadata = sym_processed_metadata) + ) + ), + targets::tar_target_raw( + name = name_picked, + command = substitute( + pick_spectra(df_interpolated, ...), + env = list(df_interpolated = sym_clusters) + ) ) ) } diff --git a/dev/maldipickr-workflow-with-targets.Rmd b/dev/maldipickr-workflow-with-targets.Rmd index 64cc824..9ab41fa 100644 --- a/dev/maldipickr-workflow-with-targets.Rmd +++ 
b/dev/maldipickr-workflow-with-targets.Rmd @@ -169,7 +169,10 @@ test_that("tar_import_and_process_spectra works", { ```{r function-tar_pick_with_similarity} #' Delineate clusters of spectra to be picked using targets #' -#' Description +#' Given upstream targets of processed spectra (from [tar_import_and_process_spectra]) +#' this target factory facilitates the steps from quality-checked +#' processed spectra to clusters of spectra. See [targets::tar_target] for more +#' information about what are target objects. #' #' @param name A symbol indicating the prefix of all targets created by the factory. #' For instance, calling `tar_pick_with_similarity(anaerobe, ...)` will create @@ -181,6 +184,10 @@ test_that("tar_import_and_process_spectra works", { #' @return A list of target objects whose names use the `name` argument as a prefix: #' * `*_fm_interpolated` (e.g., `anaerobe_fm_interpolated`): a matrix produced by [merge_processed_spectra]. #' * `*_sim_interpolated` (e.g., `anaerobe_sim_interpolated`): a symetric cosine similarity matrix produced by [coop::tcosine]. +#' * `*_df_interpolated` (e.g., `anaerobe_df_interpolated`): a tibble with the membership (i.e., which cluster label) each spectra belongs to produced by [delineate_with_similarity]. +#' * `*_processed_metadata` (e.g., `anaerobe_processed_metadata`): a tibble of aggregated technical metadata for each spectra. +#' * `*_clusters` (e.g., `anaerobe_clusters`): a tibble indicating with the previous metadata and which spectra was chosen as reference produced by [set_reference_spectra]. +#' * `*_picked` (e.g., `anaerobe_picked`): a tibble containing all the previous metadata but more importantly which spectra should be picked produced by [pick_spectra]. 
#' #' @export tar_pick_with_similarity <- function( @@ -209,10 +216,18 @@ tar_pick_with_similarity <- function( name_fm <- paste0(name, "_fm_interpolated") name_sim <- paste0(name, "_sim_interpolated") + name_df <- paste0(name, "_df_interpolated") + name_processed_metadata <- paste0(name, "_processed_metadata") + name_clusters <- paste0(name, "_clusters") + name_picked <- paste0(name, "_picked") sym_fm <- as.symbol(name_fm) sym_sim <- as.symbol(name_sim) + sym_df <- as.symbol(name_df) + sym_processed_metadata <- as.symbol(name_processed_metadata) + sym_clusters <- as.symbol(name_clusters) + list( targets::tar_target_raw( name = name_fm, @@ -225,13 +240,45 @@ tar_pick_with_similarity <- function( command = substitute(coop::tcosine(fm_interpolated), env = list(fm_interpolated = sym_fm) ) + ), + targets::tar_target_raw( + name = name_df, + command = substitute(delineate_with_similarity( + sim_matrix = sim_interpolated, + threshold = threshold, + method = "complete"), + env = list(sim_interpolated = sym_sim, threshold = threshold) + ) + ), + targets::tar_target_raw( + name = name_processed_metadata, + command = substitute( + dplyr::bind_rows( + lapply(processed_spectra, `[[`, "metadata")), + env = list(processed_spectra = processed_expr) + ), + iteration = "list" + ), + targets::tar_target_raw( + name = name_clusters, + command = substitute( + set_reference_spectra(df_interpolated, processed_metadata), + env = list(df_interpolated = sym_df, processed_metadata = sym_processed_metadata) + ) + ), + targets::tar_target_raw( + name = name_picked, + command = substitute( + pick_spectra(df_interpolated, ...), + env = list(df_interpolated = sym_clusters) + ) ) ) } ``` ```{r example-tar_pick_with_similarity, eval=FALSE} -tar_pick_with_similarity() +#'\dontrun{tar_pick_with_similarity()} ``` ```{r tests-tar_pick_with_similarity} diff --git a/man/tar_pick_with_similarity.Rd b/man/tar_pick_with_similarity.Rd index 8b7188e..9a20be4 100644 --- a/man/tar_pick_with_similarity.Rd +++ 
b/man/tar_pick_with_similarity.Rd @@ -22,11 +22,18 @@ A list of target objects whose names use the \code{name} argument as a prefix: \itemize{ \item \verb{*_fm_interpolated} (e.g., \code{anaerobe_fm_interpolated}): a matrix produced by \link{merge_processed_spectra}. \item \verb{*_sim_interpolated} (e.g., \code{anaerobe_sim_interpolated}): a symetric cosine similarity matrix produced by \link[coop:cosine]{coop::tcosine}. +\item \verb{*_df_interpolated} (e.g., \code{anaerobe_df_interpolated}): a tibble with the membership (i.e., which cluster label) each spectra belongs to produced by \link{delineate_with_similarity}. +\item \verb{*_processed_metadata} (e.g., \code{anaerobe_processed_metadata}): a tibble of aggregated technical metadata for each spectra. +\item \verb{*_clusters} (e.g., \code{anaerobe_clusters}): a tibble indicating with the previous metadata and which spectra was chosen as reference produced by \link{set_reference_spectra}. +\item \verb{*_picked} (e.g., \code{anaerobe_picked}): a tibble containing all the previous metadata but more importantly which spectra should be picked produced by \link{pick_spectra}. } } \description{ -Description +Given upstream targets of processed spectra (from \link{tar_import_and_process_spectra}) +this target factory facilitates the steps from quality-checked +processed spectra to clusters of spectra. See \link[targets:tar_target]{targets::tar_target} for more +information about what are target objects. 
} \examples{ -tar_pick_with_similarity() +\dontrun{tar_pick_with_similarity()} } diff --git a/vignettes/maldipickr-workflow-with-targets.Rmd b/vignettes/maldipickr-workflow-with-targets.Rmd index a9ad160..3a967d9 100644 --- a/vignettes/maldipickr-workflow-with-targets.Rmd +++ b/vignettes/maldipickr-workflow-with-targets.Rmd @@ -71,7 +71,7 @@ if (Sys.getenv("TAR_LONG_EXAMPLES") == "true") { ```{r example-tar_pick_with_similarity, eval = FALSE} -tar_pick_with_similarity() +#'\dontrun{tar_pick_with_similarity()} ``` From 60bd383741a626f3028dde3dc5222150e348f7ad Mon Sep 17 00:00:00 2001 From: cpauvert Date: Thu, 9 May 2024 18:17:25 +0200 Subject: [PATCH 21/27] fix typo --- R/tar_pick_with_similarity.R | 2 +- dev/maldipickr-workflow-with-targets.Rmd | 2 +- man/tar_pick_with_similarity.Rd | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/R/tar_pick_with_similarity.R b/R/tar_pick_with_similarity.R index 0134e7f..8dcbc5e 100644 --- a/R/tar_pick_with_similarity.R +++ b/R/tar_pick_with_similarity.R @@ -16,7 +16,7 @@ #' #' @return A list of target objects whose names use the `name` argument as a prefix: #' * `*_fm_interpolated` (e.g., `anaerobe_fm_interpolated`): a matrix produced by [merge_processed_spectra]. -#' * `*_sim_interpolated` (e.g., `anaerobe_sim_interpolated`): a symetric cosine similarity matrix produced by [coop::tcosine]. +#' * `*_sim_interpolated` (e.g., `anaerobe_sim_interpolated`): a symmetric cosine similarity matrix produced by [coop::tcosine]. #' * `*_df_interpolated` (e.g., `anaerobe_df_interpolated`): a tibble with the membership (i.e., which cluster label) each spectra belongs to produced by [delineate_with_similarity]. #' * `*_processed_metadata` (e.g., `anaerobe_processed_metadata`): a tibble of aggregated technical metadata for each spectra. #' * `*_clusters` (e.g., `anaerobe_clusters`): a tibble indicating with the previous metadata and which spectra was chosen as reference produced by [set_reference_spectra]. 
diff --git a/dev/maldipickr-workflow-with-targets.Rmd b/dev/maldipickr-workflow-with-targets.Rmd index 9ab41fa..ea62863 100644 --- a/dev/maldipickr-workflow-with-targets.Rmd +++ b/dev/maldipickr-workflow-with-targets.Rmd @@ -183,7 +183,7 @@ test_that("tar_import_and_process_spectra works", { #' #' @return A list of target objects whose names use the `name` argument as a prefix: #' * `*_fm_interpolated` (e.g., `anaerobe_fm_interpolated`): a matrix produced by [merge_processed_spectra]. -#' * `*_sim_interpolated` (e.g., `anaerobe_sim_interpolated`): a symetric cosine similarity matrix produced by [coop::tcosine]. +#' * `*_sim_interpolated` (e.g., `anaerobe_sim_interpolated`): a symmetric cosine similarity matrix produced by [coop::tcosine]. #' * `*_df_interpolated` (e.g., `anaerobe_df_interpolated`): a tibble with the membership (i.e., which cluster label) each spectra belongs to produced by [delineate_with_similarity]. #' * `*_processed_metadata` (e.g., `anaerobe_processed_metadata`): a tibble of aggregated technical metadata for each spectra. #' * `*_clusters` (e.g., `anaerobe_clusters`): a tibble indicating with the previous metadata and which spectra was chosen as reference produced by [set_reference_spectra]. diff --git a/man/tar_pick_with_similarity.Rd b/man/tar_pick_with_similarity.Rd index 9a20be4..95c5629 100644 --- a/man/tar_pick_with_similarity.Rd +++ b/man/tar_pick_with_similarity.Rd @@ -21,7 +21,7 @@ the target \code{anaerobe_sim_interpolated} among others (see the Value section) A list of target objects whose names use the \code{name} argument as a prefix: \itemize{ \item \verb{*_fm_interpolated} (e.g., \code{anaerobe_fm_interpolated}): a matrix produced by \link{merge_processed_spectra}. -\item \verb{*_sim_interpolated} (e.g., \code{anaerobe_sim_interpolated}): a symetric cosine similarity matrix produced by \link[coop:cosine]{coop::tcosine}. 
+\item \verb{*_sim_interpolated} (e.g., \code{anaerobe_sim_interpolated}): a symmetric cosine similarity matrix produced by \link[coop:cosine]{coop::tcosine}. \item \verb{*_df_interpolated} (e.g., \code{anaerobe_df_interpolated}): a tibble with the membership (i.e., which cluster label) each spectra belongs to produced by \link{delineate_with_similarity}. \item \verb{*_processed_metadata} (e.g., \code{anaerobe_processed_metadata}): a tibble of aggregated technical metadata for each spectra. \item \verb{*_clusters} (e.g., \code{anaerobe_clusters}): a tibble indicating with the previous metadata and which spectra was chosen as reference produced by \link{set_reference_spectra}. From 166ee94becc56c40f6322c35d9dfaedd71231723 Mon Sep 17 00:00:00 2001 From: cpauvert Date: Thu, 9 May 2024 18:18:16 +0200 Subject: [PATCH 22/27] fix issue with unrecognized processed list --- R/tar_pick_with_similarity.R | 2 +- dev/maldipickr-workflow-with-targets.Rmd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/tar_pick_with_similarity.R b/R/tar_pick_with_similarity.R index 8dcbc5e..cde7b0a 100644 --- a/R/tar_pick_with_similarity.R +++ b/R/tar_pick_with_similarity.R @@ -46,7 +46,7 @@ tar_pick_with_similarity <- function( # which will create list(fast_processed, slow_processed) from # targets_spectra = c(fast_target_factory, slow_target_factory) name_processed <- tarchetypes::tar_select_names(targets_spectra, targets::ends_with("_processed")) - processed_expr <- as.call(c(as.symbol("list"), lapply(name_processed, as.symbol))) + processed_expr <- as.call(c(as.symbol("c"), lapply(name_processed, as.symbol))) name_fm <- paste0(name, "_fm_interpolated") diff --git a/dev/maldipickr-workflow-with-targets.Rmd b/dev/maldipickr-workflow-with-targets.Rmd index ea62863..2dde4b5 100644 --- a/dev/maldipickr-workflow-with-targets.Rmd +++ b/dev/maldipickr-workflow-with-targets.Rmd @@ -211,7 +211,7 @@ tar_pick_with_similarity <- function( # which will create list(fast_processed, 
slow_processed) from # targets_spectra = c(fast_target_factory, slow_target_factory) name_processed <- tarchetypes::tar_select_names(targets_spectra, targets::ends_with("_processed")) - processed_expr <- as.call(c(as.symbol("list"), lapply(name_processed, as.symbol))) + processed_expr <- as.call(c(as.symbol("c"), lapply(name_processed, as.symbol))) name_fm <- paste0(name, "_fm_interpolated") From dbfbf3fbc7c206121d78b7d68dc91bb25a214cbc Mon Sep 17 00:00:00 2001 From: cpauvert Date: Mon, 13 May 2024 10:57:48 +0200 Subject: [PATCH 23/27] convert the factory output from vectors to list this fixes a downstream issue with list structure for `merge_processed_spectra` --- R/tar_import_and_process_spectra.R | 2 +- dev/maldipickr-workflow-with-targets.Rmd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/tar_import_and_process_spectra.R b/R/tar_import_and_process_spectra.R index 2e2c9ec..232f1c9 100644 --- a/R/tar_import_and_process_spectra.R +++ b/R/tar_import_and_process_spectra.R @@ -118,7 +118,7 @@ tar_import_and_process_spectra <- function( pattern = substitute(map(sym_valid_spectra), env = list(sym_valid_spectra = sym_valid_spectra) ), - format = format + format = format, iteration = "list" ) ) } diff --git a/dev/maldipickr-workflow-with-targets.Rmd b/dev/maldipickr-workflow-with-targets.Rmd index 2dde4b5..a8bae54 100644 --- a/dev/maldipickr-workflow-with-targets.Rmd +++ b/dev/maldipickr-workflow-with-targets.Rmd @@ -133,7 +133,7 @@ tar_import_and_process_spectra <- function( pattern = substitute(map(sym_valid_spectra), env = list(sym_valid_spectra = sym_valid_spectra) ), - format = format + format = format, iteration = "list" ) ) } From 8afef9e9e5b2049f32b3d22e5f6018ec6d9a429f Mon Sep 17 00:00:00 2001 From: cpauvert Date: Mon, 13 May 2024 11:31:45 +0200 Subject: [PATCH 24/27] draft test for targets factory --- dev/maldipickr-workflow-with-targets.Rmd | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git 
a/dev/maldipickr-workflow-with-targets.Rmd b/dev/maldipickr-workflow-with-targets.Rmd index a8bae54..419e4bc 100644 --- a/dev/maldipickr-workflow-with-targets.Rmd +++ b/dev/maldipickr-workflow-with-targets.Rmd @@ -160,7 +160,25 @@ if (Sys.getenv("TAR_LONG_EXAMPLES") == "true") { ```{r tests-tar_import_and_process_spectra} test_that("tar_import_and_process_spectra works", { - expect_true(inherits(tar_import_and_process_spectra, "function")) + skip_if_not_installed("targets") + skip_if_not_installed("tarchetypes") + skip_on_cran() + targets::tar_dir({ # tar_dir() runs code from a temporary directory. + targets::tar_script({ + library(maldipickr) + list( + tar_import_and_process_spectra( + name = "anaerobe", + raw_spectra_directories = system.file( + "toy-species-spectra", + package = "maldipickr"), + tolerance = 1 + ) + )},ask = FALSE) + # Enough targets are created. + out <- targets::tar_manifest(callr_function = NULL) + expect_equal(nrow(out), 7L) + }) }) ``` From 875e2a2e6e5d95bbb657c97eb0a6d9a2b8cef100 Mon Sep 17 00:00:00 2001 From: cpauvert Date: Mon, 13 May 2024 14:54:17 +0200 Subject: [PATCH 25/27] add test for import spectra target factory inspired by examples in ropensci/jagstargets --- dev/config_fusen.yaml | 8 ++- dev/maldipickr-workflow-with-targets.Rmd | 68 +++++++++++++++---- .../test-tar_import_and_process_spectra.R | 62 ++++++++++++++++- 3 files changed, 122 insertions(+), 16 deletions(-) diff --git a/dev/config_fusen.yaml b/dev/config_fusen.yaml index def8a66..fc41c54 100644 --- a/dev/config_fusen.yaml +++ b/dev/config_fusen.yaml @@ -86,8 +86,12 @@ keep: maldipickr-workflow-with-targets.Rmd: path: dev/maldipickr-workflow-with-targets.Rmd state: active - R: R/tar_import_and_process_spectra.R - tests: tests/testthat/test-tar_import_and_process_spectra.R + R: + - R/tar_import_and_process_spectra.R + - R/tar_pick_with_similarity.R + tests: + - tests/testthat/test-tar_import_and_process_spectra.R + - tests/testthat/test-tar_pick_with_similarity.R 
vignettes: vignettes/maldipickr-workflow-with-targets.Rmd inflate: flat_file: dev/maldipickr-workflow-with-targets.Rmd diff --git a/dev/maldipickr-workflow-with-targets.Rmd b/dev/maldipickr-workflow-with-targets.Rmd index 419e4bc..a019914 100644 --- a/dev/maldipickr-workflow-with-targets.Rmd +++ b/dev/maldipickr-workflow-with-targets.Rmd @@ -159,25 +159,67 @@ if (Sys.getenv("TAR_LONG_EXAMPLES") == "true") { ``` ```{r tests-tar_import_and_process_spectra} +# Tests for targets factory borrowed from +# src: https://github.com/ropensci/jagstargets/blob/2bad8b2958d0252a5993d341d38f8d9d02aeb41a/tests/testthat/test-tar_jags_rep_dic.R test_that("tar_import_and_process_spectra works", { skip_if_not_installed("targets") skip_if_not_installed("tarchetypes") skip_on_cran() targets::tar_dir({ # tar_dir() runs code from a temporary directory. targets::tar_script({ - library(maldipickr) - list( - tar_import_and_process_spectra( - name = "anaerobe", - raw_spectra_directories = system.file( - "toy-species-spectra", - package = "maldipickr"), - tolerance = 1 - ) - )},ask = FALSE) - # Enough targets are created. - out <- targets::tar_manifest(callr_function = NULL) - expect_equal(nrow(out), 7L) + library(maldipickr) + list( + tar_import_and_process_spectra( + name = "anaerobe", + raw_spectra_directories = system.file( + "toy-species-spectra", + package = "maldipickr"), + tolerance = 1 + ) + )},ask = FALSE) + # Enough targets are created. + out <- targets::tar_manifest(callr_function = NULL) + expect_equal(nrow(out), 7L) + # Nodes in the graph are connected properly. 
+ out <- targets::tar_network(callr_function = NULL, targets_only = TRUE)$edges + out <- dplyr::arrange(out, from, to) + rownames(out) <- NULL + exp <- tibble::tribble( + ~from, ~to, + "anaerobe_checks", "anaerobe_spectra_stats", + "anaerobe_checks", "anaerobe_valid_spectra", + "anaerobe_plates", "anaerobe_spectra_raw", + "anaerobe_plates", "anaerobe_spectra_stats", + "anaerobe_plates_files", "anaerobe_plates", + "anaerobe_spectra_raw", "anaerobe_checks", + "anaerobe_spectra_raw", "anaerobe_valid_spectra", + "anaerobe_valid_spectra", "anaerobe_processed" + ) + exp <- dplyr::arrange(exp, from, to) + rownames(exp) <- NULL + expect_equal(out, exp) + # The pipeline produces correctly formatted output. + capture.output(suppressWarnings(targets::tar_make(callr_function = NULL))) + expect_equal( nrow( + targets::tar_read(anaerobe_processed)[[1]][["metadata"]] + ), 6L ) + }) +}) +test_that("tar_import_and_process_spectra fails on wrong input", { + skip_if_not_installed("targets") + skip_if_not_installed("tarchetypes") + skip_on_cran() + targets::tar_dir({ # tar_dir() runs code from a temporary directory. 
+ targets::tar_script({ + library(maldipickr) + list( + tar_import_and_process_spectra( + name = "anaerobe", + raw_spectra_directories = "data_directory_that_should_not_exist", + tolerance = 1 + ) + )},ask = FALSE) + expect_error(targets::tar_make(callr_function = NULL), class = "tar_condition_validate") }) }) ``` diff --git a/tests/testthat/test-tar_import_and_process_spectra.R b/tests/testthat/test-tar_import_and_process_spectra.R index 612b7c9..b770c89 100644 --- a/tests/testthat/test-tar_import_and_process_spectra.R +++ b/tests/testthat/test-tar_import_and_process_spectra.R @@ -1,5 +1,65 @@ # WARNING - Generated by {fusen} from dev/maldipickr-workflow-with-targets.Rmd: do not edit by hand +# Tests for targets factory borrowed from +# src: https://github.com/ropensci/jagstargets/blob/2bad8b2958d0252a5993d341d38f8d9d02aeb41a/tests/testthat/test-tar_jags_rep_dic.R test_that("tar_import_and_process_spectra works", { - expect_true(inherits(tar_import_and_process_spectra, "function")) + skip_if_not_installed("targets") + skip_if_not_installed("tarchetypes") + skip_on_cran() + targets::tar_dir({ # tar_dir() runs code from a temporary directory. + targets::tar_script({ + library(maldipickr) + list( + tar_import_and_process_spectra( + name = "anaerobe", + raw_spectra_directories = system.file( + "toy-species-spectra", + package = "maldipickr"), + tolerance = 1 + ) + )},ask = FALSE) + # Enough targets are created. + out <- targets::tar_manifest(callr_function = NULL) + expect_equal(nrow(out), 7L) + # Nodes in the graph are connected properly. 
+ out <- targets::tar_network(callr_function = NULL, targets_only = TRUE)$edges + out <- dplyr::arrange(out, from, to) + rownames(out) <- NULL + exp <- tibble::tribble( + ~from, ~to, + "anaerobe_checks", "anaerobe_spectra_stats", + "anaerobe_checks", "anaerobe_valid_spectra", + "anaerobe_plates", "anaerobe_spectra_raw", + "anaerobe_plates", "anaerobe_spectra_stats", + "anaerobe_plates_files", "anaerobe_plates", + "anaerobe_spectra_raw", "anaerobe_checks", + "anaerobe_spectra_raw", "anaerobe_valid_spectra", + "anaerobe_valid_spectra", "anaerobe_processed" + ) + exp <- dplyr::arrange(exp, from, to) + rownames(exp) <- NULL + expect_equal(out, exp) + # The pipeline produces correctly formatted output. + capture.output(suppressWarnings(targets::tar_make(callr_function = NULL))) + expect_equal( nrow( + targets::tar_read(anaerobe_processed)[[1]][["metadata"]] + ), 6L ) + }) +}) +test_that("tar_import_and_process_spectra fails on wrong input", { + skip_if_not_installed("targets") + skip_if_not_installed("tarchetypes") + skip_on_cran() + targets::tar_dir({ # tar_dir() runs code from a temporary directory. 
+ targets::tar_script({ + library(maldipickr) + list( + tar_import_and_process_spectra( + name = "anaerobe", + raw_spectra_directories = "data_directory_that_should_not_exist", + tolerance = 1 + ) + )},ask = FALSE) + expect_error(targets::tar_make(callr_function = NULL), class = "tar_condition_validate") + }) }) From c4c2c1d762e4561c4b5b5750f7b550b3219979c4 Mon Sep 17 00:00:00 2001 From: cpauvert Date: Mon, 13 May 2024 15:03:45 +0200 Subject: [PATCH 26/27] precise the cosine function and link correctly to the man page --- R/tar_pick_with_similarity.R | 2 +- dev/maldipickr-workflow-with-targets.Rmd | 2 +- man/tar_pick_with_similarity.Rd | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/R/tar_pick_with_similarity.R b/R/tar_pick_with_similarity.R index cde7b0a..c0ee1db 100644 --- a/R/tar_pick_with_similarity.R +++ b/R/tar_pick_with_similarity.R @@ -16,7 +16,7 @@ #' #' @return A list of target objects whose names use the `name` argument as a prefix: #' * `*_fm_interpolated` (e.g., `anaerobe_fm_interpolated`): a matrix produced by [merge_processed_spectra]. -#' * `*_sim_interpolated` (e.g., `anaerobe_sim_interpolated`): a symmetric cosine similarity matrix produced by [coop::tcosine]. +#' * `*_sim_interpolated` (e.g., `anaerobe_sim_interpolated`): a symmetric cosine similarity matrix produced by the transposed version of [coop::cosine]. #' * `*_df_interpolated` (e.g., `anaerobe_df_interpolated`): a tibble with the membership (i.e., which cluster label) each spectra belongs to produced by [delineate_with_similarity]. #' * `*_processed_metadata` (e.g., `anaerobe_processed_metadata`): a tibble of aggregated technical metadata for each spectra. #' * `*_clusters` (e.g., `anaerobe_clusters`): a tibble indicating with the previous metadata and which spectra was chosen as reference produced by [set_reference_spectra]. 
diff --git a/dev/maldipickr-workflow-with-targets.Rmd b/dev/maldipickr-workflow-with-targets.Rmd index a019914..e944a36 100644 --- a/dev/maldipickr-workflow-with-targets.Rmd +++ b/dev/maldipickr-workflow-with-targets.Rmd @@ -243,7 +243,7 @@ test_that("tar_import_and_process_spectra fails on wrong input", { #' #' @return A list of target objects whose names use the `name` argument as a prefix: #' * `*_fm_interpolated` (e.g., `anaerobe_fm_interpolated`): a matrix produced by [merge_processed_spectra]. -#' * `*_sim_interpolated` (e.g., `anaerobe_sim_interpolated`): a symmetric cosine similarity matrix produced by [coop::tcosine]. +#' * `*_sim_interpolated` (e.g., `anaerobe_sim_interpolated`): a symmetric cosine similarity matrix produced by the transposed version of [coop::cosine]. #' * `*_df_interpolated` (e.g., `anaerobe_df_interpolated`): a tibble with the membership (i.e., which cluster label) each spectra belongs to produced by [delineate_with_similarity]. #' * `*_processed_metadata` (e.g., `anaerobe_processed_metadata`): a tibble of aggregated technical metadata for each spectra. #' * `*_clusters` (e.g., `anaerobe_clusters`): a tibble indicating with the previous metadata and which spectra was chosen as reference produced by [set_reference_spectra]. diff --git a/man/tar_pick_with_similarity.Rd b/man/tar_pick_with_similarity.Rd index 95c5629..ff2dca5 100644 --- a/man/tar_pick_with_similarity.Rd +++ b/man/tar_pick_with_similarity.Rd @@ -21,7 +21,7 @@ the target \code{anaerobe_sim_interpolated} among others (see the Value section) A list of target objects whose names use the \code{name} argument as a prefix: \itemize{ \item \verb{*_fm_interpolated} (e.g., \code{anaerobe_fm_interpolated}): a matrix produced by \link{merge_processed_spectra}. -\item \verb{*_sim_interpolated} (e.g., \code{anaerobe_sim_interpolated}): a symmetric cosine similarity matrix produced by \link[coop:cosine]{coop::tcosine}. 
+\item \verb{*_sim_interpolated} (e.g., \code{anaerobe_sim_interpolated}): a symmetric cosine similarity matrix produced by the transposed version of \link[coop:cosine]{coop::cosine}. \item \verb{*_df_interpolated} (e.g., \code{anaerobe_df_interpolated}): a tibble with the membership (i.e., which cluster label) each spectra belongs to produced by \link{delineate_with_similarity}. \item \verb{*_processed_metadata} (e.g., \code{anaerobe_processed_metadata}): a tibble of aggregated technical metadata for each spectra. \item \verb{*_clusters} (e.g., \code{anaerobe_clusters}): a tibble indicating with the previous metadata and which spectra was chosen as reference produced by \link{set_reference_spectra}. From 1c5ea4e0b5cea0374d98e11d97a254cc28833058 Mon Sep 17 00:00:00 2001 From: cpauvert Date: Mon, 13 May 2024 16:47:00 +0200 Subject: [PATCH 27/27] draft and explore objects within picking factory --- R/tar_pick_with_similarity.R | 23 +++++++++++++++---- dev/maldipickr-workflow-with-targets.Rmd | 26 +++++++++++++++++---- man/tar_pick_with_similarity.Rd | 29 ++++++++++++++++++++++-- 3 files changed, 67 insertions(+), 11 deletions(-) diff --git a/R/tar_pick_with_similarity.R b/R/tar_pick_with_similarity.R index c0ee1db..70330da 100644 --- a/R/tar_pick_with_similarity.R +++ b/R/tar_pick_with_similarity.R @@ -12,7 +12,7 @@ #' the target `anaerobe_sim_interpolated` among others (see the Value section). #' @param targets_spectra A list of targets produced by [tar_import_and_process_spectra] that should contains one or more targets named `*_processed`. #' @param threshold A numeric value indicating the minimal cosine similarity between two spectra. -#' @param ... Arguments passed to [pick_spectra] +#' @inheritParams pick_spectra #' #' @return A list of target objects whose names use the `name` argument as a prefix: #' * `*_fm_interpolated` (e.g., `anaerobe_fm_interpolated`): a matrix produced by [merge_processed_spectra]. 
@@ -28,7 +28,11 @@ tar_pick_with_similarity <- function( name, targets_spectra, - threshold, ...) { + threshold, + metadata_df = NULL, criteria_column = NULL, + hard_mask_column = NULL, soft_mask_column = NULL, + is_descending_order = TRUE, + is_sorted = FALSE) { rlang::check_installed(c("targets", "tarchetypes", "coop"), reason = "to facilitate {maldipickr} workflow development" ) @@ -104,8 +108,19 @@ tar_pick_with_similarity <- function( targets::tar_target_raw( name = name_picked, command = substitute( - pick_spectra(df_interpolated, ...), - env = list(df_interpolated = sym_clusters) + pick_spectra(cluster_df = df_interpolated, + metadata_df = metadata_df, criteria_column = criteria_column, + hard_mask_column = hard_mask_column, + soft_mask_column = soft_mask_column, + is_descending_order = is_descending_order, + is_sorted = is_sorted), + env = list(df_interpolated = sym_clusters, + metadata_df = metadata_df, criteria_column = criteria_column, + hard_mask_column = hard_mask_column, + soft_mask_column = soft_mask_column, + is_descending_order = is_descending_order, + is_sorted = is_sorted + ) ) ) ) diff --git a/dev/maldipickr-workflow-with-targets.Rmd b/dev/maldipickr-workflow-with-targets.Rmd index e944a36..76ab7c0 100644 --- a/dev/maldipickr-workflow-with-targets.Rmd +++ b/dev/maldipickr-workflow-with-targets.Rmd @@ -239,7 +239,7 @@ test_that("tar_import_and_process_spectra fails on wrong input", { #' the target `anaerobe_sim_interpolated` among others (see the Value section). #' @param targets_spectra A list of targets produced by [tar_import_and_process_spectra] that should contains one or more targets named `*_processed`. #' @param threshold A numeric value indicating the minimal cosine similarity between two spectra. -#' @param ... 
Arguments passed to [pick_spectra] +#' @inheritParams pick_spectra #' #' @return A list of target objects whose names use the `name` argument as a prefix: #' * `*_fm_interpolated` (e.g., `anaerobe_fm_interpolated`): a matrix produced by [merge_processed_spectra]. @@ -253,7 +253,11 @@ test_that("tar_import_and_process_spectra fails on wrong input", { tar_pick_with_similarity <- function( name, targets_spectra, - threshold, ...) { + threshold, + metadata_df = NULL, criteria_column = NULL, + hard_mask_column = NULL, soft_mask_column = NULL, + is_descending_order = TRUE, + is_sorted = FALSE) { rlang::check_installed(c("targets", "tarchetypes", "coop"), reason = "to facilitate {maldipickr} workflow development" ) @@ -287,7 +291,8 @@ tar_pick_with_similarity <- function( sym_df <- as.symbol(name_df) sym_processed_metadata <- as.symbol(name_processed_metadata) sym_clusters <- as.symbol(name_clusters) - + print("metadata_df:") + print(metadata_df) list( targets::tar_target_raw( name = name_fm, @@ -329,8 +334,19 @@ tar_pick_with_similarity <- function( targets::tar_target_raw( name = name_picked, command = substitute( - pick_spectra(df_interpolated, ...), - env = list(df_interpolated = sym_clusters) + pick_spectra(cluster_df = df_interpolated, + metadata_df = metadata_df, criteria_column = criteria_column, + hard_mask_column = hard_mask_column, + soft_mask_column = soft_mask_column, + is_descending_order = is_descending_order, + is_sorted = is_sorted), + env = list(df_interpolated = sym_clusters, + metadata_df = metadata_df, criteria_column = criteria_column, + hard_mask_column = hard_mask_column, + soft_mask_column = soft_mask_column, + is_descending_order = is_descending_order, + is_sorted = is_sorted + ) ) ) ) diff --git a/man/tar_pick_with_similarity.Rd b/man/tar_pick_with_similarity.Rd index ff2dca5..5a12e5b 100644 --- a/man/tar_pick_with_similarity.Rd +++ b/man/tar_pick_with_similarity.Rd @@ -4,7 +4,17 @@ \alias{tar_pick_with_similarity} \title{Delineate clusters of 
spectra to be picked using targets} \usage{ -tar_pick_with_similarity(name, targets_spectra, threshold, ...) +tar_pick_with_similarity( + name, + targets_spectra, + threshold, + metadata_df = NULL, + criteria_column = NULL, + hard_mask_column = NULL, + soft_mask_column = NULL, + is_descending_order = TRUE, + is_sorted = FALSE +) } \arguments{ \item{name}{A symbol indicating the prefix of all targets created by the factory. @@ -15,7 +25,22 @@ the target \code{anaerobe_sim_interpolated} among others (see the Value section) \item{threshold}{A numeric value indicating the minimal cosine similarity between two spectra.} -\item{...}{Arguments passed to \link{pick_spectra}} +\item{metadata_df}{Optional tibble with relevant metadata to guide the +picking process (e.g., OD600).} + +\item{criteria_column}{Optional character indicating the column in \code{metadata_df} +to be used as a criteria.} + +\item{hard_mask_column}{Column name in the \code{cluster_df} or \code{metadata_df} tibble indicating whether the spectra, \strong{and the clusters to which they belong} should be discarded (\code{TRUE}) or not (\code{FALSE}) before the picking decision.} + +\item{soft_mask_column}{Column name in the \code{cluster_df} or \code{metadata_df} tibble indicating whether the spectra should be discarded (\code{TRUE}) or not (\code{FALSE}) before the picking decision.} + +\item{is_descending_order}{Optional logical indicating whether to sort the \code{criteria_column} from the highest-to-lowest value (\code{TRUE}) or lowest-to-highest (\code{FALSE}).} + +\item{is_sorted}{Optional logical to indicate that the \code{cluster_df} is +already sorted by cluster based on (usually multiple) internal criteria to +pick the first of each cluster. This flag is \strong{overridden} if a \code{metadata_df} +is provided.} } \value{ A list of target objects whose names use the \code{name} argument as a prefix: