diff --git a/R/merge_processed_spectra.R b/R/merge_processed_spectra.R index 1dbdf38..acf439d 100644 --- a/R/merge_processed_spectra.R +++ b/R/merge_processed_spectra.R @@ -5,13 +5,18 @@ #' Aggregate multiple processed spectra, their associated peaks and metadata into a feature matrix and a concatenated metadata table. #' #' @param processed_spectra A [list] of the processed spectra and associated peaks and metadata in two possible formats: -#' * A list of **in-memory objects** (named `spectra`, `peaks`, `metadata`) produced by [process_spectra]. +#' * A list of **in-memory objects** (named `spectra`, `peaks`, `metadata`) produced by [process_spectra]. Named lists will have names dropped, see Note. #' * `r lifecycle::badge('deprecated')` A list of **paths** to RDS files produced by [process_spectra] when using the `rds_prefix` option. #' @param remove_peakless_spectra A logical indicating whether to discard the spectra without detected peaks. #' @param interpolate_missing A logical indicating if intensity values for missing peaks should be interpolated from the processed spectra signal or left NA which would then be converted to 0. #' #' @return A *n*×*p* matrix, with *n* spectra as rows and *p* features as columns that are the peaks found in all the processed spectra. #' +#' @note When aggregating multiple runs of processed spectra, if a named list is +#' provided, the names are dropped. Otherwise, these names would be appended +#' to the rownames of the matrix, which would prevent the downstream +#' metadata merge. +#' #' @seealso [process_spectra], the "Value" section in [`MALDIquant::intensityMatrix`](https://rdrr.io/cran/MALDIquant/man/intensityMatrix-functions.html) #' @export #' @examples @@ -43,6 +48,14 @@ #' # The feature matrix has 3×6=18 spectra as rows and #' # 35 peaks as columns #' dim(fm_all) +#' +#' # If using a named list, names will be dropped and are not propagated to the matrix.
+#' \dontrun{ +#' fm_all <- merge_processed_spectra( +#' list("A" = processed, "B" = processed, "C" = processed)) +#' any(grepl("A|B|C", rownames(fm_all))) # FALSE +#' } +#' merge_processed_spectra <- function(processed_spectra, remove_peakless_spectra = TRUE, interpolate_missing = TRUE) { if (any( is.null(processed_spectra), @@ -68,6 +81,12 @@ merge_processed_spectra <- function(processed_spectra, remove_peakless_spectra = processed <- processed_spectra } + # Names at the upper level cause problems when aggregating multiple runs by + # being appended to the rownames of the matrix, thus preventing downstream + # metadata merge. + if(!is.null(names(processed))){ + processed <- unname(processed) + } stopifnot(is_a_processed_spectra_list(processed)) peakless <- list() diff --git a/dev/dereplicate-spectra.Rmd b/dev/dereplicate-spectra.Rmd index 12be27f..3172f05 100644 --- a/dev/dereplicate-spectra.Rmd +++ b/dev/dereplicate-spectra.Rmd @@ -217,13 +217,18 @@ The current function enables the analyst to decide whether to interpolate the va #' Aggregate multiple processed spectra, their associated peaks and metadata into a feature matrix and a concatenated metadata table. #' #' @param processed_spectra A [list] of the processed spectra and associated peaks and metadata in two possible formats: -#' * A list of **in-memory objects** (named `spectra`, `peaks`, `metadata`) produced by [process_spectra]. +#' * A list of **in-memory objects** (named `spectra`, `peaks`, `metadata`) produced by [process_spectra]. Named lists will have names dropped, see Note. #' * `r lifecycle::badge('deprecated')` A list of **paths** to RDS files produced by [process_spectra] when using the `rds_prefix` option. #' @param remove_peakless_spectra A logical indicating whether to discard the spectra without detected peaks.
#' @param interpolate_missing A logical indicating if intensity values for missing peaks should be interpolated from the processed spectra signal or left NA which would then be converted to 0. #' #' @return A *n*×*p* matrix, with *n* spectra as rows and *p* features as columns that are the peaks found in all the processed spectra. #' +#' @note When aggregating multiple runs of processed spectra, if a named list is +#' provided, the names are dropped. Otherwise, these names would be appended +#' to the rownames of the matrix, which would prevent the downstream +#' metadata merge. +#' #' @seealso [process_spectra], the "Value" section in [`MALDIquant::intensityMatrix`](https://rdrr.io/cran/MALDIquant/man/intensityMatrix-functions.html) #' @export merge_processed_spectra <- function(processed_spectra, remove_peakless_spectra = TRUE, interpolate_missing = TRUE) { @@ -251,6 +256,12 @@ merge_processed_spectra <- function(processed_spectra, remove_peakless_spectra = processed <- processed_spectra } + # Names at the upper level cause problems when aggregating multiple runs by + # being appended to the rownames of the matrix, thus preventing downstream + # metadata merge. + if(!is.null(names(processed))){ + processed <- unname(processed) + } stopifnot(is_a_processed_spectra_list(processed)) peakless <- list() @@ -335,6 +346,14 @@ fm_all <- merge_processed_spectra(list(processed, processed, processed)) # The feature matrix has 3×6=18 spectra as rows and # 35 peaks as columns dim(fm_all) + +# If using a named list, names will be dropped and are not propagated to the matrix.
+#' \dontrun{ +#' fm_all <- merge_processed_spectra( +#' list("A" = processed, "B" = processed, "C" = processed)) +#' any(grepl("A|B|C", rownames(fm_all))) # FALSE +#' } +#' ``` ```{r tests-merge_processed_spectra} @@ -351,6 +370,14 @@ test_that("merge_processed_spectra works", { expect_identical( sum(fm == 0), 0L ) + expect_no_error( + fm_multiple <- merge_processed_spectra( + list("with_name_bar" = processed_test, "with_name_foo" = processed_test) + ) + ) + expect_equal( + dim(fm_multiple), c(4, 26) + ) }) test_that("merge_processed_spectra works without interpolation", { expect_no_error( diff --git a/man/merge_processed_spectra.Rd b/man/merge_processed_spectra.Rd index 0344ac4..8c64334 100644 --- a/man/merge_processed_spectra.Rd +++ b/man/merge_processed_spectra.Rd @@ -13,7 +13,7 @@ merge_processed_spectra( \arguments{ \item{processed_spectra}{A \link{list} of the processed spectra and associated peaks and metadata in two possible formats: \itemize{ -\item A list of \strong{in-memory objects} (named \code{spectra}, \code{peaks}, \code{metadata}) produced by \link{process_spectra}. +\item A list of \strong{in-memory objects} (named \code{spectra}, \code{peaks}, \code{metadata}) produced by \link{process_spectra}. Named lists will have names dropped, see Note. \item \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} A list of \strong{paths} to RDS files produced by \link{process_spectra} when using the \code{rds_prefix} option. }} @@ -27,6 +27,12 @@ A \emph{n}×\emph{p} matrix, with \emph{n} spectra as rows and \emph{p} features \description{ Aggregate multiple processed spectra, their associated peaks and metadata into a feature matrix and a concatenated metadata table. 
} +\note{ +When aggregating multiple runs of processed spectra, if a named list is +provided, the names are dropped. Otherwise, these names would be appended +to the rownames of the matrix, which would prevent the downstream +metadata merge. +} \examples{ # Get an example directory of six Bruker MALDI Biotyper spectra directory_biotyper_spectra <- system.file( @@ -56,6 +62,14 @@ fm_all <- merge_processed_spectra(list(processed, processed, processed)) # The feature matrix has 3×6=18 spectra as rows and # 35 peaks as columns dim(fm_all) + +# If using a named list, names will be dropped and are not propagated to the matrix. +\dontrun{ +fm_all <- merge_processed_spectra( + list("A" = processed, "B" = processed, "C" = processed)) +any(grepl("A|B|C", rownames(fm_all))) # FALSE + } + } \seealso{ \link{process_spectra}, the "Value" section in \href{https://rdrr.io/cran/MALDIquant/man/intensityMatrix-functions.html}{\code{MALDIquant::intensityMatrix}} diff --git a/tests/testthat/test-merge_processed_spectra.R b/tests/testthat/test-merge_processed_spectra.R index d9e9817..9dafefb 100644 --- a/tests/testthat/test-merge_processed_spectra.R +++ b/tests/testthat/test-merge_processed_spectra.R @@ -13,6 +13,14 @@ test_that("merge_processed_spectra works", { expect_identical( sum(fm == 0), 0L ) + expect_no_error( + fm_multiple <- merge_processed_spectra( + list("with_name_bar" = processed_test, "with_name_foo" = processed_test) + ) + ) + expect_equal( + dim(fm_multiple), c(4, 26) + ) }) test_that("merge_processed_spectra works without interpolation", { expect_no_error( diff --git a/vignettes/dereplicate-bruker-maldi-biotyper-spectra.Rmd b/vignettes/dereplicate-bruker-maldi-biotyper-spectra.Rmd index b5343fa..41e3438 100644 --- a/vignettes/dereplicate-bruker-maldi-biotyper-spectra.Rmd +++ b/vignettes/dereplicate-bruker-maldi-biotyper-spectra.Rmd @@ -109,6 +109,14 @@ fm_all <- merge_processed_spectra(list(processed, processed, processed)) # The feature
matrix has 3×6=18 spectra as rows and # 35 peaks as columns dim(fm_all) + +# If using a named list, names will be dropped and are not propagated to the matrix. +#' \dontrun{ +#' fm_all <- merge_processed_spectra( +#' list("A" = processed, "B" = processed, "C" = processed)) +#' any(grepl("A|B|C", rownames(fm_all))) # FALSE +#' } +#' ```