From 533a2252fd6223a73364d8091ff84f175b99de77 Mon Sep 17 00:00:00 2001 From: jorainer Date: Wed, 31 May 2023 07:43:19 +0200 Subject: [PATCH] refactor: applyProcessing correctly manages processing queue - `applyProcessing` ensures that all peaks variables are properly updates/subset depending on the processing queue (issue #289). --- NEWS.md | 2 + R/MsBackendMemory.R | 3 +- R/Spectra-functions.R | 20 +++--- R/Spectra.R | 47 ++++++++++--- man/Spectra.Rd | 9 ++- tests/testthat/test_MsBackendMemory.R | 4 +- tests/testthat/test_Spectra.R | 96 +++++++++++++++++++++++++++ 7 files changed, 156 insertions(+), 25 deletions(-) diff --git a/NEWS.md b/NEWS.md index 0124d539..2c83016f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -11,6 +11,8 @@ queue. - `$,Spectra` to access peak variables ensures the lazy evaluation queue is applied prior to extracting the values. +- `applyProcessing` correctly subsets and processes all peak variables + depending on the processing queue. ## Changes in 1.11.2 diff --git a/R/MsBackendMemory.R b/R/MsBackendMemory.R index 8875dea6..f2117847 100644 --- a/R/MsBackendMemory.R +++ b/R/MsBackendMemory.R @@ -404,7 +404,8 @@ setReplaceMethod("peaksData", "MsBackendMemory", function(object, value) { pcn <- intersect(c("mz", "intensity"), cn) if (length(pcn)) object@peaksData <- lapply( - value, function(z) base::as.matrix(z[, pcn, drop = FALSE])) + value, function(z) as.matrix(z[, pcn, drop = FALSE], + rownames.force = FALSE)) pcn <- setdiff(cn, c("mz", "intensity")) if (length(pcn)) object@peaksDataFrame <- lapply( diff --git a/R/Spectra-functions.R b/R/Spectra-functions.R index b1a679b1..428d6d55 100644 --- a/R/Spectra-functions.R +++ b/R/Spectra-functions.R @@ -192,14 +192,18 @@ applyProcessing <- function(object, f = dataStorage(object), if (length(f) != length(object)) stop("length 'f' has to be equal to the length of 'object' (", length(object), ")") - bknds <- bplapply(split(object@backend, f = f), function(z, queue, svars) { - if (length(svars)) - spd <- as.data.frame(spectraData(z, columns = svars)) - else spd <- NULL - peaksData(z) <- .apply_processing_queue(peaksData(z), spd, queue) - z - }, queue = object@processingQueue, - svars = .processingQueueVariables(object), BPPARAM = BPPARAM) + pv <- peaksVariables(object) + bknds <- bplapply( + split(object@backend, f = f), function(z, queue, pv, svars) { + if (length(svars)) + spd <- as.data.frame(spectraData(z, columns = svars)) + else spd <- NULL + peaksData(z) <- .apply_processing_queue( + peaksData(z, columns = pv), spd, queue) + z + }, queue = object@processingQueue, pv = pv, + svars = .processingQueueVariables(object), + BPPARAM = BPPARAM) bknds <- backendMerge(bknds) if (is.unsorted(f)) bknds <- bknds[order(unlist(split(seq_along(bknds), f), diff --git a/R/Spectra.R b/R/Spectra.R index 41364844..89b4072d 100644 --- a/R/Spectra.R +++ b/R/Spectra.R @@ -240,7 +240,7 @@ NULL #' each backend must support extraction of `"mz"` and `"intensity"` columns). #' Parameter `columns` defaults to `c("mz", "intensity")` but any value #' returned from `peaksVariables` is supported. -#' Note also that it is possible to extract the peaks matrices with +#' Note also that it is possible to extract the peak data with #' `as(x, "list")` and `as(x, "SimpleList")` as a `list` and `SimpleList`, #' respectively. Note however that, in contrast to `peaksData`, `as` does not #' support the parameter `columns`. @@ -298,8 +298,11 @@ NULL #' - `spectraNames`, `spectraNames<-`: gets or sets the spectra names. #' #' - `spectraVariables`: returns a `character` vector with the -#' available spectra variables (columns, fields or attributes) -#' available in `object`. +#' available spectra variables (columns, fields or attributes of each +#' spectrum) available in `object`. Note that `spectraVariables` does not +#' list the *peak variables* (`"mz"`, `"intensity"` and eventual additional +#' annotations for each MS peak). Peak variables are returned by +#' `peaksVariables`. #' #' - `tic`: gets the total ion current/count (sum of signal of a #' spectrum) for all spectra in `object`. By default, the value @@ -1844,11 +1847,27 @@ setReplaceMethod("smoothed", "Spectra", function(object, value) { #' @importMethodsFrom ProtGenerics spectraData #' #' @exportMethod spectraData -setMethod("spectraData", "Spectra", function(object, - columns = spectraVariables(object)) -{ - spectraData(object@backend, columns = columns) -}) +setMethod( + "spectraData", "Spectra", + function(object, columns = spectraVariables(object)) { + if (length(object@processingQueue) && + length(pcns <- intersect(columns, peaksVariables(object)))) { + scns <- setdiff(columns, pcns) + if (length(scns)) + spd <- spectraData(object@backend, columns = scns) + else + spd <- make_zero_col_DFrame(nrow = length(object)) + pkd <- peaksData(object, columns = pcns) + for (pcn in pcns) { + vals <- lapply(pkd, `[`, , pcn) + if (pcn %in% c("mz", "intensity")) + vals <- NumericList(vals, compress = FALSE) + spd <- do.call(`[[<-`, list(spd, i = pcn, value = vals)) + } + spd + } else + spectraData(object@backend, columns = columns) + }) #' @rdname Spectra #' @@ -1858,6 +1877,11 @@ setMethod("spectraData", "Spectra", function(object, setReplaceMethod("spectraData", "Spectra", function(object, value) { if (!inherits(value, "DataFrame")) stop("'spectraData<-' expects a 'DataFrame' as input.", call. = FALSE) + ## TODO + ## if processing queue not empty and any colnames in peaks variables: + ## - check lengths of object and lengths of peaks variables, if that + ## differs -> error. + ## stop("Processing queue is not empty. In order to replace values in 'object' the processing queue has to be applied with the 'applyProcessing' function. Depending on the used backend it might also be required to change the backend to a *writeable* backend e.g. with 'setBackend(object, MsBackendMemory())'.") if (!any(colnames(value) == "mz")) value$mz <- object$mz if (!any(colnames(value) == "intensity")) @@ -1879,8 +1903,7 @@ setReplaceMethod("spectraNames", "Spectra", function(object, value) { #' @rdname Spectra setMethod("spectraVariables", "Spectra", function(object) { - svars <- spectraVariables(object@backend) - svars[!(svars %in% c("mz", "intensity"))] + setdiff(spectraVariables(object@backend), peaksVariables(object@backend)) }) #' @rdname Spectra @@ -1898,7 +1921,7 @@ setMethod("tic", "Spectra", function(object, initial = TRUE) { #' #' @export setMethod("$", "Spectra", function(x, name) { - if (!(name %in% c(spectraVariables(x), "mz", "intensity"))) + if (!(name %in% c(spectraVariables(x@backend), peaksVariables(x@backend)))) stop("No spectra variable '", name, "' available") if (name == "mz") mz(x) @@ -1917,6 +1940,8 @@ setMethod("$", "Spectra", function(x, name) { #' #' @export setReplaceMethod("$", "Spectra", function(x, name, value) { + ## TODO: check if name is a peaksVariable and peaks queue not empty. Same + ## procedure than for spectraData<- x@backend <- do.call("$<-", list(x@backend, name, value)) x }) diff --git a/man/Spectra.Rd b/man/Spectra.Rd index 84649062..550364d5 100644 --- a/man/Spectra.Rd +++ b/man/Spectra.Rd @@ -885,7 +885,7 @@ it is \strong{not} guaranteed that each backend supports this parameter (while each backend must support extraction of \code{"mz"} and \code{"intensity"} columns). Parameter \code{columns} defaults to \code{c("mz", "intensity")} but any value returned from \code{peaksVariables} is supported. -Note also that it is possible to extract the peaks matrices with +Note also that it is possible to extract the peak data with \code{as(x, "list")} and \code{as(x, "SimpleList")} as a \code{list} and \code{SimpleList}, respectively. Note however that, in contrast to \code{peaksData}, \code{as} does not support the parameter \code{columns}. @@ -933,8 +933,11 @@ should be replaced individually. The \verb{spectraData<-} function expects a \code{DataFrame} to be passed as value. \item \code{spectraNames}, \verb{spectraNames<-}: gets or sets the spectra names. \item \code{spectraVariables}: returns a \code{character} vector with the -available spectra variables (columns, fields or attributes) -available in \code{object}. +available spectra variables (columns, fields or attributes of each +spectrum) available in \code{object}. Note that \code{spectraVariables} does not +list the \emph{peak variables} (\code{"mz"}, \code{"intensity"} and eventual additional +annotations for each MS peak). Peak variables are returned by +\code{peaksVariables}. \item \code{tic}: gets the total ion current/count (sum of signal of a spectrum) for all spectra in \code{object}. By default, the value reported in the original raw data file is returned. For an empty diff --git a/tests/testthat/test_MsBackendMemory.R b/tests/testthat/test_MsBackendMemory.R index 6d1aad9b..ae6ac565 100644 --- a/tests/testthat/test_MsBackendMemory.R +++ b/tests/testthat/test_MsBackendMemory.R @@ -379,8 +379,8 @@ test_that("peaksData<-,MsBackendMemory works", { expect_equal(peaksData(be), lst) lst2 <- list(cbind(intensity = 10.1, mz = 1), - cbind(intensity = c(12.1, 12.4, 12.4), mz = 1:3), - cbind(intensity = 100, mz = 3.1)) + cbind(intensity = c(12.1, 12.4, 12.4), mz = 1:3), + cbind(intensity = 100, mz = 3.1)) peaksData(be) <- lst2 expect_equal(peaksData(be), lst) diff --git a/tests/testthat/test_Spectra.R b/tests/testthat/test_Spectra.R index 35d8e2c0..8585cd95 100644 --- a/tests/testthat/test_Spectra.R +++ b/tests/testthat/test_Spectra.R @@ -528,6 +528,12 @@ test_that("peaksData,Spectra works", { expect_equal(colnames(res[[1L]]), c("ann", "mz")) expect_equal(s$ann[[1L]], c("b", "c", "d")) + + expect_equal(spectraData(s, "ann")$ann[[1L]], c("b", "c", "d")) + expect_equal(spectraData(s, c("rtime", "ann"))$ann[[1L]], c("b", "c", "d")) + expect_equal( + spectraData(s, c(spectraVariables(s), peaksVariables(s)))$ann[[1L]], + c("b", "c", "d")) }) test_that("lengths,Spectra works", { @@ -778,6 +784,13 @@ test_that("spectraVariables,Spectra works", { sps <- Spectra(df) res <- spectraVariables(sps) expect_true(all(c(exp_col, "other_col") %in% res)) + + df$mz <- list(c(1.2, 1.4), c(4.5, 5.6, 7.8)) + df$intensity <- list(c(12, 23.3), c(134.3, 5, 123)) + df$pk_ann <- list(c("a", "b"), c(NA, NA, NA)) + sps <- Spectra(df, peaksVariables = c("mz", "intensity", "pk_ann")) + res <- spectraVariables(sps) + expect_true(!any(peaksVariables(sps) %in% res)) }) test_that("tic,Spectra works", { @@ -1632,3 +1645,86 @@ test_that("uniqueMsLevels,Spectra works", { expect_equal(res, uniqueMsLevels(Spectra(sciex_mzr))) }) + +test_that("peaks variables and filtering properly works", { + test_df <- DataFrame(msLevel = c(1L, 2L, 2L), scanIndex = 4:6) + test_df$mz <- list(c(1.1, 1.3, 1.5), + c(4.1, 5.1), + c(1.6, 1.7, 1.8, 1.9)) + test_df$intensity <- list(c(45.1, 34, 12), + c(234.4, 1333), + c(42.1, 34.2, 65, 6)) + test_df$pk_ann <- list(c(NA, NA, "C12H2"), + c("A", "B"), + c("D", "E", "F", "G")) + s <- Spectra(test_df, peaksVariables = c("mz", "intensity", "pk_ann")) + expect_equal(peaksVariables(s), c("mz", "intensity", "pk_ann")) + expect_true(!any(spectraVariables(s) %in% peaksVariables(s))) + + spd <- spectraData(s) + expect_true(!any(colnames(spd) %in% peaksVariables(s))) + pkd <- peaksData(s) + expect_equal(colnames(pkd[[1]]), c("mz", "intensity")) + expect_equal(s$pk_ann, test_df$pk_ann) + expect_equal(s$mz, spectraData(s, columns = "mz")$mz) + expect_equal(test_df$pk_ann, + spectraData(s, columns = c("rtime", "pk_ann"))$pk_ann) + + ######## + ## Filter peaks. Have to ensure that ALL peak variables get subset properly + sf <- filterIntensity(s, intensity = 34.1) + expect_equal(mz(sf), + NumericList(list(c(mz=1.1), c(4.1, 5.1), c(1.6, 1.7, 1.8)), + compress = FALSE)) + expect_equal(mz(sf), sf$mz) + + ## peaksData + expect_equal(lengths(sf), c(1, 2, 3)) + pkd <- peaksData(sf) + expect_true(is.matrix(pkd[[1L]])) + expect_equal(colnames(pkd[[1L]]), c("mz", "intensity")) + pkd <- peaksData(sf, columns = peaksVariables(sf)) + expect_true(is.data.frame(pkd[[1L]])) + expect_equal(colnames(pkd[[1L]]), c("mz", "intensity", "pk_ann")) + expect_equal(pkd[[1L]][, "mz"], 1.1) + expect_equal(pkd[[2L]][, "mz"], c(4.1, 5.1)) + expect_equal(pkd[[3L]][, "mz"], c(1.6, 1.7, 1.8)) + expect_equal(pkd[[1L]][, "pk_ann"], NA_character_) + expect_equal(pkd[[2L]][, "pk_ann"], c("A", "B")) + expect_equal(pkd[[3L]][, "pk_ann"], c("D", "E", "F")) + + ## spectraData + spd <- spectraData(sf) + expect_true(!any(colnames(spd) %in% peaksVariables(sf))) + + spd <- spectraData(sf, columns = c("rtime", "mz")) + expect_equal(colnames(spd), c("rtime", "mz")) + expect_equal(spectraData(sf, columns = "mz")$mz, sf$mz) + spd <- spectraData(sf, columns = "intensity") + expect_equal(colnames(spd), "intensity") + expect_equal(spd$intensity, sf$intensity) + spd <- spectraData(sf, columns = "pk_ann") + expect_equal( + spd$pk_ann, list(c(NA_character_), c("A", "B"), c("D", "E", "F"))) + spd <- spectraData(sf, columns = c("intensity", "pk_ann")) + expect_equal(colnames(spd), c("intensity", "pk_ann")) + expect_equal( + spd$pk_ann, list(c(NA_character_), c("A", "B"), c("D", "E", "F"))) + + ## check applyProcessing + res <- applyProcessing(sf) + expect_equal(res$rtime, sf$rtime) + expect_equal(res$mz, sf$mz) + expect_equal(res@backend$mz, sf$mz) + expect_equal(res$intensity, sf$intensity) + expect_equal(res@backend$intensity, sf$intensity) + expect_equal(res$pk_ann, sf$pk_ann) + expect_equal(res@backend$pk_ann, sf$pk_ann) + expect_true(is.matrix(res@backend@peaksData[[1L]])) + expect_equal( + res$pk_ann, list(c(NA_character_), c("A", "B"), c("D", "E", "F"))) + + ## TODO: check replacement of peaks variables + ## spectraData<- + ## $<- +})