Skip to content

Commit

Permalink
refactor: applyProcessing correctly manages processing queue
Browse files Browse the repository at this point in the history
- `applyProcessing` ensures that all peaks variables are properly updates/subset
  depending on the processing queue (issue #289).
  • Loading branch information
jorainer committed May 31, 2023
1 parent dc02d60 commit 533a225
Show file tree
Hide file tree
Showing 7 changed files with 156 additions and 25 deletions.
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
queue.
- `$,Spectra` to access peak variables ensures the lazy evaluation queue is
applied prior to extracting the values.
- `applyProcessing` correctly subsets and processes all peak variables
depending on the processing queue.

## Changes in 1.11.2

Expand Down
3 changes: 2 additions & 1 deletion R/MsBackendMemory.R
Original file line number Diff line number Diff line change
Expand Up @@ -404,7 +404,8 @@ setReplaceMethod("peaksData", "MsBackendMemory", function(object, value) {
pcn <- intersect(c("mz", "intensity"), cn)
if (length(pcn))
object@peaksData <- lapply(
value, function(z) base::as.matrix(z[, pcn, drop = FALSE]))
value, function(z) as.matrix(z[, pcn, drop = FALSE],
rownames.force = FALSE))
pcn <- setdiff(cn, c("mz", "intensity"))
if (length(pcn))
object@peaksDataFrame <- lapply(
Expand Down
20 changes: 12 additions & 8 deletions R/Spectra-functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -192,14 +192,18 @@ applyProcessing <- function(object, f = dataStorage(object),
if (length(f) != length(object))
stop("length 'f' has to be equal to the length of 'object' (",
length(object), ")")
bknds <- bplapply(split(object@backend, f = f), function(z, queue, svars) {
if (length(svars))
spd <- as.data.frame(spectraData(z, columns = svars))
else spd <- NULL
peaksData(z) <- .apply_processing_queue(peaksData(z), spd, queue)
z
}, queue = object@processingQueue,
svars = .processingQueueVariables(object), BPPARAM = BPPARAM)
pv <- peaksVariables(object)
bknds <- bplapply(
split(object@backend, f = f), function(z, queue, pv, svars) {
if (length(svars))
spd <- as.data.frame(spectraData(z, columns = svars))
else spd <- NULL
peaksData(z) <- .apply_processing_queue(
peaksData(z, columns = pv), spd, queue)
z
}, queue = object@processingQueue, pv = pv,
svars = .processingQueueVariables(object),
BPPARAM = BPPARAM)
bknds <- backendMerge(bknds)
if (is.unsorted(f))
bknds <- bknds[order(unlist(split(seq_along(bknds), f),
Expand Down
47 changes: 36 additions & 11 deletions R/Spectra.R
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@ NULL
#' each backend must support extraction of `"mz"` and `"intensity"` columns).
#' Parameter `columns` defaults to `c("mz", "intensity")` but any value
#' returned from `peaksVariables` is supported.
#' Note also that it is possible to extract the peaks matrices with
#' Note also that it is possible to extract the peak data with
#' `as(x, "list")` and `as(x, "SimpleList")` as a `list` and `SimpleList`,
#' respectively. Note however that, in contrast to `peaksData`, `as` does not
#' support the parameter `columns`.
Expand Down Expand Up @@ -298,8 +298,11 @@ NULL
#' - `spectraNames`, `spectraNames<-`: gets or sets the spectra names.
#'
#' - `spectraVariables`: returns a `character` vector with the
#' available spectra variables (columns, fields or attributes)
#' available in `object`.
#' available spectra variables (columns, fields or attributes of each
#' spectrum) available in `object`. Note that `spectraVariables` does not
#' list the *peak variables* (`"mz"`, `"intensity"` and eventual additional
#' annotations for each MS peak). Peak variables are returned by
#' `peaksVariables`.
#'
#' - `tic`: gets the total ion current/count (sum of signal of a
#' spectrum) for all spectra in `object`. By default, the value
Expand Down Expand Up @@ -1844,11 +1847,27 @@ setReplaceMethod("smoothed", "Spectra", function(object, value) {
#' @importMethodsFrom ProtGenerics spectraData
#'
#' @exportMethod spectraData
setMethod("spectraData", "Spectra", function(object,
columns = spectraVariables(object))
{
spectraData(object@backend, columns = columns)
})
setMethod(
"spectraData", "Spectra",
function(object, columns = spectraVariables(object)) {
if (length(object@processingQueue) &&
length(pcns <- intersect(columns, peaksVariables(object)))) {
scns <- setdiff(columns, pcns)
if (length(scns))
spd <- spectraData(object@backend, columns = scns)
else
spd <- make_zero_col_DFrame(nrow = length(object))
pkd <- peaksData(object, columns = pcns)
for (pcn in pcns) {
vals <- lapply(pkd, `[`, , pcn)
if (pcn %in% c("mz", "intensity"))
vals <- NumericList(vals, compress = FALSE)
spd <- do.call(`[[<-`, list(spd, i = pcn, value = vals))
}
spd
} else
spectraData(object@backend, columns = columns)
})

#' @rdname Spectra
#'
Expand All @@ -1858,6 +1877,11 @@ setMethod("spectraData", "Spectra", function(object,
setReplaceMethod("spectraData", "Spectra", function(object, value) {
if (!inherits(value, "DataFrame"))
stop("'spectraData<-' expects a 'DataFrame' as input.", call. = FALSE)
## TODO
## if processing queue not empty and any colnames in peaks variables:
## - check lengths of object and lengths of peaks variables, if that
## differs -> error.
## stop("Processing queue is not empty. In order to replace values in 'object' the processing queue has to be applied with the 'applyProcessing' function. Depending on the used backend it might also be required to change the backend to a *writeable* backend e.g. with 'setBackend(object, MsBackendMemory())'.")
if (!any(colnames(value) == "mz"))
value$mz <- object$mz
if (!any(colnames(value) == "intensity"))
Expand All @@ -1879,8 +1903,7 @@ setReplaceMethod("spectraNames", "Spectra", function(object, value) {

#' @rdname Spectra
setMethod("spectraVariables", "Spectra", function(object) {
svars <- spectraVariables(object@backend)
svars[!(svars %in% c("mz", "intensity"))]
setdiff(spectraVariables(object@backend), peaksVariables(object@backend))
})

#' @rdname Spectra
Expand All @@ -1898,7 +1921,7 @@ setMethod("tic", "Spectra", function(object, initial = TRUE) {
#'
#' @export
setMethod("$", "Spectra", function(x, name) {
if (!(name %in% c(spectraVariables(x), "mz", "intensity")))
if (!(name %in% c(spectraVariables(x@backend), peaksVariables(x@backend))))
stop("No spectra variable '", name, "' available")
if (name == "mz")
mz(x)
Expand All @@ -1917,6 +1940,8 @@ setMethod("$", "Spectra", function(x, name) {
#'
#' @export
setReplaceMethod("$", "Spectra", function(x, name, value) {
## TODO: check if name is a peaksVariable and peaks queue not empty. Same
## procedure than for spectraData<-
x@backend <- do.call("$<-", list(x@backend, name, value))
x
})
Expand Down
9 changes: 6 additions & 3 deletions man/Spectra.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions tests/testthat/test_MsBackendMemory.R
Original file line number Diff line number Diff line change
Expand Up @@ -379,8 +379,8 @@ test_that("peaksData<-,MsBackendMemory works", {
expect_equal(peaksData(be), lst)

lst2 <- list(cbind(intensity = 10.1, mz = 1),
cbind(intensity = c(12.1, 12.4, 12.4), mz = 1:3),
cbind(intensity = 100, mz = 3.1))
cbind(intensity = c(12.1, 12.4, 12.4), mz = 1:3),
cbind(intensity = 100, mz = 3.1))
peaksData(be) <- lst2
expect_equal(peaksData(be), lst)

Expand Down
96 changes: 96 additions & 0 deletions tests/testthat/test_Spectra.R
Original file line number Diff line number Diff line change
Expand Up @@ -528,6 +528,12 @@ test_that("peaksData,Spectra works", {
expect_equal(colnames(res[[1L]]), c("ann", "mz"))

expect_equal(s$ann[[1L]], c("b", "c", "d"))

expect_equal(spectraData(s, "ann")$ann[[1L]], c("b", "c", "d"))
expect_equal(spectraData(s, c("rtime", "ann"))$ann[[1L]], c("b", "c", "d"))
expect_equal(
spectraData(s, c(spectraVariables(s), peaksVariables(s)))$ann[[1L]],
c("b", "c", "d"))
})

test_that("lengths,Spectra works", {
Expand Down Expand Up @@ -778,6 +784,13 @@ test_that("spectraVariables,Spectra works", {
sps <- Spectra(df)
res <- spectraVariables(sps)
expect_true(all(c(exp_col, "other_col") %in% res))

df$mz <- list(c(1.2, 1.4), c(4.5, 5.6, 7.8))
df$intensity <- list(c(12, 23.3), c(134.3, 5, 123))
df$pk_ann <- list(c("a", "b"), c(NA, NA, NA))
sps <- Spectra(df, peaksVariables = c("mz", "intensity", "pk_ann"))
res <- spectraVariables(sps)
expect_true(!any(peaksVariables(sps) %in% res))
})

test_that("tic,Spectra works", {
Expand Down Expand Up @@ -1632,3 +1645,86 @@ test_that("uniqueMsLevels,Spectra works", {

expect_equal(res, uniqueMsLevels(Spectra(sciex_mzr)))
})

test_that("peaks variables and filtering properly works", {
test_df <- DataFrame(msLevel = c(1L, 2L, 2L), scanIndex = 4:6)
test_df$mz <- list(c(1.1, 1.3, 1.5),
c(4.1, 5.1),
c(1.6, 1.7, 1.8, 1.9))
test_df$intensity <- list(c(45.1, 34, 12),
c(234.4, 1333),
c(42.1, 34.2, 65, 6))
test_df$pk_ann <- list(c(NA, NA, "C12H2"),
c("A", "B"),
c("D", "E", "F", "G"))
s <- Spectra(test_df, peaksVariables = c("mz", "intensity", "pk_ann"))
expect_equal(peaksVariables(s), c("mz", "intensity", "pk_ann"))
expect_true(!any(spectraVariables(s) %in% peaksVariables(s)))

spd <- spectraData(s)
expect_true(!any(colnames(spd) %in% peaksVariables(s)))
pkd <- peaksData(s)
expect_equal(colnames(pkd[[1]]), c("mz", "intensity"))
expect_equal(s$pk_ann, test_df$pk_ann)
expect_equal(s$mz, spectraData(s, columns = "mz")$mz)
expect_equal(test_df$pk_ann,
spectraData(s, columns = c("rtime", "pk_ann"))$pk_ann)

########
## Filter peaks. Have to ensure that ALL peak variables get subset properly
sf <- filterIntensity(s, intensity = 34.1)
expect_equal(mz(sf),
NumericList(list(c(mz=1.1), c(4.1, 5.1), c(1.6, 1.7, 1.8)),
compress = FALSE))
expect_equal(mz(sf), sf$mz)

## peaksData
expect_equal(lengths(sf), c(1, 2, 3))
pkd <- peaksData(sf)
expect_true(is.matrix(pkd[[1L]]))
expect_equal(colnames(pkd[[1L]]), c("mz", "intensity"))
pkd <- peaksData(sf, columns = peaksVariables(sf))
expect_true(is.data.frame(pkd[[1L]]))
expect_equal(colnames(pkd[[1L]]), c("mz", "intensity", "pk_ann"))
expect_equal(pkd[[1L]][, "mz"], 1.1)
expect_equal(pkd[[2L]][, "mz"], c(4.1, 5.1))
expect_equal(pkd[[3L]][, "mz"], c(1.6, 1.7, 1.8))
expect_equal(pkd[[1L]][, "pk_ann"], NA_character_)
expect_equal(pkd[[2L]][, "pk_ann"], c("A", "B"))
expect_equal(pkd[[3L]][, "pk_ann"], c("D", "E", "F"))

## spectraData
spd <- spectraData(sf)
expect_true(!any(colnames(spd) %in% peaksVariables(sf)))

spd <- spectraData(sf, columns = c("rtime", "mz"))
expect_equal(colnames(spd), c("rtime", "mz"))
expect_equal(spectraData(sf, columns = "mz")$mz, sf$mz)
spd <- spectraData(sf, columns = "intensity")
expect_equal(colnames(spd), "intensity")
expect_equal(spd$intensity, sf$intensity)
spd <- spectraData(sf, columns = "pk_ann")
expect_equal(
spd$pk_ann, list(c(NA_character_), c("A", "B"), c("D", "E", "F")))
spd <- spectraData(sf, columns = c("intensity", "pk_ann"))
expect_equal(colnames(spd), c("intensity", "pk_ann"))
expect_equal(
spd$pk_ann, list(c(NA_character_), c("A", "B"), c("D", "E", "F")))

## check applyProcessing
res <- applyProcessing(sf)
expect_equal(res$rtime, sf$rtime)
expect_equal(res$mz, sf$mz)
expect_equal(res@backend$mz, sf$mz)
expect_equal(res$intensity, sf$intensity)
expect_equal(res@backend$intensity, sf$intensity)
expect_equal(res$pk_ann, sf$pk_ann)
expect_equal(res@backend$pk_ann, sf$pk_ann)
expect_true(is.matrix(res@backend@peaksData[[1L]]))
expect_equal(
res$pk_ann, list(c(NA_character_), c("A", "B"), c("D", "E", "F")))

## TODO: check replacement of peaks variables
## spectraData<-
## $<-
})

0 comments on commit 533a225

Please sign in to comment.