From 7b57466dcdd10d87f748f18355ff455a58c1a022 Mon Sep 17 00:00:00 2001 From: snikumbh Date: Thu, 4 May 2023 17:25:05 +0100 Subject: [PATCH] minor change to fix #77 --- R/AggregationMethods.R | 98 +++++++++++++++++++++--------------------- 1 file changed, 49 insertions(+), 49 deletions(-) diff --git a/R/AggregationMethods.R b/R/AggregationMethods.R index c53705d..627d50a 100644 --- a/R/AggregationMethods.R +++ b/R/AggregationMethods.R @@ -1,37 +1,37 @@ #' @include AllClasses.R Annotations.R CAGEexp.R CAGEr.R ClusteringMethods.R ClusteringFunctions.R CTSS.R Multicore.R #' @name aggregateTagClusters -#' +#' #' @title Aggregate TCs across all samples -#' +#' #' @description Aggregates tag clusters (TCs) across all CAGE datasets within #' the CAGEr object to create a referent set of consensus clusters. -#' +#' #' @param object A [`CAGEr`] object -#' +#' #' @param tpmThreshold Ignore tag clusters with normalized signal `< tpmThreshold` #' when constructing the consensus clusters. -#' +#' #' @param excludeSignalBelowThreshold When `TRUE` the tag clusters with #' normalized signal `< tpmThreshold` will not contribute to the total #' CAGE signal of a consensus cluster. When set to `FALSE` all TCs that #' overlap consensus clusters will contribute to the total signal, #' regardless whether they pass the threshold for constructing the #' clusters or not. -#' +#' #' @param qLow,qUp Set which "lower" (or "upper") quantile should be used as 5' #' (or 3') boundary of the tag cluster. If `NULL` the start (for `qLow`) #' or end (for `qUp`) position of the TC is used. -#' +#' #' @param maxDist Maximal length of the gap (in base-pairs) between two tag #' clusters for them to be part of the same consensus clusters. -#' +#' #' @param useMulticore Logical, should multicore be used (supported only on #' Unix-like platforms). -#' +#' #' @param nrCores Number of cores to use when `useMulticore = TRUE`. Default #' (`NULL`) uses all detected cores. -#' +#' #' @details Since the tag clusters (TCs) returned by the [`clusterCTSS`] #' function are constructed separately for every CAGE sample within the CAGEr #' object, they can differ between samples in both their number, genomic @@ -46,7 +46,7 @@ #' `<= maxDist` base-pairs apart. Consensus clusters represent a referent set #' of promoters that can be further used for expression profiling or detecting #' "shifting" (differentially used) promoters between different CAGE samples. -#' +#' #' @return Returns the object in which the _experiment_ `consensusClusters` will #' be occupied by a [`RangedSummarizedExperiment`] containing the cluster #' coordinates as row ranges, and their expression levels in the `counts` and @@ -56,33 +56,33 @@ #' `tagCountMatrix` _experiment_ will gain a `cluster` column indicating which #' cluster they belong to. Lastly, the number of CTSS outside clusters will be #' documented in the `outOfClusters` column data. -#' +#' #' @author Vanja Haberle #' @author Charles Plessy -#' +#' #' @family CAGEr object modifiers #' @family CAGEr clusters functions -#' +#' #' @importFrom IRanges reduce #' @importFrom GenomicRanges granges #' @importFrom S4Vectors endoapply mcols -#' +#' #' @examples -#' +#' #' consensusClustersGR(exampleCAGEexp) #' ce <- aggregateTagClusters( exampleCAGEexp, tpmThreshold = 50 #' , excludeSignalBelowThreshold = FALSE, maxDist = 100) #' consensusClustersGR(ce) -#' +#' #' ce <- aggregateTagClusters( exampleCAGEexp, tpmThreshold = 50 #' , excludeSignalBelowThreshold = TRUE, maxDist = 100) #' consensusClustersGR(ce) -#' +#' #' ce <- aggregateTagClusters( exampleCAGEexp, tpmThreshold = 50 #' , excludeSignalBelowThreshold = TRUE, maxDist = 100 #' , qLow = 0.1, qUp = 0.9) #' consensusClustersGR(ce) -#' +#' #' @export setGeneric( "aggregateTagClusters" @@ -102,18 +102,18 @@ setMethod( "aggregateTagClusters", "CAGEr" consensus.clusters <- .aggregateTagClustersGR( object, tpmThreshold = tpmThreshold , qLow = qLow, qUp = qUp, maxDist = maxDist) - + if (excludeSignalBelowThreshold) { filter <- .filterCtss( object , threshold = tpmThreshold , nrPassThreshold = 1 , thresholdIsTpm = TRUE) } else filter <- TRUE - + CTSScoordinatesGR(object)$cluster <- ranges2names(CTSScoordinatesGR(object), consensus.clusters) se <- CTSStagCountSE(object)[filter & decode(filteredCTSSidx(object)), ] - consensusClustersSE(object) <- .CCtoSE(se, consensus.clusters) + consensusClustersSE(object) <- .CCtoSE(se, consensus.clusters, tpmThreshold = tpmThreshold) score(consensusClustersGR(object)) <- rowSums(assays(consensusClustersSE(object))[["normalized"]]) object$outOfClusters <- librarySizes(object) - colSums(assay(consensusClustersSE(object))) object @@ -140,26 +140,26 @@ setMethod( ".aggregateTagClustersGR", "CAGEr" # Filter out TCs with too low score. gr.list <- endoapply(TC.list, function (gr) gr <- gr[score(gr) >= tpmThreshold]) - + # Aggregate clusters by expanding and merging TCs from all samples. clusters.gr <- unlist(gr.list) suppressWarnings(start(clusters.gr) <- start(clusters.gr) - round(maxDist/2)) # Suppress warnings suppressWarnings(end(clusters.gr) <- end(clusters.gr) + round(maxDist/2)) # because we trim later clusters.gr <- reduce(trim(clusters.gr)) # By definition of `reduce`, they will not overlap # Note that the clusters are temporarily too broad, because we added `maxDist)` to the TCs… - + # CTSS with score that is sum od all samples ctss <- CTSScoordinatesGR(object) score(ctss) <- rowSums(CTSSnormalizedTpmDF(object) |> DelayedArray::DelayedArray() ) - + # See `benchmarks/dominant_ctss.md`. o <- findOverlaps(clusters.gr, ctss) - + rl <- rle(queryHits(o))$length cluster_start_idx <- cumsum(c(1, head(rl, -1))) # Where each run starts grouped_scores <- extractList(score(ctss), o) grouped_pos <- extractList(pos(ctss), o) - + find.dominant.idx <- function (x) { # which.max is breaking ties by taking the last, but this will give slightly # different biases on plus an minus strands. @@ -190,17 +190,17 @@ setMethod( ".CCtoSE" stop("Needs normalised data; run ", sQuote("normalizeTagCount()"), " first.") if (is.null(rowRanges(se)$cluster)) rowRanges(se)$cluster <- ranges2names(rowRanges(se), consensus.clusters) - + if (tpmThreshold > 0) se <- se[rowSums(DelayedArray(assays(se)[["normalizedTpmMatrix"]])) > tpmThreshold,] - + .rowsumAsMatrix <- function(DF, names) { rs <- rowsum(as.matrix(DelayedArray(DF)), as.factor(names)) if (rownames(rs)[1] == "") # If some CTSS were not in clusters rs <- rs[-1, , drop = FALSE] rs } - + counts <- .rowsumAsMatrix(assays(se)[["counts"]], rowRanges(se)$cluster) norm <- .rowsumAsMatrix(assays(se)[["normalizedTpmMatrix"]], rowRanges(se)$cluster) @@ -210,44 +210,44 @@ setMethod( ".CCtoSE" }) #' @name CustomConsensusClusters -#' +#' #' @title Expression levels of consensus cluster -#' -#' @description Intersects custom consensus clusters with the CTSS data in a +#' +#' @description Intersects custom consensus clusters with the CTSS data in a #' [`CAGEexp`] object, and stores the result as a expression matrices #' (raw and normalised tag counts). -#' +#' #' @param object A `CAGEexp` object -#' +#' #' @param clusters Consensus clusters in [`GRanges`] format. -#' +#' #' @param threshold,nrPassThreshold Only CTSSs with signal `>= threshold` in #' `>= nrPassThreshold` experiments will be used for clustering and will #' contribute towards total signal of the cluster. -#' +#' #' @param thresholdIsTpm Logical, is threshold raw tag count value (FALSE) or #' normalized signal (TRUE). -#' +#' #' @details Consensus clusters must not overlap, so that a single base of the #' genome can only be attributed to a single cluster. This is enforced by the #' [`.ConsensusClusters`] constructor. -#' +#' #' @return stores the result as a new [`RangedSummarizedExperiment`] in the #' `experiment` slot of the object. The assays of the new experiment are called #' `counts` and `normalized`. An `outOfClusters` column is added #' to the sample metadata to reflect the number of molecules that do not have #' their TSS in a consensus cluster. -#' +#' #' @author Charles Plessy -#' +#' #' @family CAGEr object modifiers #' @family CAGEr clusters functions -#' -#' @examples -#' +#' +#' @examples +#' #' cc <- consensusClustersGR(exampleCAGEexp) #' CustomConsensusClusters(exampleCAGEexp, cc) -#' +#' #' @export setGeneric( "CustomConsensusClusters" @@ -264,16 +264,16 @@ setMethod( "CustomConsensusClusters", c("CAGEexp", "GRanges") , function (object, clusters , threshold, nrPassThreshold, thresholdIsTpm = TRUE) { objname <- deparse(substitute(object)) - + clusters <- .ConsensusClusters(clusters) - + filter <- .filterCtss( object , threshold = threshold , nrPassThreshold = nrPassThreshold , thresholdIsTpm = thresholdIsTpm) - + CTSScoordinatesGR(object)$cluster <- ranges2names(CTSScoordinatesGR(object), clusters) - + consensusClustersSE(object) <- .CCtoSE( CTSStagCountSE(object)[filter, ] , clusters) score(consensusClustersGR(object)) <- rowSums(assays(consensusClustersSE(object))[["normalized"]])