minor change to fix #77

charles-plessy · May 4, 2023 · 7b57466 · 7b57466 · Tadie-ops · Jun 21, 2023
1 parent a98977f
commit 7b57466
Showing 1 changed file with 49 additions and 49 deletions.
diff --git a/R/AggregationMethods.R b/R/AggregationMethods.R
@@ -1,37 +1,37 @@
 #' @include AllClasses.R Annotations.R CAGEexp.R CAGEr.R ClusteringMethods.R ClusteringFunctions.R CTSS.R Multicore.R
 
 #' @name aggregateTagClusters
-#' 
+#'
 #' @title Aggregate TCs across all samples
-#' 
+#'
 #' @description Aggregates tag clusters (TCs) across all CAGE datasets within
 #' the CAGEr object to create a referent set of consensus clusters.
-#' 
+#'
 #' @param object A [`CAGEr`] object
-#' 
+#'
 #' @param tpmThreshold Ignore tag clusters with normalized signal `< tpmThreshold`
 #'        when constructing the consensus clusters.
-#' 
+#'
 #' @param excludeSignalBelowThreshold When `TRUE` the tag clusters with
 #'        normalized signal `< tpmThreshold` will not contribute to the total
 #'        CAGE signal of a consensus cluster.  When set to `FALSE` all TCs that
 #'        overlap consensus clusters will contribute to the total signal,
 #'        regardless whether they pass the threshold for constructing the
 #'        clusters or not.
-#'        
+#'
 #' @param qLow,qUp Set which "lower" (or "upper") quantile should be used as 5'
 #'        (or 3') boundary of the tag cluster.  If `NULL` the start (for `qLow`)
 #'        or end (for `qUp`) position of the TC is used.
-#' 
+#'
 #' @param maxDist Maximal length of the gap (in base-pairs) between two tag
 #'        clusters for them to be part of the same consensus clusters.
-#'        
+#'
 #' @param useMulticore Logical, should multicore be used (supported only on
 #'        Unix-like platforms).
-#' 
+#'
 #' @param nrCores Number of cores to use when `useMulticore = TRUE`.  Default
 #'        (`NULL`) uses all detected cores.
-#' 
+#'
 #' @details Since the tag clusters (TCs) returned by the [`clusterCTSS`]
 #' function are constructed separately for every CAGE sample within the CAGEr
 #' object, they can differ between samples in both their number, genomic
@@ -46,7 +46,7 @@
 #' `<= maxDist` base-pairs apart.  Consensus clusters represent a referent set
 #' of promoters that can be further used for expression profiling or detecting
 #'  "shifting" (differentially used) promoters between different CAGE samples.
-#' 
+#'
 #' @return Returns the object in which the _experiment_ `consensusClusters` will
 #' be occupied by a [`RangedSummarizedExperiment`] containing the cluster
 #' coordinates as row ranges, and their expression levels in the `counts` and
@@ -56,33 +56,33 @@
 #' `tagCountMatrix` _experiment_ will gain a `cluster` column indicating which
 #' cluster they belong to.  Lastly, the number of CTSS outside clusters will be
 #' documented in the `outOfClusters` column data.
-#'  
+#'
 #' @author Vanja Haberle
 #' @author Charles Plessy
-#' 
+#'
 #' @family CAGEr object modifiers
 #' @family CAGEr clusters functions
-#' 
+#'
 #' @importFrom IRanges reduce
 #' @importFrom GenomicRanges granges
 #' @importFrom S4Vectors endoapply mcols
-#' 
+#'
 #' @examples
-#' 
+#'
 #' consensusClustersGR(exampleCAGEexp)
 #' ce <- aggregateTagClusters( exampleCAGEexp, tpmThreshold = 50
 #'                           , excludeSignalBelowThreshold = FALSE, maxDist = 100)
 #' consensusClustersGR(ce)
-#' 
+#'
 #' ce <- aggregateTagClusters( exampleCAGEexp, tpmThreshold = 50
 #'                           , excludeSignalBelowThreshold = TRUE, maxDist = 100)
 #' consensusClustersGR(ce)
-#' 
+#'
 #' ce <- aggregateTagClusters( exampleCAGEexp, tpmThreshold = 50
 #'                           , excludeSignalBelowThreshold = TRUE, maxDist = 100
 #'                           , qLow = 0.1, qUp = 0.9)
 #' consensusClustersGR(ce)
-#' 
+#'
 #' @export
 
 setGeneric( "aggregateTagClusters"
@@ -102,18 +102,18 @@ setMethod( "aggregateTagClusters", "CAGEr"
 
   consensus.clusters <- .aggregateTagClustersGR( object, tpmThreshold = tpmThreshold
                                                , qLow = qLow, qUp = qUp, maxDist = maxDist)
-  
+
   if (excludeSignalBelowThreshold) {
     filter <- .filterCtss( object
                          , threshold       = tpmThreshold
                          , nrPassThreshold = 1
                          , thresholdIsTpm  = TRUE)
   } else filter <- TRUE
-	
+
     CTSScoordinatesGR(object)$cluster <-
       ranges2names(CTSScoordinatesGR(object), consensus.clusters)
     se <- CTSStagCountSE(object)[filter & decode(filteredCTSSidx(object)), ]
-    consensusClustersSE(object) <- .CCtoSE(se, consensus.clusters)
+    consensusClustersSE(object) <- .CCtoSE(se, consensus.clusters, tpmThreshold = tpmThreshold)
     score(consensusClustersGR(object)) <- rowSums(assays(consensusClustersSE(object))[["normalized"]])
     object$outOfClusters <- librarySizes(object) - colSums(assay(consensusClustersSE(object)))
     object
@@ -140,26 +140,26 @@ setMethod( ".aggregateTagClustersGR", "CAGEr"
 
   # Filter out TCs with too low score.
   gr.list <- endoapply(TC.list, function (gr) gr <- gr[score(gr) >= tpmThreshold])
-  
+
   # Aggregate clusters by expanding and merging TCs from all samples.
   clusters.gr <- unlist(gr.list)
   suppressWarnings(start(clusters.gr) <- start(clusters.gr) - round(maxDist/2)) # Suppress warnings
   suppressWarnings(end(clusters.gr)   <- end(clusters.gr)   + round(maxDist/2)) # because we trim later
   clusters.gr <- reduce(trim(clusters.gr))  # By definition of `reduce`, they will not overlap
   # Note that the clusters are temporarily too broad, because we added `maxDist)` to the TCs…
-  
+
   # CTSS with score that is sum od all samples
   ctss <- CTSScoordinatesGR(object)
   score(ctss) <- rowSums(CTSSnormalizedTpmDF(object) |> DelayedArray::DelayedArray() )
-  
+
   # See `benchmarks/dominant_ctss.md`.
   o <- findOverlaps(clusters.gr, ctss)
-  
+
   rl <- rle(queryHits(o))$length
   cluster_start_idx <- cumsum(c(1, head(rl, -1))) # Where each run starts
   grouped_scores <- extractList(score(ctss), o)
   grouped_pos    <- extractList(pos(ctss), o)
-  
+
   find.dominant.idx <- function (x) {
     # which.max is breaking ties by taking the last, but this will give slightly
     # different biases on plus an minus strands.
@@ -190,17 +190,17 @@ setMethod( ".CCtoSE"
       stop("Needs normalised data; run ", sQuote("normalizeTagCount()"), " first.")
     if (is.null(rowRanges(se)$cluster))
       rowRanges(se)$cluster <- ranges2names(rowRanges(se), consensus.clusters)
-    
+
     if (tpmThreshold > 0)
       se <- se[rowSums(DelayedArray(assays(se)[["normalizedTpmMatrix"]])) > tpmThreshold,]
-    
+
     .rowsumAsMatrix <- function(DF, names) {
       rs <- rowsum(as.matrix(DelayedArray(DF)), as.factor(names))
       if (rownames(rs)[1] == "") # If some CTSS were not in clusters
         rs <- rs[-1, , drop = FALSE]
       rs
       }
-    
+
     counts <- .rowsumAsMatrix(assays(se)[["counts"]], rowRanges(se)$cluster)
     norm   <- .rowsumAsMatrix(assays(se)[["normalizedTpmMatrix"]], rowRanges(se)$cluster)
 
@@ -210,44 +210,44 @@ setMethod( ".CCtoSE"
 })
 
 #' @name CustomConsensusClusters
-#' 
+#'
 #' @title Expression levels of consensus cluster
-#' 
-#' @description Intersects custom consensus clusters with the CTSS data in a 
+#'
+#' @description Intersects custom consensus clusters with the CTSS data in a
 #' [`CAGEexp`] object, and stores the result as a expression matrices
 #' (raw and normalised tag counts).
-#' 
+#'
 #' @param object A `CAGEexp` object
-#' 
+#'
 #' @param clusters Consensus clusters in [`GRanges`] format.
-#' 
+#'
 #' @param threshold,nrPassThreshold Only CTSSs with signal `>= threshold` in
 #'        `>= nrPassThreshold` experiments will be used for clustering and will
 #'        contribute towards total signal of the cluster.
-#' 
+#'
 #' @param thresholdIsTpm Logical, is threshold raw tag count value (FALSE) or
 #'        normalized signal (TRUE).
-#'        
+#'
 #' @details Consensus clusters must not overlap, so that a single base of the
 #' genome can only be attributed to a single cluster.  This is enforced by the
 #' [`.ConsensusClusters`] constructor.
-#' 
+#'
 #' @return stores the result as a new [`RangedSummarizedExperiment`] in the
 #' `experiment` slot of the object.  The assays of the new experiment are called
 #' `counts` and `normalized`.  An `outOfClusters` column is added
 #' to the sample metadata to reflect the number of molecules that do not have
 #' their TSS in a consensus cluster.
-#' 
+#'
 #' @author Charles Plessy
-#' 
+#'
 #' @family CAGEr object modifiers
 #' @family CAGEr clusters functions
-#'  
-#' @examples 
-#' 
+#'
+#' @examples
+#'
 #' cc <- consensusClustersGR(exampleCAGEexp)
 #' CustomConsensusClusters(exampleCAGEexp, cc)
-#' 
+#'
 #' @export
 
 setGeneric( "CustomConsensusClusters"
@@ -264,16 +264,16 @@ setMethod( "CustomConsensusClusters", c("CAGEexp", "GRanges")
          , function (object, clusters
                     , threshold, nrPassThreshold, thresholdIsTpm  = TRUE) {
   objname <- deparse(substitute(object))
-  
+
   clusters <- .ConsensusClusters(clusters)
-  
+
   filter <- .filterCtss( object
                        , threshold       = threshold
                        , nrPassThreshold = nrPassThreshold
                        , thresholdIsTpm  = thresholdIsTpm)
-  
+
   CTSScoordinatesGR(object)$cluster <- ranges2names(CTSScoordinatesGR(object), clusters)
-  
+
   consensusClustersSE(object) <- .CCtoSE( CTSStagCountSE(object)[filter, ]
                                         , clusters)
   score(consensusClustersGR(object)) <- rowSums(assays(consensusClustersSE(object))[["normalized"]])