Skip to content

Commit

Permalink
minor change to fix #77
Browse files Browse the repository at this point in the history
  • Loading branch information
snikumbh committed May 4, 2023
1 parent a98977f commit 7b57466
Showing 1 changed file with 49 additions and 49 deletions.
98 changes: 49 additions & 49 deletions R/AggregationMethods.R
Original file line number Diff line number Diff line change
@@ -1,37 +1,37 @@
#' @include AllClasses.R Annotations.R CAGEexp.R CAGEr.R ClusteringMethods.R ClusteringFunctions.R CTSS.R Multicore.R

#' @name aggregateTagClusters
#'
#'
#' @title Aggregate TCs across all samples
#'
#'
#' @description Aggregates tag clusters (TCs) across all CAGE datasets within
#' the CAGEr object to create a referent set of consensus clusters.
#'
#'
#' @param object A [`CAGEr`] object
#'
#'
#' @param tpmThreshold Ignore tag clusters with normalized signal `< tpmThreshold`
#' when constructing the consensus clusters.
#'
#'
#' @param excludeSignalBelowThreshold When `TRUE` the tag clusters with
#' normalized signal `< tpmThreshold` will not contribute to the total
#' CAGE signal of a consensus cluster. When set to `FALSE` all TCs that
#' overlap consensus clusters will contribute to the total signal,
#' regardless whether they pass the threshold for constructing the
#' clusters or not.
#'
#'
#' @param qLow,qUp Set which "lower" (or "upper") quantile should be used as 5'
#' (or 3') boundary of the tag cluster. If `NULL` the start (for `qLow`)
#' or end (for `qUp`) position of the TC is used.
#'
#'
#' @param maxDist Maximal length of the gap (in base-pairs) between two tag
#' clusters for them to be part of the same consensus clusters.
#'
#'
#' @param useMulticore Logical, should multicore be used (supported only on
#' Unix-like platforms).
#'
#'
#' @param nrCores Number of cores to use when `useMulticore = TRUE`. Default
#' (`NULL`) uses all detected cores.
#'
#'
#' @details Since the tag clusters (TCs) returned by the [`clusterCTSS`]
#' function are constructed separately for every CAGE sample within the CAGEr
#' object, they can differ between samples in both their number, genomic
Expand All @@ -46,7 +46,7 @@
#' `<= maxDist` base-pairs apart. Consensus clusters represent a referent set
#' of promoters that can be further used for expression profiling or detecting
#' "shifting" (differentially used) promoters between different CAGE samples.
#'
#'
#' @return Returns the object in which the _experiment_ `consensusClusters` will
#' be occupied by a [`RangedSummarizedExperiment`] containing the cluster
#' coordinates as row ranges, and their expression levels in the `counts` and
Expand All @@ -56,33 +56,33 @@
#' `tagCountMatrix` _experiment_ will gain a `cluster` column indicating which
#' cluster they belong to. Lastly, the number of CTSS outside clusters will be
#' documented in the `outOfClusters` column data.
#'
#'
#' @author Vanja Haberle
#' @author Charles Plessy
#'
#'
#' @family CAGEr object modifiers
#' @family CAGEr clusters functions
#'
#'
#' @importFrom IRanges reduce
#' @importFrom GenomicRanges granges
#' @importFrom S4Vectors endoapply mcols
#'
#'
#' @examples
#'
#'
#' consensusClustersGR(exampleCAGEexp)
#' ce <- aggregateTagClusters( exampleCAGEexp, tpmThreshold = 50
#' , excludeSignalBelowThreshold = FALSE, maxDist = 100)
#' consensusClustersGR(ce)
#'
#'
#' ce <- aggregateTagClusters( exampleCAGEexp, tpmThreshold = 50
#' , excludeSignalBelowThreshold = TRUE, maxDist = 100)
#' consensusClustersGR(ce)
#'
#'
#' ce <- aggregateTagClusters( exampleCAGEexp, tpmThreshold = 50
#' , excludeSignalBelowThreshold = TRUE, maxDist = 100
#' , qLow = 0.1, qUp = 0.9)
#' consensusClustersGR(ce)
#'
#'
#' @export

setGeneric( "aggregateTagClusters"
Expand All @@ -102,18 +102,18 @@ setMethod( "aggregateTagClusters", "CAGEr"

consensus.clusters <- .aggregateTagClustersGR( object, tpmThreshold = tpmThreshold
, qLow = qLow, qUp = qUp, maxDist = maxDist)

if (excludeSignalBelowThreshold) {
filter <- .filterCtss( object
, threshold = tpmThreshold
, nrPassThreshold = 1
, thresholdIsTpm = TRUE)
} else filter <- TRUE

CTSScoordinatesGR(object)$cluster <-
ranges2names(CTSScoordinatesGR(object), consensus.clusters)
se <- CTSStagCountSE(object)[filter & decode(filteredCTSSidx(object)), ]
consensusClustersSE(object) <- .CCtoSE(se, consensus.clusters)
consensusClustersSE(object) <- .CCtoSE(se, consensus.clusters, tpmThreshold = tpmThreshold)
score(consensusClustersGR(object)) <- rowSums(assays(consensusClustersSE(object))[["normalized"]])
object$outOfClusters <- librarySizes(object) - colSums(assay(consensusClustersSE(object)))
object
Expand All @@ -140,26 +140,26 @@ setMethod( ".aggregateTagClustersGR", "CAGEr"

# Filter out TCs with too low score.
gr.list <- endoapply(TC.list, function (gr) gr <- gr[score(gr) >= tpmThreshold])

# Aggregate clusters by expanding and merging TCs from all samples.
clusters.gr <- unlist(gr.list)
suppressWarnings(start(clusters.gr) <- start(clusters.gr) - round(maxDist/2)) # Suppress warnings
suppressWarnings(end(clusters.gr) <- end(clusters.gr) + round(maxDist/2)) # because we trim later
clusters.gr <- reduce(trim(clusters.gr)) # By definition of `reduce`, they will not overlap
# Note that the clusters are temporarily too broad, because we added `maxDist)` to the TCs…

# CTSS with score that is sum od all samples
ctss <- CTSScoordinatesGR(object)
score(ctss) <- rowSums(CTSSnormalizedTpmDF(object) |> DelayedArray::DelayedArray() )

# See `benchmarks/dominant_ctss.md`.
o <- findOverlaps(clusters.gr, ctss)

rl <- rle(queryHits(o))$length
cluster_start_idx <- cumsum(c(1, head(rl, -1))) # Where each run starts
grouped_scores <- extractList(score(ctss), o)
grouped_pos <- extractList(pos(ctss), o)

find.dominant.idx <- function (x) {
# which.max is breaking ties by taking the last, but this will give slightly
# different biases on plus an minus strands.
Expand Down Expand Up @@ -190,17 +190,17 @@ setMethod( ".CCtoSE"
stop("Needs normalised data; run ", sQuote("normalizeTagCount()"), " first.")
if (is.null(rowRanges(se)$cluster))
rowRanges(se)$cluster <- ranges2names(rowRanges(se), consensus.clusters)

if (tpmThreshold > 0)
se <- se[rowSums(DelayedArray(assays(se)[["normalizedTpmMatrix"]])) > tpmThreshold,]

.rowsumAsMatrix <- function(DF, names) {
rs <- rowsum(as.matrix(DelayedArray(DF)), as.factor(names))
if (rownames(rs)[1] == "") # If some CTSS were not in clusters
rs <- rs[-1, , drop = FALSE]
rs
}

counts <- .rowsumAsMatrix(assays(se)[["counts"]], rowRanges(se)$cluster)
norm <- .rowsumAsMatrix(assays(se)[["normalizedTpmMatrix"]], rowRanges(se)$cluster)

Expand All @@ -210,44 +210,44 @@ setMethod( ".CCtoSE"
})

#' @name CustomConsensusClusters
#'
#'
#' @title Expression levels of consensus cluster
#'
#' @description Intersects custom consensus clusters with the CTSS data in a
#'
#' @description Intersects custom consensus clusters with the CTSS data in a
#' [`CAGEexp`] object, and stores the result as a expression matrices
#' (raw and normalised tag counts).
#'
#'
#' @param object A `CAGEexp` object
#'
#'
#' @param clusters Consensus clusters in [`GRanges`] format.
#'
#'
#' @param threshold,nrPassThreshold Only CTSSs with signal `>= threshold` in
#' `>= nrPassThreshold` experiments will be used for clustering and will
#' contribute towards total signal of the cluster.
#'
#'
#' @param thresholdIsTpm Logical, is threshold raw tag count value (FALSE) or
#' normalized signal (TRUE).
#'
#'
#' @details Consensus clusters must not overlap, so that a single base of the
#' genome can only be attributed to a single cluster. This is enforced by the
#' [`.ConsensusClusters`] constructor.
#'
#'
#' @return stores the result as a new [`RangedSummarizedExperiment`] in the
#' `experiment` slot of the object. The assays of the new experiment are called
#' `counts` and `normalized`. An `outOfClusters` column is added
#' to the sample metadata to reflect the number of molecules that do not have
#' their TSS in a consensus cluster.
#'
#'
#' @author Charles Plessy
#'
#'
#' @family CAGEr object modifiers
#' @family CAGEr clusters functions
#'
#' @examples
#'
#'
#' @examples
#'
#' cc <- consensusClustersGR(exampleCAGEexp)
#' CustomConsensusClusters(exampleCAGEexp, cc)
#'
#'
#' @export

setGeneric( "CustomConsensusClusters"
Expand All @@ -264,16 +264,16 @@ setMethod( "CustomConsensusClusters", c("CAGEexp", "GRanges")
, function (object, clusters
, threshold, nrPassThreshold, thresholdIsTpm = TRUE) {
objname <- deparse(substitute(object))

clusters <- .ConsensusClusters(clusters)

filter <- .filterCtss( object
, threshold = threshold
, nrPassThreshold = nrPassThreshold
, thresholdIsTpm = thresholdIsTpm)

CTSScoordinatesGR(object)$cluster <- ranges2names(CTSScoordinatesGR(object), clusters)

consensusClustersSE(object) <- .CCtoSE( CTSStagCountSE(object)[filter, ]
, clusters)
score(consensusClustersGR(object)) <- rowSums(assays(consensusClustersSE(object))[["normalized"]])
Expand Down

2 comments on commit 7b57466

@Tadie-ops
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Dear Snikumbh,

"importPublicData" function is not found in the package, but found in vignette. Could you give me a bit elaboration how I can download FANTOM5 data please?

@charles-plessy
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Dear @Tadie-ops , thanks for your interest but this commit is not the place to discuss the importPublicData. Please follow up in issue #78, which I have just created.

Please sign in to comment.