From 880d73b12cbabdc040ba15cff807423eeebd65e6 Mon Sep 17 00:00:00 2001 From: Charles Plessy Date: Fri, 17 May 2024 14:26:18 +0900 Subject: [PATCH] Remove the returnInterquantileWidth option. Closes #114 --- NEWS.md | 2 ++ R/AggregationMethods.R | 2 +- R/ExportMethods.R | 4 +-- R/GetMethods.R | 69 +++++++++++++--------------------------- man/consensusClusters.Rd | 40 +++++------------------ man/tagClusters.Rd | 34 ++++++-------------- vignettes/CAGEexp.Rmd | 10 ++---- 7 files changed, 47 insertions(+), 114 deletions(-) diff --git a/NEWS.md b/NEWS.md index 6a3d009..999bdfc 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,6 +4,8 @@ BACKWARDS-INCOMPATIBLE CHANGES - Remove `CTSSclusteringMethod()` function and stop recording clustering method name. +- Remove `returnInterquantileWidth` argument of functions and always return + that width when quantile information is provided. Closes #114 NEW FEATURES diff --git a/R/AggregationMethods.R b/R/AggregationMethods.R index 4f931a3..93f5c66 100644 --- a/R/AggregationMethods.R +++ b/R/AggregationMethods.R @@ -102,7 +102,7 @@ setMethod( "aggregateTagClusters", "CAGEr" # Prepare the GRangesList object containing the TCs. if (all( !is.null(qLow), !is.null(qUp))) { # If using quantiles, correct start and end. - TC.list <- tagClustersGR(object, returnInterquantileWidth = TRUE, qLow = qLow, qUp = qUp) + TC.list <- tagClustersGR(object, qLow = qLow, qUp = qUp) # Define start and and according to quantile positions. # Quantile coordinates are relative to start position: a value of "1" means # "first base of the cluster". 
Therefore, 1 base must be subtracted in the diff --git a/R/ExportMethods.R b/R/ExportMethods.R index bfc4544..2c9f794 100644 --- a/R/ExportMethods.R +++ b/R/ExportMethods.R @@ -264,7 +264,7 @@ setMethod( "plotInterquantileWidth", "CAGEexp" # Extract a list of data frames in "long" format for ggplot iqwidths <- lapply(seq_along(sampleLabels(object)), function(x) { - gr <- getClustFun(object, x, returnInterquantileWidth = TRUE, qLow = qLow, qUp = qUp) + gr <- getClustFun(object, x, qLow = qLow, qUp = qUp) gr <- gr[score(gr) >= tpmThreshold] data.frame( sampleName = sampleLabels(object)[[x]], @@ -482,7 +482,7 @@ setMethod( "exportToTrack", "CAGEexp" , CTSS = if (oneTrack) { CTSScoordinatesGR(object) } else { CTSStagCountGR(object, samples = "all") } , tagClusters = tagClustersGR( object, qLow = qLow, qUp = qUp) - , consensusClusters = consensusClustersGR(object, qLow = qLow, qUp = qUp, returnInterquantileWidth = ifelse(is.null(qLow), FALSE, TRUE))) # See issue 108 + , consensusClusters = consensusClustersGR(object, qLow = qLow, qUp = qUp)) exportToTrack( clusters, qLow = qLow, qUp = qUp , colorByExpressionProfile = colorByExpressionProfile diff --git a/R/GetMethods.R b/R/GetMethods.R index 8d7a70a..415033e 100644 --- a/R/GetMethods.R +++ b/R/GetMethods.R @@ -398,20 +398,17 @@ setMethod( "CTSSnormalizedTpmGR", "CAGEexp", function (object, samples) { #' extract tag clusters. If `samples = NULL`, a list of all the clusters for #' each sample is returned. #' -#' @param returnInterquantileWidth Return the interquantile width for each tag cluster. -#' #' @param qLow,qUp Position of which quantile should be used as a left (lower) #' or right (upper) boundary (for `qLow` and `qUp` respectively) when #' calculating interquantile width. Default value `NULL` results in using the -#' start coordinate of the cluster. Used only when -#' `returnInterquantileWidth = TRUE`, otherwise ignored. +#' start coordinate of the cluster. 
#' #' @return Returns a `GRangesList` or a `TagClusters` object with genomic coordinates, #' position of dominant TSS, total CAGE signal and additional information for -#' all TCs from specified CAGE dataset (sample). If -#' `returnInterquantileWidth = TRUE`, interquantile width for each TC is also -#' calculated using provided quantile positions. The [`S4Vectors::metadata`] -#' slot of the object contains a copy of the `CAGEexp` object's _column data_. +#' all TCs from specified CAGE dataset (sample). If quantile information is +#' provided, interquantile width for each TC is also calculated. The +#' [`S4Vectors::metadata`] slot of the object contains a copy of the `CAGEexp` +#' object's _column data_. #' #' @author Vanja Haberle #' @author Charles Plessy @@ -421,21 +418,18 @@ setMethod( "CTSSnormalizedTpmGR", "CAGEexp", function (object, samples) { #' @export #' #' @examples -#' tagClustersGR( exampleCAGEexp, "Zf.high", TRUE, 0.1, 0.9 ) -#' tagClustersGR( exampleCAGEexp, 1 -#' , returnInterquantileWidth = TRUE, qLow = 0.1, qUp = 0.9 ) +#' tagClustersGR( exampleCAGEexp, "Zf.high", 0.1, 0.9 ) +#' tagClustersGR( exampleCAGEexp, 1, qLow = 0.1, qUp = 0.9 ) #' tagClustersGR( exampleCAGEexp )@metadata$colData #' #' @export setGeneric( "tagClustersGR" - , function( object, sample = NULL - , returnInterquantileWidth = FALSE, qLow = NULL, qUp = NULL) { + , function( object, sample = NULL, qLow = NULL, qUp = NULL) { if (is.null(sample)) { tc.list <- GRangesList( lapply( sampleLabels(object) , tagClustersGR , object = object - , returnInterquantileWidth = returnInterquantileWidth , qLow = qLow, qUp = qUp)) names(tc.list) <- sampleLabels(object) metadata(tc.list)$colData <- colData(object) @@ -448,15 +442,12 @@ setGeneric( "tagClustersGR" #' @rdname tagClusters setMethod( "tagClustersGR", "CAGEexp" - , function (object, sample, returnInterquantileWidth, qLow, qUp) { + , function (object, sample, qLow, qUp) { tc <- metadata(object)$tagClusters[[sample]] if (is.null(tc)) stop( 
"No clusters found, run ", sQuote("clusterCTSS"), " first." , call. = FALSE) - if (returnInterquantileWidth) { - if (is.null(qLow) | is.null(qUp)) - stop( "No quantiles specified! Set the ", sQuote("qLow") - , " and ", sQuote("qUp"), "arguments.") + if (! is.null(qLow) & ! is.null(qUp)) { qLowName <- paste0("q_", qLow) qUpName <- paste0("q_", qUp) if(! all( c(qLowName, qUpName) %in% colnames(mcols(tc)))) @@ -487,20 +478,9 @@ setMethod("filteredCTSSidx", "CAGEexp", function (object){ #' @param object A [`CAGEr`] object. #' #' @param sample Optional. Label of the CAGE dataset (experiment, sample) for -#' which to extract sample-specific information on consensus clusters. -#' When no sample is specified (NULL), sample-agnostic information -#' on consensus clusters is provided. This includes the `dominant_ctss` -#' and `tpm.dominant_ctss` for each consensus cluster. -#' -#' @param returnInterquantileWidth Should the interquantile width of consensus -#' clusters be returned? When `sample` argument is specified, the -#' interquantile widths of the consensus clusters in that specified -#' sample are returned, otherwise, the (sample-agnostic) interquantile -#' width of the consensus cluster itself is returned. +#' which to extract sample-specific information on consensus clusters. #' -#' @param qLow,qUp Position of which quantile should be used as a left (lower) -#' or right (upper) boundary when calculating interquantile width. Used -#' only when `returnInterquantileWidth = TRUE`, otherwise ignored. +#' @param qLow,qUp Lower and upper quantiles to compute interquantile width. #' #' @return `consensusClustersGR` returns a [`ConsensusClusters`] object, which #' wraps the [`GRanges`] class. The `score` columns indicates the @@ -510,10 +490,10 @@ setMethod("filteredCTSSidx", "CAGEexp", function (object){ #' NOT specified, total CAGE signal across all CAGE datasets (samples) is #' returned in the `tpm` column. 
When `sample` argument is specified, the `tpm` #' column contains CAGE signal of consensus clusters in that specific sample. -#' When `returnInterquantileWidth = TRUE`, additional sample-specific information -#' is returned, including position of the dominant TSS, and interquantile width -#' of the consensus clusters in the specified sample or otherwise, -#' sample-agnostic information is returned. +#' In addition, sample-specific information is returned, including position of +#' the dominant TSS, and (if applicable) interquantile width of the consensus +#' clusters in the specified sample or otherwise, sample-agnostic information is +#' returned. #' #' @author Vanja Haberle #' @author Charles Plessy @@ -525,7 +505,6 @@ setMethod("filteredCTSSidx", "CAGEexp", function (object){ #' #' @examples #' consensusClustersGR( exampleCAGEexp, sample = 2 -#' , returnInterquantileWidth = TRUE #' , qLow = 0.1, qUp = 0.9) #' #' @importFrom GenomicRanges granges @@ -533,8 +512,7 @@ setMethod("filteredCTSSidx", "CAGEexp", function (object){ setGeneric( "consensusClustersGR" , function( object - , sample = NULL - , returnInterquantileWidth = FALSE + , sample = NULL , qLow = NULL, qUp = NULL) { validSamples(object, sample) standardGeneric("consensusClustersGR")}) @@ -542,7 +520,7 @@ setGeneric( "consensusClustersGR" #' @rdname consensusClusters setMethod( "consensusClustersGR", "CAGEexp" - , function (object, sample, returnInterquantileWidth, qLow, qUp) { + , function (object, sample, qLow, qUp) { cc <- rowRanges(consensusClustersSE(object)) ## Comment and edits added: 2022-OCT-06 ## If sample is NULL, provide sample-agnostic information. 
@@ -554,12 +532,9 @@ setMethod( "consensusClustersGR", "CAGEexp" if (!is.null(qUp)) mcols(cc)[[paste0("q_", qUp)]] <- consensusClustersQuantile(object, sample, qUp) - if (returnInterquantileWidth == TRUE) { - if (is.null(qLow) | is.null(qUp)) - stop( "Set ", sQuote("qLow"), " and ", sQuote("qUp") - , " to specify the quantile positions used to calculate width.") - mcols(cc)[["interquantile_width"]] = mcols(cc)[[paste0("q_", qUp )]] - - mcols(cc)[[paste0("q_", qLow)]] + 1 + if (! is.null(qLow) & ! is.null(qUp)) { + mcols(cc)[["interquantile_width"]] = mcols(cc)[[paste0("q_", qUp )]] - + mcols(cc)[[paste0("q_", qLow)]] + 1 } cc$tpm <- cc$score <- consensusClustersTpm(object)[,sample] @@ -570,7 +545,7 @@ setMethod( "consensusClustersGR", "CAGEexp" score(ctss) <- CTSSnormalizedTpmDF(object) |> rowSums.RleDataFrame() ctss <- ctss[ctss$filteredCTSSidx] cc <- .ctss_summary_for_clusters(ctss, cc, removeSingletons = FALSE) - if(isTRUE(returnInterquantileWidth)) { + if (! is.null(qLow) & ! is.null(qUp)) { qLowName <- paste0("q_", qLow) qUpName <- paste0("q_", qUp) mcols(cc)[["interquantile_width"]] <- diff --git a/man/consensusClusters.Rd b/man/consensusClusters.Rd index 77516e5..1a5dec7 100644 --- a/man/consensusClusters.Rd +++ b/man/consensusClusters.Rd @@ -7,21 +7,9 @@ \alias{consensusClustersSE,CAGEexp-method} \title{Get consensus clusters from CAGEr objects} \usage{ -consensusClustersGR( - object, - sample = NULL, - returnInterquantileWidth = FALSE, - qLow = NULL, - qUp = NULL -) +consensusClustersGR(object, sample = NULL, qLow = NULL, qUp = NULL) -\S4method{consensusClustersGR}{CAGEexp}( - object, - sample = NULL, - returnInterquantileWidth = FALSE, - qLow = NULL, - qUp = NULL -) +\S4method{consensusClustersGR}{CAGEexp}(object, sample = NULL, qLow = NULL, qUp = NULL) consensusClustersSE(object) @@ -31,20 +19,9 @@ consensusClustersSE(object) \item{object}{A \code{\link{CAGEr}} object.} \item{sample}{Optional. 
Label of the CAGE dataset (experiment, sample) for -which to extract sample-specific information on consensus clusters. -When no sample is specified (NULL), sample-agnostic information -on consensus clusters is provided. This includes the \code{dominant_ctss} -and \code{tpm.dominant_ctss} for each consensus cluster.} +which to extract sample-specific information on consensus clusters.} -\item{returnInterquantileWidth}{Should the interquantile width of consensus -clusters be returned? When \code{sample} argument is specified, the -interquantile widths of the consensus clusters in that specified -sample are returned, otherwise, the (sample-agnostic) interquantile -width of the consensus cluster itself is returned.} - -\item{qLow, qUp}{Position of which quantile should be used as a left (lower) -or right (upper) boundary when calculating interquantile width. Used -only when \code{returnInterquantileWidth = TRUE}, otherwise ignored.} +\item{qLow, qUp}{Lower and upper quantiles to compute interquantile width.} } \value{ \code{consensusClustersGR} returns a \code{\link{ConsensusClusters}} object, which @@ -55,10 +32,10 @@ be removed in the future. When \code{sample} argument is NOT specified, total CAGE signal across all CAGE datasets (samples) is returned in the \code{tpm} column. When \code{sample} argument is specified, the \code{tpm} column contains CAGE signal of consensus clusters in that specific sample. -When \code{returnInterquantileWidth = TRUE}, additional sample-specific information -is returned, including position of the dominant TSS, and interquantile width -of the consensus clusters in the specified sample or otherwise, -sample-agnostic information is returned. +In addition, sample-specific information is returned, including position of +the dominant TSS, and (if applicable) interquantile width of the consensus +clusters in the specified sample or otherwise, sample-agnostic information is +returned. 
\code{consensusClustersSE} returns the \code{\link{SummarizedExperiment}} stored in the \code{consensusClusters} experiment slot of the CAGEexp object. @@ -69,7 +46,6 @@ object. } \examples{ consensusClustersGR( exampleCAGEexp, sample = 2 - , returnInterquantileWidth = TRUE , qLow = 0.1, qUp = 0.9) } diff --git a/man/tagClusters.Rd b/man/tagClusters.Rd index e0221a5..3e7beb6 100644 --- a/man/tagClusters.Rd +++ b/man/tagClusters.Rd @@ -8,21 +8,9 @@ \alias{tagClustersGR<-,CAGEexp,missing,GRangesList-method} \title{Extract tag clusters (TCs) for individual CAGE experiments} \usage{ -tagClustersGR( - object, - sample = NULL, - returnInterquantileWidth = FALSE, - qLow = NULL, - qUp = NULL -) +tagClustersGR(object, sample = NULL, qLow = NULL, qUp = NULL) -\S4method{tagClustersGR}{CAGEexp}( - object, - sample = NULL, - returnInterquantileWidth = FALSE, - qLow = NULL, - qUp = NULL -) +\S4method{tagClustersGR}{CAGEexp}(object, sample = NULL, qLow = NULL, qUp = NULL) tagClustersGR(object, sample = NULL) <- value @@ -37,32 +25,28 @@ tagClustersGR(object, sample = NULL) <- value extract tag clusters. If \code{samples = NULL}, a list of all the clusters for each sample is returned.} -\item{returnInterquantileWidth}{Return the interquantile width for each tag cluster.} - \item{qLow, qUp}{Position of which quantile should be used as a left (lower) or right (upper) boundary (for \code{qLow} and \code{qUp} respectively) when calculating interquantile width. Default value \code{NULL} results in using the -start coordinate of the cluster. Used only when -\code{returnInterquantileWidth = TRUE}, otherwise ignored.} +start coordinate of the cluster.} \item{value}{A \code{\link{TagClusters}} object.} } \value{ Returns a \code{GRangesList} or a \code{TagClusters} object with genomic coordinates, position of dominant TSS, total CAGE signal and additional information for -all TCs from specified CAGE dataset (sample). 
If -\code{returnInterquantileWidth = TRUE}, interquantile width for each TC is also -calculated using provided quantile positions. The \code{\link[S4Vectors:Annotated-class]{S4Vectors::metadata}} -slot of the object contains a copy of the \code{CAGEexp} object's \emph{column data}. +all TCs from specified CAGE dataset (sample). If quantile information is +provided, interquantile width for each TC is also calculated. The +\code{\link[S4Vectors:Annotated-class]{S4Vectors::metadata}} slot of the object contains a copy of the \code{CAGEexp} +object's \emph{column data}. } \description{ Extracts tag clusters (TCs) produced by \code{\link{clusterCTSS}} function for a specified CAGE experiment from a \code{\link{CAGEexp}} object. } \examples{ -tagClustersGR( exampleCAGEexp, "Zf.high", TRUE, 0.1, 0.9 ) -tagClustersGR( exampleCAGEexp, 1 - , returnInterquantileWidth = TRUE, qLow = 0.1, qUp = 0.9 ) +tagClustersGR( exampleCAGEexp, "Zf.high", 0.1, 0.9 ) +tagClustersGR( exampleCAGEexp, 1, qLow = 0.1, qUp = 0.9 ) tagClustersGR( exampleCAGEexp )@metadata$colData } diff --git a/vignettes/CAGEexp.Rmd b/vignettes/CAGEexp.Rmd index e0d73be..35552f7 100644 --- a/vignettes/CAGEexp.Rmd +++ b/vignettes/CAGEexp.Rmd @@ -544,8 +544,7 @@ ce <- quantilePositions(ce, clusters = "tagClusters", qLow = 0.1, qUp = 0.9) Tag clusters and their interquantile width can be retrieved by calling `tagClusters` function: ```{r} -tagClustersGR( ce, "Zf.unfertilized.egg" - , returnInterquantileWidth = TRUE, qLow = 0.1, qUp = 0.9) +tagClustersGR(ce, "Zf.unfertilized.egg", qLow = 0.1, qUp = 0.9) ``` Once the cumulative distributions and the positions of quantiles have been calculated, the @@ -608,17 +607,14 @@ different samples. 
Sample-specific information on consensus clusters can be ret tag clusters): ```{r} -consensusClustersGR( ce, sample = "Zf.unfertilized.egg" - , returnInterquantileWidth = TRUE, qLow = 0.1, qUp = 0.9) +consensusClustersGR(ce, sample = "Zf.unfertilized.egg", qLow = 0.1, qUp = 0.9) ``` This will, in addition to genomic coordinates of the consensus clusters, which are constant across all samples, also return the position of the dominant TSS, the CAGE signal (tpm) and the interquantile width specific for a given sample. Note that when specifying individual sample, only the consensus clusters that have some CAGE signal in that sample will be returned (which will be a subset of all consensus clusters). When setting `sample = NULL` sample-agnostic information per consensus cluster is provided. This includes the interquantile width and dominant TSS information for each -consensus cluster independent of the samples. -Again, specifying interquantile boundaries, qLow and qUp, has a similar -behaviour for `returnInterquantileWidth = TRUE`. +consensus cluster independent of the samples; the interquantile width is only returned when both boundaries `qLow` and `qUp` are specified.