diff --git a/DESCRIPTION b/DESCRIPTION index ad30e22..36548b1 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -45,6 +45,7 @@ Suggests: BSgenome.Drerio.UCSC.danRer7, DESeq2, FANTOM3and4CAGE, + ggseqlogo, BiocStyle, knitr, rmarkdown @@ -84,6 +85,7 @@ Collate: 'ShiftingFunctions.R' 'ShiftingMethods.R' 'StrandInvaders.R' + 'TSSlogo.R' LazyData: true VignetteBuilder: knitr RoxygenNote: 7.2.1 diff --git a/NAMESPACE b/NAMESPACE index a9aed44..3784ebf 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -27,6 +27,7 @@ export(CTSStoGenes) export(CustomConsensusClusters) export(GeneExpDESeq2) export(GeneExpSE) +export(TSSlogo) export(aggregateTagClusters) export(annotateCTSS) export(annotateConsensusClusters) @@ -89,6 +90,7 @@ importFrom(BiocParallel,MulticoreParam) importFrom(BiocParallel,SerialParam) importFrom(BiocParallel,bplapply) importFrom(BiocParallel,multicoreWorkers) +importFrom(Biostrings,consensusMatrix) importFrom(CAGEfightR,quickEnhancers) importFrom(GenomeInfoDb,"genome<-") importFrom(GenomeInfoDb,"seqlevels<-") diff --git a/R/TSSlogo.R b/R/TSSlogo.R new file mode 100644 index 0000000..6a4cb59 --- /dev/null +++ b/R/TSSlogo.R @@ -0,0 +1,87 @@ +#' TSS logo +#' +#' Plot the sequence logo of the region flanking the TSS. When this function +#' is given _tag clusters_ or _consensus clusters_, it uses the _dominant peak_ +#' as the transcription start site. +#' +#' This function will only work if the [`CAGEexp`] object was built with a +#' [`BSgenome`] package, as it needs to extract genomic sequences. +#' +#' @param x A [`CTSS`], a [`TagClusters`] or a [`ConsensusClusters`] object. +#' @param upstream Number of bases to plot upstream the TSS. +#' @param downstream Number of bases to plot downstream the TSS, including the +#' TSS itself. +#' +#' @returns A [`ggplot2::ggplot`] object showing the sequence logo. The +#' coordinates displayed are negative for upstream sequences and positive +#' downstream. The position of the TSS is set to 1. +#' +#' @family CAGEr plot functions +#' @family CAGEr TSS functions +#' +#' @author Charles Plessy +#' +#' @examples +#' TSSlogo(exampleCAGEexp|>consensusClustersGR(), 20, 10) +#' +#' @importFrom Biostrings consensusMatrix +#' +#' @export + +setGeneric("TSSlogo", + function (x, upstream = 10, downstream = 10) + standardGeneric("TSSlogo")) + +#' @rdname TSSlogo + +setMethod("TSSlogo", signature("CAGEexp"), + function (x, upstream = 10, downstream = 10) { + TSSlogo(CTSScoordinatesGR(object), upstream = upstream, downstream = downstream) +}) + +#' @rdname TSSlogo + +setMethod("TSSlogo", signature("TagClusters"), + function (x, upstream = 10, downstream = 10) { + TSSlogo(object$dominant_ctss |> as("CTSS"), upstream = upstream, downstream = downstream) +}) + +setMethod("TSSlogo", signature("ConsensusClusters"), + function (x, upstream = 10, downstream = 10) { + TSSlogo(object$dominant_ctss |> as("CTSS"), upstream = upstream, downstream = downstream) +}) + +#' @rdname TSSlogo + +setMethod("TSSlogo", signature("CTSS"), + function (x, upstream = 10, downstream = 10) { + .TSSlogo(object, upstream = upstream, downstream = downstream) +}) + +.TSSlogo <- function(x, upstream=10, downstream=10) { + if (! requireNamespace("ggseqlogo")) + stop("This function requires the ", dQuote("ggseqlogo"), " package; please install it.") + # Extract sequences + upstreamRanges <- + promoters(x = x, upstream = upstream, downstream = downstream) |> + suppressWarnings() # This warns about off-genome coordinates, but we will fix this below. + + # Discard ranges that would be trimmed + upstreamRanges_trimmed <- upstreamRanges |> trim() + upstreamRanges <- upstreamRanges[width(upstreamRanges) == width(upstreamRanges_trimmed)] + + # Extract sequences + bsgenome <- getBSgenome(unique(genome(x))) + upstreamSeq <- getSeq(bsgenome, upstreamRanges) + + # Plot sequence logo + letter_counts <- consensusMatrix(upstreamSeq) + probs <- prop.table(letter_counts[1:4,], 2) + gg <- ggseqlogo::ggseqlogo(probs) + # Circumvent "Scale for x is already present." warning. + gg$scales$scales[[2]]$breaks <- seq( 1 , upstream + downstream, by = 1) + gg$scales$scales[[2]]$labels <- seq(1 - upstream, downstream, by = 1) + gg$scales$scales[[1]]$breaks <- seq( 1 , upstream + downstream, by = 1) + gg$scales$scales[[1]]$labels <- seq(1 - upstream, downstream, by = 1) + gg +} \ No newline at end of file diff --git a/man/TSSlogo.Rd b/man/TSSlogo.Rd new file mode 100644 index 0000000..5cd4b47 --- /dev/null +++ b/man/TSSlogo.Rd @@ -0,0 +1,57 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/TSSlogo.R +\name{TSSlogo} +\alias{TSSlogo} +\alias{TSSlogo,CAGEexp-method} +\alias{TSSlogo,TagClusters-method} +\alias{TSSlogo,CTSS-method} +\title{TSS logo} +\usage{ +TSSlogo(x, upstream = 10, downstream = 10) + +\S4method{TSSlogo}{CAGEexp}(x, upstream = 10, downstream = 10) + +\S4method{TSSlogo}{TagClusters}(x, upstream = 10, downstream = 10) + +\S4method{TSSlogo}{CTSS}(x, upstream = 10, downstream = 10) +} +\arguments{ +\item{x}{A \code{\link{CTSS}}, a \code{\link{TagClusters}} or a \code{\link{ConsensusClusters}} object.} + +\item{upstream}{Number of bases to plot upstream the TSS.} + +\item{downstream}{Number of bases to plot downstream the TSS, including the +TSS itself.} +} +\value{ +A \code{\link[ggplot2:ggplot]{ggplot2::ggplot}} object showing the sequence logo. The +coordinates displayed are negative for upstream sequences and positive +downstream. The position of the TSS is set to 1. +} +\description{ +Plot the sequence logo of the region flanking the TSS. When this function +is given \emph{tag clusters} or \emph{consensus clusters}, it uses the \emph{dominant peak} +as the transcription start site. +} +\details{ +This function will only work if the \code{\link{CAGEexp}} object was built with a +\code{\link{BSgenome}} package, as it needs to extract genomic sequences. +} +\examples{ +TSSlogo(exampleCAGEexp|>consensusClustersGR(), 20, 10) + +} +\seealso{ +Other CAGEr plot functions: +\code{\link{hanabiPlot}()}, +\code{\link{plotAnnot}()}, +\code{\link{plotCorrelation}()}, +\code{\link{plotExpressionProfiles}()}, +\code{\link{plotInterquantileWidth}()}, +\code{\link{plotReverseCumulatives}()} +} +\author{ +Charles Plessy +} +\concept{CAGEr TSS functions} +\concept{CAGEr plot functions} diff --git a/man/hanabiPlot.Rd b/man/hanabiPlot.Rd index 423829b..8f45459 100644 --- a/man/hanabiPlot.Rd +++ b/man/hanabiPlot.Rd @@ -52,6 +52,7 @@ Other CAGEr richness functions: \code{\link{plot.hanabi}()} Other CAGEr plot functions: +\code{\link{TSSlogo}()}, \code{\link{plotAnnot}()}, \code{\link{plotCorrelation}()}, \code{\link{plotExpressionProfiles}()}, diff --git a/man/plotAnnot.Rd b/man/plotAnnot.Rd index ad1f95f..eac30af 100644 --- a/man/plotAnnot.Rd +++ b/man/plotAnnot.Rd @@ -72,6 +72,7 @@ Other CAGEr annotation functions: \code{\link{ranges2names}()} Other CAGEr plot functions: +\code{\link{TSSlogo}()}, \code{\link{hanabiPlot}()}, \code{\link{plotCorrelation}()}, \code{\link{plotExpressionProfiles}()}, diff --git a/man/plotCorrelation.Rd b/man/plotCorrelation.Rd index 6f7c832..c728d8f 100644 --- a/man/plotCorrelation.Rd +++ b/man/plotCorrelation.Rd @@ -179,6 +179,7 @@ plotCorrelation2(exampleCAGEexp, what = "consensusClusters", value = "normalized } \seealso{ Other CAGEr plot functions: +\code{\link{TSSlogo}()}, \code{\link{hanabiPlot}()}, \code{\link{plotAnnot}()}, \code{\link{plotExpressionProfiles}()}, diff --git a/man/plotExpressionProfiles.Rd b/man/plotExpressionProfiles.Rd index e41492d..8c0ff59 100644 --- a/man/plotExpressionProfiles.Rd +++ b/man/plotExpressionProfiles.Rd @@ -32,6 +32,7 @@ exampleCAGEexp |> plotExpressionProfiles("consensusClusters") } \seealso{ Other CAGEr plot functions: +\code{\link{TSSlogo}()}, \code{\link{hanabiPlot}()}, \code{\link{plotAnnot}()}, \code{\link{plotCorrelation}()}, diff --git a/man/plotInterquantileWidth.Rd b/man/plotInterquantileWidth.Rd index a4a04a1..ace83a0 100644 --- a/man/plotInterquantileWidth.Rd +++ b/man/plotInterquantileWidth.Rd @@ -63,6 +63,7 @@ plotInterquantileWidth( exampleCAGEexp, clusters = "consensusClusters" } \seealso{ Other CAGEr plot functions: +\code{\link{TSSlogo}()}, \code{\link{hanabiPlot}()}, \code{\link{plotAnnot}()}, \code{\link{plotCorrelation}()}, diff --git a/man/plotReverseCumulatives.Rd b/man/plotReverseCumulatives.Rd index ddd8dfb..13b6c2b 100644 --- a/man/plotReverseCumulatives.Rd +++ b/man/plotReverseCumulatives.Rd @@ -101,6 +101,7 @@ expression data: constructing the human and mouse promoterome with deepCAGE data \code{\link{normalizeTagCount}} Other CAGEr plot functions: +\code{\link{TSSlogo}()}, \code{\link{hanabiPlot}()}, \code{\link{plotAnnot}()}, \code{\link{plotCorrelation}()}, diff --git a/vignettes/CAGEexp.Rmd b/vignettes/CAGEexp.Rmd index 8495090..f493999 100644 --- a/vignettes/CAGEexp.Rmd +++ b/vignettes/CAGEexp.Rmd @@ -340,6 +340,19 @@ corr.m <- plotCorrelation2( ce, samples = "all" , method = "pearson") ``` +### Sequence distribution at the transcription start site. + +The presence of the core promoter motifs can be checked with the `TSSlogo()` +function, provided that the _CAGEexp_ object was built with a _BSgenome_ +package allowing access to the sequence flanking the transcription start sites. + +```{r TSSlogo, fig.cap="Sequence logo at the transcription start site"} +TSSlogo(CTSScoordinatesGR(ce) |> subset(annotation == "promoter"), upstream = 35) +``` + +The `TSSlogo()` function can also be used later. When passed _tag clusters_ +or _consensus clusters_, it will automatically center the regions on their +_dominant peak_. Merging of replicates ---------------------