From 6f4842da5800914f22be0a4cb04830fd553aaabf Mon Sep 17 00:00:00 2001 From: js2264 Date: Wed, 13 Sep 2023 00:11:15 +0200 Subject: [PATCH] feat: count and filter overlaps --- DESCRIPTION | 2 + NAMESPACE | 14 +++ R/count-overlaps.R | 137 ++++++++++++++++++++++++++ R/filter-overlaps.R | 139 +++++++++++++++++++++++++++ R/find-overlaps.R | 11 ++- _pkgdown.yml | 2 + man/ginteractions-count-overlaps.Rd | 88 +++++++++++++++++ man/ginteractions-filter-overlaps.Rd | 82 ++++++++++++++++ man/ginteractions-find-overlaps.Rd | 16 ++- 9 files changed, 487 insertions(+), 4 deletions(-) create mode 100644 R/count-overlaps.R create mode 100644 R/filter-overlaps.R create mode 100644 man/ginteractions-count-overlaps.Rd create mode 100644 man/ginteractions-filter-overlaps.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 06f7dce..0491485 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -52,7 +52,9 @@ Collate: 'GroupedGInteractions-class.R' 'anchor.R' 'arrange.R' + 'count-overlaps.R' 'count.R' + 'filter-overlaps.R' 'filter.R' 'find-overlaps.R' 'flank.R' diff --git a/NAMESPACE b/NAMESPACE index 266fa70..5cfae4d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -15,7 +15,15 @@ S3method(arrange,GInteractions) S3method(as_ginteractions,data.frame) S3method(as_ginteractions,default) S3method(count,GInteractions) +S3method(count_overlaps,GInteractions) +S3method(count_overlaps,PinnedGInteractions) +S3method(count_overlaps_directed,GInteractions) +S3method(count_overlaps_directed,PinnedGInteractions) S3method(filter,GInteractions) +S3method(filter_by_non_overlaps,GInteractions) +S3method(filter_by_non_overlaps,PinnedGInteractions) +S3method(filter_by_overlaps,GInteractions) +S3method(filter_by_overlaps,PinnedGInteractions) S3method(find_overlaps,GInteractions) S3method(find_overlaps,PinnedGInteractions) S3method(find_overlaps_directed,GInteractions) @@ -149,8 +157,10 @@ importFrom(GenomicRanges,GRanges) importFrom(GenomicRanges,granges) importFrom(IRanges,IRanges) importFrom(IRanges,ranges) 
+importFrom(IRanges,subsetByOverlaps) importFrom(InteractionSet,GInteractions) importFrom(InteractionSet,anchors) +importFrom(InteractionSet,countOverlaps) importFrom(InteractionSet,findOverlaps) importFrom(InteractionSet,regions) importFrom(S4Vectors,"first<-") @@ -202,6 +212,10 @@ importFrom(plyranges,anchor_5p) importFrom(plyranges,anchor_center) importFrom(plyranges,anchor_end) importFrom(plyranges,anchor_start) +importFrom(plyranges,count_overlaps) +importFrom(plyranges,count_overlaps_directed) +importFrom(plyranges,filter_by_non_overlaps) +importFrom(plyranges,filter_by_overlaps) importFrom(plyranges,find_overlaps) importFrom(plyranges,find_overlaps_directed) importFrom(plyranges,flank_downstream) diff --git a/R/count-overlaps.R b/R/count-overlaps.R new file mode 100644 index 0000000..e20748e --- /dev/null +++ b/R/count-overlaps.R @@ -0,0 +1,137 @@ +#' Count overlaps between a query GInteractions and a GRanges +#' +#' @section Pinned `GInteractions`: +#' +#' When using `count_overlaps()` with a `PinnedGInteractions` object, +#' only the pinned anchors are used to check for overlap with `y`. +#' This is equivalent to specifying `use.region="first"` or `use.region="second"` (matching the pinned anchors) in +#' \code{\href{https://bioconductor.org/packages/release/bioc/vignettes/InteractionSet/inst/doc/interactions.html#27_Overlap_methods}{InteractionSet::countOverlaps()}}. +#' +#' @param x A (Pinned)GInteractions object +#' @param y A GRanges object +#' @param maxgap,minoverlap See \code{?\link[GenomicRanges]{countOverlaps}} +#' in the \pkg{GenomicRanges} package for a description of these arguments +#' +#' @importFrom plyranges count_overlaps +#' @importFrom plyranges count_overlaps_directed +#' @importFrom InteractionSet countOverlaps +#' +#' @return An integer vector of the same length as `x`, giving the number of overlaps with `y` for each interaction. 
+#' +#' @name ginteractions-count-overlaps +#' +#' @examples +#' gi <- read.table(text = " +#' chr1 11 20 - chr1 21 30 + +#' chr1 11 20 - chr1 51 55 + +#' chr1 21 30 - chr1 51 55 + +#' chr1 21 30 - chr2 51 60 +", +#' col.names = c("seqnames1", "start1", "end1", "strand1", "seqnames2", "start2", "end2", "strand2") +#' ) |> as_ginteractions() |> mutate(id = 1:4, type = 'gi') +#' +#' gr <- GenomicRanges::GRanges(c("chr1:20-30:+", "chr2:55-65:-")) |> plyranges::mutate(id = 1:2, type = 'gr') +#' +#' gi +#' +#' gr +#' +#' #################################################################### +#' # 1. Count overlaps between GInteractions and a subject GRanges +#' #################################################################### +#' +#' count_overlaps(gi, gr) +#' +#' count_overlaps_directed(gi, gr) +#' +#' #################################################################### +#' # 2. Count overlaps between PinnedGInteractions and a subject GRanges +#' #################################################################### +#' +#' gi |> pin_by("first") |> count_overlaps(gr) +#' +#' gi |> pin_by("second") |> count_overlaps(gr) +#' +#' gi |> pin_by("first") |> count_overlaps_directed(gr) +#' +#' gi |> pin_by("second") |> count_overlaps_directed(gr) +NULL + +#' @rdname ginteractions-count-overlaps +#' @export +count_overlaps.PinnedGInteractions <- function( + x, y, maxgap = -1L, minoverlap = 0L +) { + + InteractionSet::countOverlaps( + query = unpin(x), + subject = y, + maxgap = maxgap, + minoverlap = minoverlap, + type = 'any', + ignore.strand = TRUE, + use.region = switch( + as.character(pin(x)), + "1" = "first", + "2" = "second" + ) + ) + +} + +#' @rdname ginteractions-count-overlaps +#' @export +count_overlaps.GInteractions <- function( + x, y, maxgap = -1L, minoverlap = 0L, suffix = c(".x", ".y") +) { + + InteractionSet::countOverlaps( + query = x, + subject = y, + maxgap = maxgap, + minoverlap = minoverlap, + type = 'any', + ignore.strand = TRUE, + use.region = 'both' + ) + 
+} + +#' @rdname ginteractions-count-overlaps +#' @export +count_overlaps_directed.PinnedGInteractions <- function( + x, y, maxgap = -1L, minoverlap = 0L, suffix = c(".x", ".y") +) { + + InteractionSet::countOverlaps( + query = unpin(x), + subject = y, + maxgap = maxgap, + minoverlap = minoverlap, + type = 'any', + ignore.strand = FALSE, + use.region = switch( + as.character(pin(x)), + "1" = "first", + "2" = "second" + ) + ) + +} + +#' @rdname ginteractions-count-overlaps +#' @export +count_overlaps_directed.GInteractions <- function( + x, y, maxgap = -1L, minoverlap = 0L, suffix = c(".x", ".y") +) { + + InteractionSet::countOverlaps( + query = x, + subject = y, + maxgap = maxgap, + minoverlap = minoverlap, + type = 'any', + ignore.strand = FALSE, + use.region = 'both' + ) + +} diff --git a/R/filter-overlaps.R b/R/filter-overlaps.R new file mode 100644 index 0000000..3d45b17 --- /dev/null +++ b/R/filter-overlaps.R @@ -0,0 +1,139 @@ +#' Filter GInteractions overlapping with a GRanges +#' +#' @section Pinned `GInteractions`: +#' +#' When using `filter_by_overlaps()` with a `PinnedGInteractions` object, +#' only the pinned anchors are used to check for overlap with `y`. +#' This is equivalent to specifying `use.region="first"` or `use.region="second"` (matching the pinned anchors) in +#' \code{\href{https://bioconductor.org/packages/release/bioc/vignettes/InteractionSet/inst/doc/interactions.html#27_Overlap_methods}{subsetByOverlaps()}}. +#' +#' @param x A (Pinned)GInteractions object +#' @param y A GRanges object +#' @param maxgap,minoverlap See \code{?\link[GenomicRanges]{countOverlaps}} +#' in the \pkg{GenomicRanges} package for a description of these arguments +#' +#' @importFrom plyranges filter_by_overlaps +#' @importFrom plyranges filter_by_non_overlaps +#' @importFrom IRanges subsetByOverlaps +#' +#' @return A `GInteractions` object containing the interactions of `x` that overlap `y` (`filter_by_overlaps()`) or do not overlap `y` (`filter_by_non_overlaps()`). 
+#' +#' @name ginteractions-filter-overlaps +#' +#' @examples +#' gi <- read.table(text = " +#' chr1 11 20 - chr1 21 30 + +#' chr1 11 20 - chr1 51 55 + +#' chr1 21 30 - chr1 51 55 + +#' chr1 21 30 - chr2 51 60 +", +#' col.names = c("seqnames1", "start1", "end1", "strand1", "seqnames2", "start2", "end2", "strand2") +#' ) |> as_ginteractions() |> mutate(id = 1:4, type = 'gi') +#' +#' gr <- GenomicRanges::GRanges(c("chr1:20-30:+", "chr2:55-65:-")) |> plyranges::mutate(id = 1:2, type = 'gr') +#' +#' gi +#' +#' gr +#' +#' #################################################################### +#' # 1. Filter GInteractions overlapping with a subject GRanges +#' #################################################################### +#' +#' filter_by_overlaps(gi, gr) +#' +#' filter_by_non_overlaps(gi, gr) +#' +#' #################################################################### +#' # 2. Filter PinnedGInteractions overlapping with a subject GRanges +#' #################################################################### +#' +#' gi |> pin_by("first") |> filter_by_overlaps(gr) +#' +#' gi |> pin_by("first") |> filter_by_non_overlaps(gr) +#' +#' gi |> pin_by("second") |> filter_by_overlaps(gr) +#' +#' gi |> pin_by("second") |> filter_by_non_overlaps(gr) +NULL + +#' @rdname ginteractions-filter-overlaps +#' @export +filter_by_overlaps.PinnedGInteractions <- function( + x, y, maxgap = -1L, minoverlap = 0L +) { + + IRanges::subsetByOverlaps( + unpin(x), + y, + maxgap = maxgap, + minoverlap = minoverlap, + type = 'any', + ignore.strand = TRUE, + use.region = switch( + as.character(pin(x)), + "1" = "first", + "2" = "second" + ) + ) + +} + +#' @rdname ginteractions-filter-overlaps +#' @export +filter_by_overlaps.GInteractions <- function( + x, y, maxgap = -1L, minoverlap = 0L, suffix = c(".x", ".y") +) { + + IRanges::subsetByOverlaps( + x, + y, + maxgap = maxgap, + minoverlap = minoverlap, + type = 'any', + ignore.strand = TRUE, + use.region = 'both' + ) + +} + +#' @rdname 
ginteractions-filter-overlaps +#' @export +filter_by_non_overlaps.PinnedGInteractions <- function( + x, y, maxgap = -1L, minoverlap = 0L +) { + + IRanges::subsetByOverlaps( + unpin(x), + y, + maxgap = maxgap, + minoverlap = minoverlap, + type = 'any', + ignore.strand = TRUE, + use.region = switch( + as.character(pin(x)), + "1" = "first", + "2" = "second" + ), + invert = TRUE + ) + +} + +#' @rdname ginteractions-filter-overlaps +#' @export +filter_by_non_overlaps.GInteractions <- function( + x, y, maxgap = -1L, minoverlap = 0L, suffix = c(".x", ".y") +) { + + IRanges::subsetByOverlaps( + x, + y, + maxgap = maxgap, + minoverlap = minoverlap, + type = 'any', + ignore.strand = TRUE, + use.region = 'both', + invert = TRUE + ) + +} diff --git a/R/find-overlaps.R b/R/find-overlaps.R index 207684c..088c6c5 100644 --- a/R/find-overlaps.R +++ b/R/find-overlaps.R @@ -1,6 +1,6 @@ #' Find overlaps between a query GInteractions and a GRanges #' -#' @details +#' @section Rationale: #' #' `find_overlaps()` will search for any overlap between `GInteractions` #' in `x` and `GRanges` in `y`. It will return a `GInteractions` object of length @@ -8,6 +8,13 @@ #' have additional metadata columns corresponding to the metadata from `y`. #' `find_overlaps_directed()` takes the strandness of each object into account. #' +#' @section Pinned `GInteractions`: +#' +#' When using `find_overlaps()` with a `PinnedGInteractions` object, +#' only the pinned anchors are used to check for overlap with `y`. +#' This is equivalent to specifying `use.region="first"` or `use.region="second"` (matching the pinned anchors) in +#' \code{\href{https://bioconductor.org/packages/release/bioc/vignettes/InteractionSet/inst/doc/interactions.html#27_Overlap_methods}{InteractionSet::findOverlaps()}}. 
+#' #' @param x A (Pinned)GInteractions object #' @param y A GRanges object #' @param maxgap,minoverlap See \code{?\link[GenomicRanges]{findOverlaps}} @@ -49,7 +56,7 @@ #' find_overlaps_directed(gi, gr) #' #' #################################################################### -#' # 1. Find overlaps between PinnedGInteractions and a subject GRanges +#' # 2. Find overlaps between PinnedGInteractions and a subject GRanges #' #################################################################### #' #' gi |> pin_by("first") |> find_overlaps(gr) diff --git a/_pkgdown.yml b/_pkgdown.yml index b9a1e18..0ed905a 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -37,6 +37,8 @@ reference: - title: "Overlapping GInteractions" contents: - ginteractions-find-overlaps + - ginteractions-count-overlaps + - ginteractions-filter-overlaps - title: "Pinning GInteractions" contents: - pin diff --git a/man/ginteractions-count-overlaps.Rd b/man/ginteractions-count-overlaps.Rd new file mode 100644 index 0000000..7d2e3ca --- /dev/null +++ b/man/ginteractions-count-overlaps.Rd @@ -0,0 +1,88 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/count-overlaps.R +\name{ginteractions-count-overlaps} +\alias{ginteractions-count-overlaps} +\alias{count_overlaps.PinnedGInteractions} +\alias{count_overlaps.GInteractions} +\alias{count_overlaps_directed.PinnedGInteractions} +\alias{count_overlaps_directed.GInteractions} +\title{Count overlaps between a query GInteractions and a GRanges} +\usage{ +\method{count_overlaps}{PinnedGInteractions}(x, y, maxgap = -1L, minoverlap = 0L) + +\method{count_overlaps}{GInteractions}(x, y, maxgap = -1L, minoverlap = 0L, suffix = c(".x", ".y")) + +\method{count_overlaps_directed}{PinnedGInteractions}( + x, + y, + maxgap = -1L, + minoverlap = 0L, + suffix = c(".x", ".y") +) + +\method{count_overlaps_directed}{GInteractions}( + x, + y, + maxgap = -1L, + minoverlap = 0L, + suffix = c(".x", ".y") +) +} +\arguments{ +\item{x}{A 
(Pinned)GInteractions object} + +\item{y}{A GRanges object} + +\item{maxgap, minoverlap}{See \code{?\link[GenomicRanges]{countOverlaps}} +in the \pkg{GenomicRanges} package for a description of these arguments} +} +\value{ +An integer vector of the same length as \code{x}, giving the number of overlaps with \code{y} for each interaction. +} +\description{ +Count overlaps between a query GInteractions and a GRanges +} +\section{Pinned \code{GInteractions}}{ + + +When using \code{count_overlaps()} with a \code{PinnedGInteractions} object, +only the pinned anchors are used to check for overlap with \code{y}. +This is equivalent to specifying \code{use.region="first"} or \code{use.region="second"} (matching the pinned anchors) in +\code{\href{https://bioconductor.org/packages/release/bioc/vignettes/InteractionSet/inst/doc/interactions.html#27_Overlap_methods}{InteractionSet::countOverlaps()}}. +} + +\examples{ +gi <- read.table(text = " + chr1 11 20 - chr1 21 30 + + chr1 11 20 - chr1 51 55 + + chr1 21 30 - chr1 51 55 + + chr1 21 30 - chr2 51 60 +", + col.names = c("seqnames1", "start1", "end1", "strand1", "seqnames2", "start2", "end2", "strand2") +) |> as_ginteractions() |> mutate(id = 1:4, type = 'gi') + +gr <- GenomicRanges::GRanges(c("chr1:20-30:+", "chr2:55-65:-")) |> plyranges::mutate(id = 1:2, type = 'gr') + +gi + +gr + +#################################################################### +# 1. Count overlaps between GInteractions and a subject GRanges +#################################################################### + +count_overlaps(gi, gr) + +count_overlaps_directed(gi, gr) + +#################################################################### +# 2. 
Count overlaps between PinnedGInteractions and a subject GRanges +#################################################################### + +gi |> pin_by("first") |> count_overlaps(gr) + +gi |> pin_by("second") |> count_overlaps(gr) + +gi |> pin_by("first") |> count_overlaps_directed(gr) + +gi |> pin_by("second") |> count_overlaps_directed(gr) +} diff --git a/man/ginteractions-filter-overlaps.Rd b/man/ginteractions-filter-overlaps.Rd new file mode 100644 index 0000000..98ee618 --- /dev/null +++ b/man/ginteractions-filter-overlaps.Rd @@ -0,0 +1,82 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/filter-overlaps.R +\name{ginteractions-filter-overlaps} +\alias{ginteractions-filter-overlaps} +\alias{filter_by_overlaps.PinnedGInteractions} +\alias{filter_by_overlaps.GInteractions} +\alias{filter_by_non_overlaps.PinnedGInteractions} +\alias{filter_by_non_overlaps.GInteractions} +\title{Filter GInteractions overlapping with a GRanges} +\usage{ +\method{filter_by_overlaps}{PinnedGInteractions}(x, y, maxgap = -1L, minoverlap = 0L) + +\method{filter_by_overlaps}{GInteractions}(x, y, maxgap = -1L, minoverlap = 0L, suffix = c(".x", ".y")) + +\method{filter_by_non_overlaps}{PinnedGInteractions}(x, y, maxgap = -1L, minoverlap = 0L) + +\method{filter_by_non_overlaps}{GInteractions}( + x, + y, + maxgap = -1L, + minoverlap = 0L, + suffix = c(".x", ".y") +) +} +\arguments{ +\item{x}{A (Pinned)GInteractions object} + +\item{y}{A GRanges object} + +\item{maxgap, minoverlap}{See \code{?\link[GenomicRanges]{countOverlaps}} +in the \pkg{GenomicRanges} package for a description of these arguments} +} +\value{ +A \code{GInteractions} object containing the interactions of \code{x} that overlap \code{y} (\code{filter_by_overlaps()}) or do not overlap \code{y} (\code{filter_by_non_overlaps()}). 
+} +\description{ +Filter GInteractions overlapping with a GRanges +} +\section{Pinned \code{GInteractions}}{ + + +When using \code{filter_by_overlaps()} with a \code{PinnedGInteractions} object, +only the pinned anchors are used to check for overlap with \code{y}. 
+This is equivalent to specifying \code{use.region="first"} or \code{use.region="second"} (matching the pinned anchors) in +\code{\href{https://bioconductor.org/packages/release/bioc/vignettes/InteractionSet/inst/doc/interactions.html#27_Overlap_methods}{subsetByOverlaps()}}. +} + +\examples{ +gi <- read.table(text = " + chr1 11 20 - chr1 21 30 + + chr1 11 20 - chr1 51 55 + + chr1 21 30 - chr1 51 55 + + chr1 21 30 - chr2 51 60 +", + col.names = c("seqnames1", "start1", "end1", "strand1", "seqnames2", "start2", "end2", "strand2") +) |> as_ginteractions() |> mutate(id = 1:4, type = 'gi') + +gr <- GenomicRanges::GRanges(c("chr1:20-30:+", "chr2:55-65:-")) |> plyranges::mutate(id = 1:2, type = 'gr') + +gi + +gr + +#################################################################### +# 1. Filter GInteractions overlapping with a subject GRanges +#################################################################### + +filter_by_overlaps(gi, gr) + +filter_by_non_overlaps(gi, gr) + +#################################################################### +# 2. Filter PinnedGInteractions overlapping with a subject GRanges +#################################################################### + +gi |> pin_by("first") |> filter_by_overlaps(gr) + +gi |> pin_by("first") |> filter_by_non_overlaps(gr) + +gi |> pin_by("second") |> filter_by_overlaps(gr) + +gi |> pin_by("second") |> filter_by_non_overlaps(gr) +} diff --git a/man/ginteractions-find-overlaps.Rd b/man/ginteractions-find-overlaps.Rd index a9f984c..fb73b68 100644 --- a/man/ginteractions-find-overlaps.Rd +++ b/man/ginteractions-find-overlaps.Rd @@ -46,13 +46,25 @@ in \code{x} that overlap \code{y}. \description{ Find overlaps between a query GInteractions and a GRanges } -\details{ +\section{Rationale}{ + + \code{find_overlaps()} will search for any overlap between \code{GInteractions} in \code{x} and \code{GRanges} in \code{y}. It will return a \code{GInteractions} object of length equal to the number of times \code{x} overlaps \code{y}. 
This \code{GInteractions} will have additional metadata columns corresponding to the metadata from \code{y}. \code{find_overlaps_directed()} takes the strandness of each object into account. } + +\section{Pinned \code{GInteractions}}{ + + +When using \code{find_overlaps()} with a \code{PinnedGInteractions} object, +only the pinned anchors are used to check for overlap with \code{y}. +This is equivalent to specifying \code{use.region="first"} or \code{use.region="second"} (matching the pinned anchors) in +\code{\href{https://bioconductor.org/packages/release/bioc/vignettes/InteractionSet/inst/doc/interactions.html#27_Overlap_methods}{InteractionSet::findOverlaps()}}. +} + \examples{ gi <- read.table(text = " chr1 11 20 - chr1 21 30 + @@ -77,7 +89,7 @@ find_overlaps(gi, gr) find_overlaps_directed(gi, gr) #################################################################### -# 1. Find overlaps between PinnedGInteractions and a subject GRanges +# 2. Find overlaps between PinnedGInteractions and a subject GRanges #################################################################### gi |> pin_by("first") |> find_overlaps(gr)