diff --git a/DESCRIPTION b/DESCRIPTION index 853be38..06f7dce 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -54,6 +54,7 @@ Collate: 'arrange.R' 'count.R' 'filter.R' + 'find-overlaps.R' 'flank.R' 'ginteractions-construct.R' 'ginteractions-env.R' diff --git a/NAMESPACE b/NAMESPACE index 6102e0b..266fa70 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -16,6 +16,10 @@ S3method(as_ginteractions,data.frame) S3method(as_ginteractions,default) S3method(count,GInteractions) S3method(filter,GInteractions) +S3method(find_overlaps,GInteractions) +S3method(find_overlaps,PinnedGInteractions) +S3method(find_overlaps_directed,GInteractions) +S3method(find_overlaps_directed,PinnedGInteractions) S3method(flank_downstream,PinnedGInteractions) S3method(flank_downstream,Ranges) S3method(flank_left,PinnedGInteractions) @@ -68,6 +72,8 @@ export(count) export(end1) export(end2) export(filter) +export(find_overlaps) +export(find_overlaps_directed) export(flank_downstream) export(flank_left) export(flank_right) @@ -140,10 +146,12 @@ importFrom(BiocGenerics,width) importFrom(GenomeInfoDb,seqinfo) importFrom(GenomeInfoDb,seqnames) importFrom(GenomicRanges,GRanges) +importFrom(GenomicRanges,granges) importFrom(IRanges,IRanges) importFrom(IRanges,ranges) importFrom(InteractionSet,GInteractions) importFrom(InteractionSet,anchors) +importFrom(InteractionSet,findOverlaps) importFrom(InteractionSet,regions) importFrom(S4Vectors,"first<-") importFrom(S4Vectors,"mcols<-") @@ -194,6 +202,8 @@ importFrom(plyranges,anchor_5p) importFrom(plyranges,anchor_center) importFrom(plyranges,anchor_end) importFrom(plyranges,anchor_start) +importFrom(plyranges,find_overlaps) +importFrom(plyranges,find_overlaps_directed) importFrom(plyranges,flank_downstream) importFrom(plyranges,flank_left) importFrom(plyranges,flank_right) diff --git a/R/find-overlaps.R b/R/find-overlaps.R new file mode 100644 index 0000000..207684c --- /dev/null +++ b/R/find-overlaps.R @@ -0,0 +1,210 @@ +#' Find overlaps between a query GInteractions 
and a GRanges +#' +#' @details +#' +#' `find_overlaps()` will search for any overlap between `GInteractions` +#' in `x` and `GRanges` in `y`. It will return a `GInteractions` object of length +#' equal to the number of times `x` overlaps `y`. This `GInteractions` will +#' have additional metadata columns corresponding to the metadata from `y`. +#' `find_overlaps_directed()` takes the strandedness of each object into account. +#' +#' @param x A (Pinned)GInteractions object +#' @param y A GRanges object +#' @param maxgap,minoverlap See \code{?\link[GenomicRanges]{findOverlaps}} +#' in the \pkg{GenomicRanges} package for a description of these arguments +#' @param suffix Suffix to add to metadata +#' columns (character vector of length 2, defaults to `c(".x", ".y")`). +#' +#' @importFrom GenomicRanges granges +#' @importFrom plyranges find_overlaps +#' @importFrom plyranges find_overlaps_directed +#' @importFrom InteractionSet findOverlaps +#' +#' @return a GInteractions object with rows corresponding to the GInteractions +#' in `x` that overlap `y`. +#' +#' @name ginteractions-find-overlaps +#' +#' @examples +#' gi <- read.table(text = " +#' chr1 11 20 - chr1 21 30 + +#' chr1 11 20 - chr1 51 55 + +#' chr1 21 30 - chr1 51 55 + +#' chr1 21 30 - chr2 51 60 +", +#' col.names = c("seqnames1", "start1", "end1", "strand1", "seqnames2", "start2", "end2", "strand2") +#' ) |> as_ginteractions() |> mutate(id = 1:4, type = 'gi') +#' +#' gr <- GenomicRanges::GRanges(c("chr1:20-30:+", "chr2:55-65:-")) |> plyranges::mutate(id = 1:2, type = 'gr') +#' +#' gi +#' +#' gr +#' +#' #################################################################### +#' # 1. Find overlaps between GInteractions and a subject GRanges +#' #################################################################### +#' +#' find_overlaps(gi, gr) +#' +#' find_overlaps_directed(gi, gr) +#' +#' #################################################################### +#' # 2. 
Find overlaps between PinnedGInteractions and a subject GRanges
+#' ####################################################################
+#'
+#' gi |> pin_by("first") |> find_overlaps(gr)
+#'
+#' gi |> pin_by("second") |> find_overlaps(gr)
+#'
+#' gi |> pin_by("first") |> find_overlaps_directed(gr)
+#'
+#' gi |> pin_by("second") |> find_overlaps_directed(gr)
+NULL
+
+#' @rdname ginteractions-find-overlaps
+#' @export
+find_overlaps.PinnedGInteractions <- function(
+    x, y, maxgap = -1L, minoverlap = 0L, suffix = c(".x", ".y")
+) {
+    # Strand is ignored and only the pinned anchors are searched. The result
+    # is subset from `unpin(x)`, i.e. it is built from the unpinned object.
+    .find_overlaps_gi(
+        unpin(x), y, maxgap, minoverlap, suffix,
+        ignore_strand = TRUE, use_region = .pinned_region(x)
+    )
+}
+
+#' @rdname ginteractions-find-overlaps
+#' @export
+find_overlaps.GInteractions <- function(
+    x, y, maxgap = -1L, minoverlap = 0L, suffix = c(".x", ".y")
+) {
+    # Strand is ignored and both anchor sets of `x` are searched.
+    .find_overlaps_gi(
+        x, y, maxgap, minoverlap, suffix,
+        ignore_strand = TRUE, use_region = "both"
+    )
+}
+
+#' @rdname ginteractions-find-overlaps
+#' @export
+find_overlaps_directed.PinnedGInteractions <- function(
+    x, y, maxgap = -1L, minoverlap = 0L, suffix = c(".x", ".y")
+) {
+    # Same as `find_overlaps.PinnedGInteractions()` but strand-aware
+    # (`ignore.strand = FALSE`).
+    .find_overlaps_gi(
+        unpin(x), y, maxgap, minoverlap, suffix,
+        ignore_strand = FALSE, use_region = .pinned_region(x)
+    )
+}
+
+#' @rdname ginteractions-find-overlaps
+#' @export
+find_overlaps_directed.GInteractions <- function(
+    x, y, maxgap = -1L, minoverlap = 0L, suffix = c(".x", ".y")
+) {
+    # Same as `find_overlaps.GInteractions()` but strand-aware
+    # (`ignore.strand = FALSE`).
+    .find_overlaps_gi(
+        x, y, maxgap, minoverlap, suffix,
+        ignore_strand = FALSE, use_region = "both"
+    )
+}
+
+# Map the pin of a PinnedGInteractions to the `use.region` value handed to
+# `InteractionSet::findOverlaps()`: pin 1 -> "first", pin 2 -> "second".
+# Any other pin is reported as an error instead of silently yielding `NULL`.
+.pinned_region <- function(x) {
+    pinned <- as.character(pin(x))
+    switch(
+        pinned,
+        "1" = "first",
+        "2" = "second",
+        stop("Unsupported pin value: ", pinned, call. = FALSE)
+    )
+}
+
+# Shared engine behind every `find_overlaps*()` method defined in this file.
+#
+# x             (unpinned) GInteractions whose rows are returned
+# y             ranges searched against `x`
+# maxgap,
+# minoverlap    forwarded unchanged to `InteractionSet::findOverlaps()`
+# suffix        character vector of length 2, appended to metadata column
+#               names that exist in both `x` and `y`
+# ignore_strand forwarded as `ignore.strand`
+# use_region    forwarded as `use.region` ("both", "first" or "second")
+#
+# Returns `x` subset to one row per hit, with its metadata replaced by the
+# merged metadata of the matching rows of `x` and `y`.
+.find_overlaps_gi <- function(
+    x, y, maxgap, minoverlap, suffix, ignore_strand, use_region
+) {
+    # A shorter suffix would produce `NA`-suffixed column names downstream.
+    if (!is.character(suffix) || length(suffix) != 2L) {
+        stop("`suffix` must be a character vector of length 2", call. = FALSE)
+    }
+
+    # `y` is passed as the query and `x` as the subject, so rows of `x` are
+    # picked with subjectHits() and rows of `y` with queryHits().
+    hits <- InteractionSet::findOverlaps(
+        query = y,
+        subject = x,
+        maxgap = maxgap,
+        minoverlap = minoverlap,
+        type = "any",
+        select = "all",
+        ignore.strand = ignore_strand,
+        use.region = use_region
+    )
+    left <- x[S4Vectors::subjectHits(hits), ]
+    right <- y[S4Vectors::queryHits(hits), ]
+    mcols(left) <- .mcols_overlaps_update(left, right, suffix)
+    left
+}
+
+# Merge the metadata columns of two hit-aligned objects.
+#
+# Column names found in both `left` and `right` get `suffix[1]` appended on
+# the left side and `suffix[2]` on the right side; the two sets of metadata
+# are then column-bound (left first). If one side has `NULL` metadata the
+# other side is returned as is.
+#
+# With `return_data_frame = TRUE` two range columns (one per object, named
+# with `suffix`) are prepended to the result. The `find_overlaps*()` methods
+# in this file never set it.
+.mcols_overlaps_update <- function(left, right, suffix, return_data_frame = FALSE) {
+
+    left_names <- names(mcols(left))
+    right_names <- names(mcols(right))
+    shared_names <- intersect(left_names, right_names)
+    left_clash <- left_names %in% shared_names
+    right_clash <- right_names %in% shared_names
+
+    # Disambiguate columns that exist on both sides.
+    if (any(left_clash)) {
+        names(mcols(left))[left_clash] <- paste0(left_names[left_clash], suffix[1])
+    }
+    if (any(right_clash)) {
+        names(mcols(right))[right_clash] <- paste0(right_names[right_clash], suffix[2])
+    }
+
+    left_mcols <- mcols(left)
+    right_mcols <- mcols(right)
+    if (is.null(left_mcols)) {
+        additional_mcols <- right_mcols
+    } else if (is.null(right_mcols)) {
+        additional_mcols <- left_mcols
+    } else {
+        additional_mcols <- cbind(left_mcols, right_mcols)
+    }
+
+    if (!return_data_frame) {
+        return(additional_mcols)
+    }
+
+    if (is(left, "GenomicRanges")) {
+        ranges_df <- S4Vectors::DataFrame(
+            granges.x = GenomicRanges::granges(left),
+            granges.y = GenomicRanges::granges(right)
+        )
+    } else {
+        ranges_df <- S4Vectors::DataFrame(
+            ranges.x = ranges(left),
+            ranges.y = ranges(right)
+        )
+    }
+    # Swap the placeholder ".x"/".y" endings for the requested suffixes.
+    names(ranges_df) <- paste0(gsub("\\..*", "", names(ranges_df)), suffix)
+
+    if (is.null(additional_mcols)) {
+        ranges_df
+    } else {
+        cbind(ranges_df, additional_mcols)
+    }
+}
diff --git a/R/reexports-plyranges.R b/R/reexports-plyranges.R
index 576db9d..2009030 100644
--- a/R/reexports-plyranges.R
+++ b/R/reexports-plyranges.R
@@ -37,3 +37,13 @@ plyranges::anchor_5p
 #' @importFrom plyranges stretch
 #' @export
 plyranges::stretch
+
+#' @rdname reexports
+#' @importFrom plyranges find_overlaps
+#' @export
+plyranges::find_overlaps
+
+#' @rdname reexports
+#' @importFrom plyranges find_overlaps_directed
+#' @export
+plyranges::find_overlaps_directed
diff --git a/_pkgdown.yml b/_pkgdown.yml
index e8b1c8a..b9a1e18 100644
--- a/_pkgdown.yml
+++ b/_pkgdown.yml
@@ -34,6 +34,9 @@ reference:
 - title: "`plyranges` verbs"
   contents:
   - starts_with("plyranges-")
+- title: "Overlapping GInteractions"
+  contents:
+  - ginteractions-find-overlaps
 - title: "Pinning GInteractions"
   contents:
   - pin
diff --git a/man/ginteractions-find-overlaps.Rd b/man/ginteractions-find-overlaps.Rd
new file mode 100644
index 0000000..a9f984c
--- /dev/null
+++ b/man/ginteractions-find-overlaps.Rd
@@ -0,0 +1,90 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/find-overlaps.R
+\name{ginteractions-find-overlaps}
+\alias{ginteractions-find-overlaps}
+\alias{find_overlaps.PinnedGInteractions}
+\alias{find_overlaps.GInteractions}
+\alias{find_overlaps_directed.PinnedGInteractions}
+\alias{find_overlaps_directed.GInteractions}
+\title{Find overlaps between a query GInteractions and a GRanges}
+\usage{
+\method{find_overlaps}{PinnedGInteractions}(x, y, maxgap = -1L, minoverlap = 0L, suffix = c(".x", ".y"))
+
+\method{find_overlaps}{GInteractions}(x, y, maxgap = -1L, minoverlap = 0L, suffix = c(".x", ".y"))
+
+\method{find_overlaps_directed}{PinnedGInteractions}(
+  x,
+  y,
+  maxgap = -1L,
+  minoverlap = 0L,
+  suffix = c(".x", ".y")
+)
+
+\method{find_overlaps_directed}{GInteractions}(
+  x,
+  y,
+  maxgap = -1L,
+  minoverlap = 0L,
+  suffix = c(".x", ".y")
+)
+}
+\arguments{
+\item{x}{A (Pinned)GInteractions object}
+
+\item{y}{A GRanges
object} + +\item{maxgap, minoverlap}{See \code{?\link[GenomicRanges]{findOverlaps}} +in the \pkg{GenomicRanges} package for a description of these arguments} + +\item{suffix}{Suffix to add to metadata +columns (character vector of length 2, default to \code{c(".x", ".y")}).} +} +\value{ +a GInteractions object with rows corresponding to the GInteractions +in \code{x} that overlap \code{y}. +} +\description{ +Find overlaps between a query GInteractions and a GRanges +} +\details{ +\code{find_overlaps()} will search for any overlap between \code{GInteractions} +in \code{x} and \code{GRanges} in \code{y}. It will return a \code{GInteractions} object of length +equal to the number of times \code{x} overlaps \code{y}. This \code{GInteractions} will +have additional metadata columns corresponding to the metadata from \code{y}. +\code{find_overlaps_directed()} takes the strandness of each object into account. +} +\examples{ +gi <- read.table(text = " + chr1 11 20 - chr1 21 30 + + chr1 11 20 - chr1 51 55 + + chr1 21 30 - chr1 51 55 + + chr1 21 30 - chr2 51 60 +", + col.names = c("seqnames1", "start1", "end1", "strand1", "seqnames2", "start2", "end2", "strand2") +) |> as_ginteractions() |> mutate(id = 1:4, type = 'gi') + +gr <- GenomicRanges::GRanges(c("chr1:20-30:+", "chr2:55-65:-")) |> plyranges::mutate(id = 1:2, type = 'gr') + +gi + +gr + +#################################################################### +# 1. Find overlaps between GInteractions and a subject GRanges +#################################################################### + +find_overlaps(gi, gr) + +find_overlaps_directed(gi, gr) + +#################################################################### +# 1. 
Find overlaps between PinnedGInteractions and a subject GRanges +#################################################################### + +gi |> pin_by("first") |> find_overlaps(gr) + +gi |> pin_by("second") |> find_overlaps(gr) + +gi |> pin_by("first") |> find_overlaps_directed(gr) + +gi |> pin_by("second") |> find_overlaps_directed(gr) +} diff --git a/man/reexports.Rd b/man/reexports.Rd index 0bf1655..c4805cf 100644 --- a/man/reexports.Rd +++ b/man/reexports.Rd @@ -32,6 +32,8 @@ \alias{anchor_3p} \alias{anchor_5p} \alias{stretch} +\alias{find_overlaps} +\alias{find_overlaps_directed} \title{Objects exported from other packages} \value{ Depending on the re-exported function @@ -47,6 +49,6 @@ below to see their documentation. \describe{ \item{dplyr}{\code{\link[dplyr]{arrange}}, \code{\link[dplyr]{count}}, \code{\link[dplyr]{filter}}, \code{\link[dplyr]{group_by}}, \code{\link[dplyr]{group_data}}, \code{\link[dplyr:group_data]{group_indices}}, \code{\link[dplyr:group_data]{group_keys}}, \code{\link[dplyr:group_data]{group_rows}}, \code{\link[dplyr:group_data]{group_size}}, \code{\link[dplyr:group_data]{group_vars}}, \code{\link[dplyr:group_data]{groups}}, \code{\link[dplyr]{mutate}}, \code{\link[dplyr:group_data]{n_groups}}, \code{\link[dplyr]{rename}}, \code{\link[dplyr]{select}}, \code{\link[dplyr]{slice}}, \code{\link[dplyr]{summarise}}, \code{\link[dplyr:summarise]{summarize}}, \code{\link[dplyr:count]{tally}}, \code{\link[dplyr:group_by]{ungroup}}} - \item{plyranges}{\code{\link[plyranges:ranges-anchor]{anchor}}, \code{\link[plyranges:ranges-anchor]{anchor_3p}}, \code{\link[plyranges:ranges-anchor]{anchor_5p}}, \code{\link[plyranges:ranges-anchor]{anchor_center}}, \code{\link[plyranges:ranges-anchor]{anchor_end}}, \code{\link[plyranges:ranges-anchor]{anchor_start}}, \code{\link[plyranges]{stretch}}, \code{\link[plyranges:ranges-anchor]{unanchor}}} + \item{plyranges}{\code{\link[plyranges:ranges-anchor]{anchor}}, \code{\link[plyranges:ranges-anchor]{anchor_3p}}, 
\code{\link[plyranges:ranges-anchor]{anchor_5p}}, \code{\link[plyranges:ranges-anchor]{anchor_center}}, \code{\link[plyranges:ranges-anchor]{anchor_end}}, \code{\link[plyranges:ranges-anchor]{anchor_start}}, \code{\link[plyranges:ranges-overlaps]{find_overlaps}}, \code{\link[plyranges:ranges-overlaps]{find_overlaps_directed}}, \code{\link[plyranges]{stretch}}, \code{\link[plyranges:ranges-anchor]{unanchor}}} }}