Skip to content

Commit

Permalink
feat: summarize grouped GInteractions
Browse files Browse the repository at this point in the history
  • Loading branch information
js2264 committed Sep 6, 2023
1 parent fc159d2 commit a8a84aa
Show file tree
Hide file tree
Showing 13 changed files with 325 additions and 56 deletions.
4 changes: 3 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,8 @@ Collate:
'arrange.R'
'filter.R'
'ginteractions-construct.R'
'ginteractions-eval.R'
'ginteractions-env.R'
'ginteractions-scoping.R'
'ginteractions-setters.R'
'tbl_vars.R'
'group_data.R'
Expand All @@ -63,3 +64,4 @@ Collate:
'rename.R'
'select.R'
'slice.R'
'summarize.R'
11 changes: 11 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ S3method(n_groups,GroupedGInteractions)
S3method(rename,GInteractions)
S3method(select,GInteractions)
S3method(slice,GInteractions)
S3method(summarise,GroupedGInteractions)
S3method(summarize,GroupedGInteractions)
S3method(tbl_vars,GInteractions)
S3method(ungroup,GInteractions)
export(anchors1)
Expand Down Expand Up @@ -49,6 +51,8 @@ export(start1)
export(start2)
export(strand1)
export(strand2)
export(summarise)
export(summarize)
export(ungroup)
export(width1)
export(width2)
Expand Down Expand Up @@ -84,9 +88,14 @@ importFrom(S4Vectors,"mcols<-")
importFrom(S4Vectors,"second<-")
importFrom(S4Vectors,DataFrame)
importFrom(S4Vectors,Rle)
importFrom(S4Vectors,as.env)
importFrom(S4Vectors,cbind)
importFrom(S4Vectors,mcols)
importFrom(S4Vectors,rbind)
importFrom(S4Vectors,showAsCell)
importFrom(dplyr,arrange)
importFrom(dplyr,bind_cols)
importFrom(dplyr,bind_rows)
importFrom(dplyr,filter)
importFrom(dplyr,group_by)
importFrom(dplyr,group_data)
Expand All @@ -101,6 +110,8 @@ importFrom(dplyr,n_groups)
importFrom(dplyr,rename)
importFrom(dplyr,select)
importFrom(dplyr,slice)
importFrom(dplyr,summarise)
importFrom(dplyr,summarize)
importFrom(dplyr,tbl_vars)
importFrom(dplyr,ungroup)
importFrom(methods,as)
Expand Down
13 changes: 12 additions & 1 deletion R/DelegatingGInteractions-class.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#' DelegatingGInteractions class
#' @rdname delegating-ginteractions-class
#' @include ginteractions-getters.R
#' @param x DelegatingGInteractions object
#' @param x,object DelegatingGInteractions object
setClass("DelegatingGInteractions",
slots = list(delegate="GInteractions"),
contains=c("GInteractions", "VIRTUAL")
Expand Down Expand Up @@ -69,3 +69,14 @@ setMethod("anchors", "DelegatingGInteractions", function(x) anchors(x@delegate))
setMethod("regions", "DelegatingGInteractions", function(x) regions(x@delegate))
#' @rdname delegating-ginteractions-class
setMethod("seqinfo", "DelegatingGInteractions", function(x) seqinfo(x@delegate))
#' @rdname delegating-ginteractions-class
setMethod("mcols", "DelegatingGInteractions", function(x) mcols(x@delegate))
#' @rdname delegating-ginteractions-class
setMethod("show", "DelegatingGInteractions", function(object) {
groups <- colnames(object@group_keys)
groups <- paste(groups, collapse = ", ")
output <- c("", utils::capture.output(show(object@delegate)))
output[1] <- output[2]
output[2] <- paste("Groups:", groups, paste0("[", object@n, "]"))
cat(output, sep = "\n")
})
14 changes: 4 additions & 10 deletions R/GroupedGInteractions-class.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,20 +14,14 @@ setClass("GroupedGInteractions",

#' @importFrom methods setMethod initialize
setMethod("initialize", "GroupedGInteractions",
function(.Object, delegate = GRanges(), group_keys = DataFrame(), group_indices = Rle(), n = integer()) {
function(
.Object, delegate = GRanges(), group_keys = DataFrame(),
group_indices = Rle(), n = integer()
) {
.Object@delegate <- delegate
.Object@group_keys <- group_keys
.Object@group_indices <- group_indices
.Object@n <- n
.Object
}
)

setMethod("show", "GroupedGInteractions", function(object) {
groups <- colnames(object@group_keys)
groups <- paste(groups, collapse = ", ")
output <- c("", utils::capture.output(show(object@delegate)))
output[1] <- output[2]
output[2] <- paste("Groups:", groups, paste0("[", object@n, "]"))
cat(output, sep = "\n")
})
54 changes: 54 additions & 0 deletions R/ginteractions-env.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
#' @importFrom S4Vectors as.env
setMethod("as.env", "GInteractions", function(x, enclos, tform = identity) {

## This is tricky, normally S4Vectors gets accessors from the
## package defining the class (ie. accessors defined in InteractionSet
## for GInteractions). But `seqnames1`, `strand1`, ... are NOT
## accessors defined in InteractionSet. So I have to trick
## S4Vectors into making the correct nested envs.
##
## Parent: has mcols
## env child: has core GI columns and its accessors

parent <- S4Vectors::as.env(
S4Vectors::mcols(x, use.names=FALSE), enclos, tform = tform
)
env <- .makeFixedColumnEnv(x, parent, tform)
env$.. <- x
env
})

.makeFixedColumnEnv <- function(x, parent, tform = identity) {
env <- new.env(parent=parent)
pvnEnv <- getNamespace("plyinteractions")
nms <- c("seqnames1", "start1", "end1", "width1", "strand1",
"seqnames2", "start2", "end2", "width2", "strand2")
lapply(nms, function(nm) {
accessor <- get(nm, pvnEnv, mode="function")
makeActiveBinding(
sym = nm,
fun = function() {
val <- tform(accessor(x))
rm(list=nm, envir=env)
assign(nm, val, env)
val
},
env = env
)
})
env
}

.overscope_ginteractions <- function(x, envir = parent.frame()) {
env <- S4Vectors::as.env(x, envir)
rlang::new_data_mask(env, top = parent.env(env))
}

.overscope_groupedginteractions <- function(x, envir = parent.frame()) {
env <- S4Vectors::as.env(
x@delegate,
envir,
tform = function(col) unname(S4Vectors::splitAsList(col, x@group_indices))
)
rlang::new_data_mask(env, top = parent.env(env))
}
36 changes: 0 additions & 36 deletions R/ginteractions-eval.R → R/ginteractions-scoping.R
Original file line number Diff line number Diff line change
Expand Up @@ -54,39 +54,3 @@
},
fn)
}

.overscope_ginteractions <- function(x, envir = parent.frame()) {

## This is tricky, normally S4Vectors gets accessors from the
## package defining the class (ie. accessors defined in InteractionSet
## for GInteractions). But `seqnames1`, `strand1`, ... are NOT
## accessors defined in InteractionSet. So I have to trick
## S4Vectors into making the correct nested envs.
##
## Parent: has mcols
## env child: has core GI columns and its accessors

parent <- S4Vectors::as.env(
S4Vectors::mcols(x, use.names=FALSE),
envir,
tform = identity
)
env <- new.env(parent=parent)
pvnEnv <- getNamespace("plyinteractions")
nms <- c("seqnames1", "start1", "end1", "width1", "strand1",
"seqnames2", "start2", "end2", "width2", "strand2")
lapply(nms, function(nm) {
accessor <- get(nm, pvnEnv, mode="function")
makeActiveBinding(nm, function() {
val <- identity(accessor(x))
rm(list=nm, envir=env)
assign(nm, val, env)
val
},
env
)
})
env$.. <- x
rlang::new_data_mask(env, top = parent.env(env))

}
4 changes: 1 addition & 3 deletions R/mutate.R
Original file line number Diff line number Diff line change
Expand Up @@ -46,11 +46,9 @@
#'
#' # Note how the core columns are modified sequentially
#'
#'
#' # Note how the core columns are modified sequentially
#'
#' gi |>
#' mutate(start1 = 1, end1 = 10)
#'
#' gi |>
#' mutate(start1 = 1, end1 = 10, width1 = 50)
#'
Expand Down
15 changes: 15 additions & 0 deletions R/reexports-dplyr.R
Original file line number Diff line number Diff line change
Expand Up @@ -72,3 +72,18 @@ dplyr::n_groups
#' @importFrom dplyr group_rows
#' @export
dplyr::group_rows

#' @rdname reexports
#' @importFrom dplyr ungroup
#' @export
dplyr::ungroup

#' @rdname reexports
#' @importFrom dplyr summarize
#' @export
dplyr::summarize

#' @rdname reexports
#' @importFrom dplyr summarise
#' @export
dplyr::summarise
141 changes: 141 additions & 0 deletions R/summarize.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
#' Summarize GInteractions per group
#'
#' @name ginteractions-summarize
#' @aliases ginteractions-summarise
#' @rdname ginteractions-summarize
#'
#' @param .data a (grouped) GInteractions object
#' @param ... <data-masking> Name-value pairs of summary functions.
#' The name will be the name of the variable in the result.
#'
#' @return a \code{S4Vectors::\link[S4Vectors:DataFrame-class]{DataFrame()}}
#' object:
#'
#' - The rows come from the underlying `group_keys()`.
#' - The columns are a combination of the grouping keys and the summary
#' expressions that you provide.
#' - GInteractions class is **not** preserved, as a call to `summarize`
#' fundamentally creates a new data frame
#'
#' @importFrom S4Vectors rbind cbind
#' @importFrom dplyr summarise
#' @importFrom dplyr summarize
#' @importFrom rlang !!! enquos
#' @importFrom dplyr bind_cols bind_rows
#'
#' @examples
#' gi <- read.table(text = "
#' chr1 11 20 chr1 21 30 + +
#' chr1 11 20 chr1 51 50 + +
#' chr1 11 30 chr1 51 50 - -
#' chr1 11 30 chr2 51 60 - -",
#' col.names = c(
#' "seqnames1", "start1", "end1",
#' "seqnames2", "start2", "end2", "strand1", "strand2")
#' ) |>
#' as_ginteractions() |>
#' mutate(score = runif(4), type = c('cis', 'cis', 'cis', 'trans'))
#'
#' ####################################################################
#' # 1. Summarize a single column
#' ####################################################################
#'
#' gi
#'
#' gi |> group_by(type) |> summarize(m = mean(score))
#'
#' gi |> group_by(strand1) |> summarize(m = mean(score))
#'
#' df <- gi |>
#' group_by(strand1) |>
#' summarize(m = mean(score), n = table(seqnames2))
#' df
#'
#' df$n
#'
#' ####################################################################
#' # 2. Summarize by multiple columns
#' ####################################################################
#'
#' gi |>
#' group_by(strand1, seqnames2) |>
#' summarise(m = mean(score), n = table(type))
#'
#' @export
summarise.GroupedGInteractions <- function(.data, ...) {

quosures <- rlang::enquos(..., .named = TRUE)

## Put each quosure in an environment with the required generic
scoped_quosures <- .scope_quos(quosures)
names(scoped_quosures) <- names(quosures)

## tidyeval quosures in a scoped env. This takes care of all the tidy eval.
overscope <- .overscope_groupedginteractions(.data)
evaled_quosures <- vector("list", length(scoped_quosures))
names(evaled_quosures) <- names(scoped_quosures)
for (i in seq_along(scoped_quosures)) {
quo <- scoped_quosures[[i]]
evaled_quosures[[i]] <- eval_tidy(quo, data = overscope)
}

## If evaluated quosures are lists, make sure to keep them
is_list <- vapply(
evaled_quosures, function(.) is(., "List") || is(., "list"), logical(1)
)
if (any(is_list)) {
nr <- .data@n
for (i in which(is_list)) {
## If scalar, repeat the scalar as many times as the number of groups
if (length(evaled_quosures[[i]]) == 1) {
evaled_quosures[[i]] <- as(
rep(evaled_quosures[[i]], nr), "CompressedList"
)
## Otherwise,
} else {
if (all(lengths(evaled_quosures[[i]]) ==
length(evaled_quosures[[i]][[1]]))
) {
stopifnot(length(evaled_quosures[[i]]) == nr)
evaled_quosures[[i]] <- as(
BiocGenerics::Reduce(
S4Vectors::pc, evaled_quosures[[i]]
),
"CompressedList"
)
}
}
}
}

## If evaluated quosures are table, convert to lists
is_table <- vapply(
evaled_quosures,
function(.) is(., "table"),
logical(1)
)
if (any(is_table)) {
nr <- .data@n
for (i in which(is_table)) {
stopifnot(nrow(evaled_quosures[[i]]) == nr)
evaled_quosures[[i]] <- apply(
evaled_quosures[[i]], 1, function(x) list(x)[[1]],
simplify = FALSE
) |> as("CompressedList")
}
}

## Aggregate results
summarized_df <- DataFrame(evaled_quosures)
rownames(summarized_df) <- NULL
res <- cbind(
group_keys(.data),
summarized_df
)
res[order(res[, group_vars(.data)]), ]

}

#' @rdname ginteractions-summarize
#' @export
summarize.GroupedGInteractions <- summarise.GroupedGInteractions
8 changes: 7 additions & 1 deletion man/delegating-ginteractions-class.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit a8a84aa

Please sign in to comment.