Skip to content

Commit

Permalink
feat: tally and count
Browse files Browse the repository at this point in the history
  • Loading branch information
js2264 committed Sep 7, 2023
1 parent 8091d1b commit d3fb47b
Show file tree
Hide file tree
Showing 6 changed files with 255 additions and 2 deletions.
16 changes: 16 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ S3method(.DollarNames,GInteractions)
S3method(arrange,GInteractions)
S3method(as_ginteractions,data.frame)
S3method(as_ginteractions,default)
S3method(count,GInteractions)
S3method(filter,GInteractions)
S3method(group_by,GInteractions)
S3method(group_by,GroupedGInteractions)
Expand All @@ -21,12 +22,14 @@ S3method(select,GInteractions)
S3method(slice,GInteractions)
S3method(summarise,GroupedGInteractions)
S3method(summarize,GroupedGInteractions)
S3method(tally,GroupedGInteractions)
S3method(tbl_vars,GInteractions)
S3method(ungroup,GInteractions)
export(anchors1)
export(anchors2)
export(arrange)
export(as_ginteractions)
export(count)
export(end1)
export(end2)
export(filter)
Expand All @@ -53,6 +56,7 @@ export(strand1)
export(strand2)
export(summarise)
export(summarize)
export(tally)
export(ungroup)
export(width1)
export(width2)
Expand Down Expand Up @@ -96,6 +100,7 @@ importFrom(S4Vectors,showAsCell)
importFrom(dplyr,arrange)
importFrom(dplyr,bind_cols)
importFrom(dplyr,bind_rows)
importFrom(dplyr,count)
importFrom(dplyr,filter)
importFrom(dplyr,group_by)
importFrom(dplyr,group_data)
Expand All @@ -112,6 +117,7 @@ importFrom(dplyr,select)
importFrom(dplyr,slice)
importFrom(dplyr,summarise)
importFrom(dplyr,summarize)
importFrom(dplyr,tally)
importFrom(dplyr,tbl_vars)
importFrom(dplyr,ungroup)
importFrom(methods,as)
Expand All @@ -125,11 +131,21 @@ importFrom(methods,setMethod)
importFrom(methods,show)
importFrom(rlang,"!!!")
importFrom(rlang,`:=`)
importFrom(rlang,enquo)
importFrom(rlang,enquos)
importFrom(rlang,eval_tidy)
importFrom(rlang,expr)
importFrom(rlang,inform)
importFrom(rlang,is_call)
importFrom(rlang,local_options)
importFrom(rlang,quo)
importFrom(rlang,quo_get_expr)
importFrom(rlang,quo_is_null)
importFrom(rlang,quo_name)
importFrom(rlang,quos)
importFrom(rlang,sym)
importFrom(rlang,syms)
importFrom(rlang,warn)
importFrom(tibble,as_tibble)
importFrom(tidyselect,eval_rename)
importFrom(tidyselect,eval_select)
Expand Down
150 changes: 150 additions & 0 deletions R/count.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
#' Count or tally GInteractions per group
#'
#' @name ginteractions-count
#'
#' @param x A GInteractions object (for `count()`) or a grouped
#' GInteractions object (for `tally()`)
#' @param ... <[`data-masking`][rlang::args_data_masking]> Variables to group
#' by.
#' @param wt <[`data-masking`][rlang::args_data_masking]> Frequency weights.
#' Can be `NULL` or a variable:
#'
#' * If `NULL` (the default), counts the number of rows in each group.
#' * If a variable, computes `sum(wt)` for each group.
#'
#' @param sort If `TRUE`, will show the largest groups at the top.
#' @param name The name of the new column in the output.
#'
#' @return a \code{S4Vectors::\link[S4Vectors:DataFrame-class]{DataFrame()}}
#' object, with an added column with the count/tally per group.
#'
#' @importFrom dplyr count
#' @importFrom dplyr tally
#'
#' @examples
#' gi <- read.table(text = "
#' chr1 11 20 chr1 21 30 + +
#' chr1 11 20 chr1 51 50 + +
#' chr1 11 30 chr1 51 50 - -
#' chr1 11 30 chr2 51 60 - -",
#' col.names = c(
#' "seqnames1", "start1", "end1",
#' "seqnames2", "start2", "end2", "strand1", "strand2")
#' ) |>
#' as_ginteractions() |>
#' mutate(score = runif(4), type = c('cis', 'cis', 'cis', 'trans'))
#'
#' ####################################################################
#' # 1. Tally groups
#' ####################################################################
#'
#' gi
#'
#' gi |> group_by(strand1) |> tally()
#'
#' gi |> group_by(type) |> tally()
#'
#' ####################################################################
#' # 2. Count per group
#' ####################################################################
#'
#' gi |> count(type)
#'
#' gi |> group_by(type) |> count()
#'
#' gi |> group_by(type) |> count(strand1)
#'
#' @importFrom rlang local_options
#' @importFrom rlang enquo
#' @importFrom rlang sym
#' @importFrom rlang is_call
#' @importFrom rlang quo_get_expr
#' @importFrom rlang warn
#' @importFrom rlang quo
#' @importFrom rlang quo_is_null
#' @importFrom rlang inform
#' @importFrom rlang expr
#' @importFrom dplyr desc
#' @export
tally.GroupedGInteractions <- function(
  x, wt = NULL, sort = FALSE, name = NULL
) {
  # Resolve the output column name; when `name` is NULL the helper picks one
  # that does not clash with the grouping variables of `x`.
  name <- check_n_name(name, group_vars(x))

  # Either the group sizes of `x` (no weights) or an unevaluated
  # `sum(<wt>, na.rm = TRUE)` expression to be injected into summarise().
  tally_value <- tally_n(x, {{ wt }})

  # Set `dplyr.summarise.inform = FALSE` for the duration of this call only.
  rlang::local_options(dplyr.summarise.inform = FALSE)
  tallied <- summarise(x, !!name := !!tally_value)

  # Sorting is opt-in: without it the summarised result is returned as is.
  if (!sort) {
    return(tallied)
  }

  # Largest counts first.
  arrange(tallied, dplyr::desc(!!rlang::sym(name)))
}

#' @rdname ginteractions-count
#' @export
count.GInteractions <- function(x, ..., wt = NULL, sort = FALSE, name = NULL) {
  # Variables passed through `...` are added on top of any existing grouping;
  # with no extra variables `x` is tallied exactly as received.
  # NOTE(review): when `x` is ungrouped and `...` is empty, `tally()` is
  # dispatched on a plain GInteractions -- confirm a suitable method exists.
  to_tally <- if (missing(...)) x else group_by(x, ..., .add = TRUE)

  # Forward `wt` as a quosure so it is evaluated in the caller's context.
  tally(to_tally, wt = !!rlang::enquo(wt), sort = sort, name = name)
}

# Build the value used to tally each group of `x`.
#
# Returns `group_size(x)` when no weight is supplied, otherwise an unevaluated
# `sum(<wt>, na.rm = TRUE)` expression wrapping the captured weight.
tally_n <- function(x, wt) {
  weight <- rlang::enquo(wt)

  # `wt = n()` is equivalent to supplying no weight: warn, then treat it as
  # NULL so that the plain counting path below is taken.
  weight_is_n_call <- rlang::is_call(rlang::quo_get_expr(weight), "n", n = 0)
  if (weight_is_n_call) {
    rlang::warn(c(
      "`wt = n()` is deprecated",
      i = "You can now omit the `wt` argument"
    ))
    weight <- rlang::quo(NULL)
  }

  if (!rlang::quo_is_null(weight)) {
    return(rlang::expr(sum(!!weight, na.rm = TRUE)))
  }

  group_size(x)
}

# Resolve and validate the name of the count column.
#
# @param name `NULL` to have a name picked automatically, or a single
#   non-empty string chosen by the user.
# @param vars Character vector of names that an automatically picked name
#   must not collide with.
# @param arg Label used for `name` in the error message.
# @param call Environment passed to `rlang::abort()` as the error call.
# @return A single string: the name of the count column.
#
# Raises an error when a user-supplied `name` is not a single, non-missing,
# non-empty string. Checking type and length here avoids a less helpful
# failure later, when the name is injected on the left-hand side of `:=`.
check_n_name <- function(
  name,
  vars,
  arg = rlang::caller_arg(name),
  call = rlang::caller_env()
) {
  if (is.null(name)) {
    name <- n_name(vars)
    # Tell the user when the default `n` had to be replaced.
    if (name != "n") {
      rlang::inform(c(
        paste0(
          "Storing counts in `", name,
          "`, as `n` already present in input"
        ),
        i = "Use `name = \"new_name\"` to pick a new name."
      ))
    }
    return(name)
  }

  # `&&` short-circuits, so `is.na()` and `nzchar()` only ever see a
  # length-one character vector.
  is_valid_name <- is.character(name) &&
    length(name) == 1L &&
    !is.na(name) &&
    nzchar(name)
  if (!is_valid_name) {
    rlang::abort(
      paste0("`", arg, "` must be a single non-empty string."),
      call = call
    )
  }

  name
}

# Pick the first of "n", "nn", "nnn", ... that is not already present in `x`.
n_name <- function(x) {
  candidate <- "n"
  repeat {
    if (!candidate %in% x) {
      return(candidate)
    }
    # Name already taken: prepend another "n" and try again.
    candidate <- paste0("n", candidate)
  }
}
10 changes: 10 additions & 0 deletions R/reexports-dplyr.R
Original file line number Diff line number Diff line change
Expand Up @@ -87,3 +87,13 @@ dplyr::summarize
#' @importFrom dplyr summarise
#' @export
dplyr::summarise

#' @rdname reexports
#' @importFrom dplyr tally
#' @export
dplyr::tally

#' @rdname reexports
#' @importFrom dplyr count
#' @export
dplyr::count
7 changes: 6 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,14 @@ The operations currently available for `GInteractions` objects are:

- Group genomic interactions with `group_by`;
- Summarize grouped genomic interactions with `summarize`;
- Tally/count grouped genomic interactions with `tally` and `count`;
- Modify genomic interactions with `mutate`;
- Subset genomic interactions with `filter` using
[`<data-masking>`](https://rlang.r-lib.org/reference/args_data_masking.html)
and logical expressions;
- Pick out any columns from the associated metadata with `select`
using [`<tidy-select>` arguments](https://dplyr.tidyverse.org/reference/dplyr_tidy_select.html);
- Subset using indices with `slice`.
- Subset using indices with `slice`;
- Order genomic interactions with `arrange` using categorical/numerical
variables.

Expand All @@ -33,6 +34,10 @@ variables.
BiocManager::install("tidyomics/plyinteractions")
```

## Using `plyinteractions`

For more details, check our [vignette](vignettes/plyinteractions.Rmd).

## Code of Conduct

Please note that this project is released with a
Expand Down
70 changes: 70 additions & 0 deletions man/ginteractions-count.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion man/reexports.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit d3fb47b

Please sign in to comment.