Skip to content

Commit

Permalink
feat: implement filter_low_counts()
Browse files Browse the repository at this point in the history
resolves #2
  • Loading branch information
kelly-sovacool committed Dec 22, 2023
1 parent baa7c8c commit 245a819
Show file tree
Hide file tree
Showing 10 changed files with 139 additions and 27 deletions.
3 changes: 2 additions & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@ exclude: |
(?x)(
^assets/|
^docs/.*.html|
^data-raw/*.txt
^data-raw/*.txt|
^man/
)
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
Expand Down
1 change: 1 addition & 0 deletions .prettierignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@ results/
*.code-workspace
assets/*.html
data-raw/*.txt
man/*
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,9 @@ Depends:
R (>= 2.10)
Imports:
dplyr,
readr
tidyr
Suggests:
readr,
testthat (>= 3.0.0)
Config/testthat/edition: 3
Encoding: UTF-8
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
# Generated by roxygen2: do not edit by hand

export("%>%")
export(filter_low_counts)
export(read_raw_counts)
importFrom(dplyr,"%>%")
29 changes: 19 additions & 10 deletions R/filter_low_counts.R
Original file line number Diff line number Diff line change
@@ -1,18 +1,27 @@
#' Filter out genes with low total counts
#'
#' Sums the counts of each gene across all sample columns and keeps only the
#' rows of `counts_dat` whose gene total is at least `min_counts`.
#'
#' @param counts_dat dataframe of expected gene counts from RSEM. Must contain
#'   the columns `gene_id` and `GeneName`; every other column is treated as
#'   the counts for one sample. Rows sharing a `gene_id` are summed together
#'   when the gene total is computed.
#' @param min_counts integer number of minimum counts across all samples
#'   (default: 0)
#'
#' @return filtered counts dataframe with the same columns as `counts_dat`
#' @export
#'
#' @examples
#' filter_low_counts(gene_counts)
filter_low_counts <- function(
    counts_dat,
    min_counts = 0) {
  # Fail early with a readable message instead of a tidyselect error raised
  # from inside the pipeline.
  required_cols <- c("gene_id", "GeneName")
  missing_cols <- setdiff(required_cols, colnames(counts_dat))
  if (length(missing_cols) > 0) {
    stop(
      "counts_dat is missing required column(s): ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }
  # A missing or vector-valued threshold would silently drop rows or recycle.
  stopifnot(
    is.numeric(min_counts),
    length(min_counts) == 1,
    !is.na(min_counts)
  )

  # Bind the names used through non-standard evaluation below so that
  # R CMD check does not flag them as undefined global variables.
  gene_id <- count <- count_sum <- NULL

  # Reshape to one row per gene/sample pair, then total the counts per gene.
  genes_above_threshold <- counts_dat %>%
    tidyr::pivot_longer(!c("gene_id", "GeneName"),
      names_to = "sample_id", values_to = "count"
    ) %>%
    dplyr::group_by(gene_id) %>%
    dplyr::summarize(count_sum = sum(count)) %>%
    dplyr::filter(count_sum >= min_counts) %>%
    dplyr::pull(gene_id)

  # Filter the original wide table so the column layout is returned unchanged.
  counts_dat %>%
    dplyr::filter(gene_id %in% genes_above_threshold)
}
20 changes: 7 additions & 13 deletions man/filter_low_counts.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

23 changes: 23 additions & 0 deletions man/gene_counts.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 16 additions & 0 deletions man/reexports.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

35 changes: 35 additions & 0 deletions man/reneeTools-package.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

34 changes: 32 additions & 2 deletions tests/testthat/test-filter_low_counts.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,33 @@
# Tests for filter_low_counts(): genes are kept when their count total across
# all sample columns is at least `min_counts`.
test_that("filter_low_counts works", {
  # Gene totals across s1..s3 are A = 0, B = 1, C = 3.
  test_dat <- data.frame(
    gene_id = c("A", "B", "C"),
    GeneName = c("geneA", "geneB", "geneC"),
    s1 = c(0, 0, 0),
    s2 = c(0, 1, 0),
    s3 = c(0, 0, 3)
  )
  # The default threshold of 0 keeps every gene.
  expect_equal(filter_low_counts(test_dat), test_dat)
  # A threshold of 1 drops only the all-zero gene.
  expect_equal(
    filter_low_counts(test_dat, min_counts = 1),
    data.frame(
      gene_id = c("B", "C"),
      GeneName = c("geneB", "geneC"),
      s1 = c(0, 0), s2 = c(1, 0), s3 = c(0, 3)
    )
  )
  # A threshold of 2 keeps only the gene whose total is 3.
  expect_equal(
    filter_low_counts(test_dat, min_counts = 2),
    data.frame(
      gene_id = "C", GeneName = "geneC",
      s1 = 0, s2 = 0, s3 = 3
    )
  )
  # A threshold above every total yields an empty frame with the same columns.
  expect_equal(
    filter_low_counts(test_dat, min_counts = 5),
    data.frame(
      gene_id = character(0),
      GeneName = character(0),
      s1 = numeric(0), s2 = numeric(0), s3 = numeric(0)
    )
  )
})

test_that("filter_low_counts errors without the required columns", {
  expect_error(filter_low_counts(data.frame(s1 = c(1, 2))))
})

0 comments on commit 245a819

Please sign in to comment.