Skip to content

Commit

Permalink
Merge pull request #13 from CCBR/iss-11
Browse files Browse the repository at this point in the history
feat: create DESeq object from RSEM expected counts
  • Loading branch information
kelly-sovacool authored Dec 28, 2023
2 parents b20327a + 1fcdde1 commit ff0908c
Show file tree
Hide file tree
Showing 10 changed files with 126 additions and 7 deletions.
2 changes: 2 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ BugReports: https://github.com/CCBR/reneeTools/issues
Depends:
R (>= 2.10)
Imports:
assertthat,
DESeq2,
dplyr,
tidyr
Suggests:
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Generated by roxygen2: do not edit by hand

export("%>%")
export(create_deseq_obj)
export(filter_low_counts)
export(read_raw_counts)
importFrom(dplyr,"%>%")
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@
## New functions

- `filter_low_counts()` (#10)
- `create_deseq_obj()` (#13)
4 changes: 2 additions & 2 deletions R/data.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#' RSEM gene counts
#' RSEM expected gene counts
#'
#' @format ## `gene_counts`
#' A data frame with columns 'gene_id', 'GeneName', and a column for each sample's count.
#' A data frame with columns 'gene_id', 'GeneName', and a column for each sample's expected count.
#'
#' @source Generated by running RENEE v2.5.8 on the
#' [test dataset](https://github.com/CCBR/RENEE/tree/e08f7db6c6e638cfd330caa182f64665d2ef37fa/.tests)
Expand Down
37 changes: 37 additions & 0 deletions R/deseq2.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#' Create DESeq2 object from gene counts and sample metadata
#'
#' Numeric columns of `counts_tbl` are rounded to the nearest integer because
#' DESeq2 requires integer counts. The sample columns of `counts_tbl` must
#' match the row names of `meta_dat`, in the same order; otherwise an error is
#' raised before the DESeq2 object is built.
#'
#' @param counts_tbl expected gene counts from RSEM as a data frame or tibble.
#'   Must contain the columns `gene_id` and `GeneName`; every other column is
#'   used as a sample column of the count matrix.
#' @param meta_dat sample metadata as a data frame with rownames as sample IDs.
#' @param design model formula for experimental design. Columns must exist in
#'   `meta_dat`.
#'
#' @return DESeqDataSet
#' @export
#'
#' @examples
#' sample_meta <- data.frame(
#'   row.names = c("KO_S3", "KO_S4", "WT_S1", "WT_S2"),
#'   condition = factor(c("knockout", "knockout", "wildtype", "wildtype"),
#'     levels = c("wildtype", "knockout")
#'   )
#' )
#' dds <- create_deseq_obj(gene_counts, sample_meta, ~condition)
create_deseq_obj <- function(counts_tbl, meta_dat, design) {
  # Placeholders for the bare column names used in select() below
  # (presumably to silence R CMD check's "no visible binding" note).
  gene_id <- GeneName <- NULL
  counts_dat <- counts_tbl %>%
    # deseq2 requires integer counts
    dplyr::mutate(dplyr::across(
      dplyr::where(is.numeric),
      # function(x) rather than \(x): the lambda shorthand needs R >= 4.1
      function(x) as.integer(round(x, 0))
    )) %>%
    as.data.frame()
  row.names(counts_dat) <- counts_dat %>% dplyr::pull("gene_id")
  # convert counts tibble to matrix
  counts_mat <- counts_dat %>%
    dplyr::select(-c(gene_id, GeneName)) %>%
    as.matrix()

  # sample IDs must be in the same order.
  # are_equal() on its own only returns TRUE/FALSE; assert_that() is what
  # turns a mismatch into an error.
  assertthat::assert_that(
    assertthat::are_equal(colnames(counts_mat), rownames(meta_dat)),
    msg = paste(
      "sample columns of `counts_tbl` must match the row names of `meta_dat`",
      "in the same order"
    )
  )

  DESeq2::DESeqDataSetFromMatrix(
    countData = counts_mat,
    colData = meta_dat,
    design = design
  )
}
2 changes: 1 addition & 1 deletion R/filter_low_counts.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#' filter_low_counts
#'
#' @param counts_dat dataframe of expected gene counts from RSEM
#' @param counts_dat expected gene counts from RSEM as a data frame or tibble
#' @param min_counts integer number of minimum counts across all samples (default: 0)
#'
#' @return filtered counts dataframe
Expand Down
30 changes: 30 additions & 0 deletions man/create_deseq_obj.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/filter_low_counts.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions man/gene_counts.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

48 changes: 48 additions & 0 deletions tests/testthat/test-deseq2.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
set.seed(20231228)
test_that("create_deseq_obj works", {
  # Sample metadata: two knockout and two wildtype samples, wildtype as the
  # reference level.
  sample_ids <- c("KO_S3", "KO_S4", "WT_S1", "WT_S2")
  condition <- factor(
    c("knockout", "knockout", "wildtype", "wildtype"),
    levels = c("wildtype", "knockout")
  )
  meta <- data.frame(row.names = sample_ids, condition = condition)

  deseq_obj <- create_deseq_obj(gene_counts, meta, ~condition)

  # The column data stored in the object should round-trip the metadata.
  observed_coldata <- deseq_obj@colData %>% as.data.frame()
  expected_coldata <- data.frame(
    condition = factor(
      c("knockout", "knockout", "wildtype", "wildtype"),
      levels = c("wildtype", "knockout")
    ),
    row.names = c("KO_S3", "KO_S4", "WT_S1", "WT_S2")
  )
  expect_equal(observed_coldata, expected_coldata)

  # Spot-check the integer counts for genes with more than 15 counts in KO_S3.
  observed_counts <- deseq_obj@assays@data@listData %>%
    as.data.frame() %>%
    dplyr::filter(counts.KO_S3 > 15)
  expected_counts <- data.frame(
    counts.KO_S3 = c(25L, 16L, 19L),
    counts.KO_S4 = c(22L, 10L, 26L),
    counts.WT_S1 = c(74L, 0L, 10L),
    counts.WT_S2 = c(104L, 0L, 8L),
    row.names = c(
      "ENSG00000185658.13",
      "ENSG00000233922.2",
      "ENSG00000157601.14"
    )
  )
  expect_equal(observed_counts, expected_counts)
})

0 comments on commit ff0908c

Please sign in to comment.