Skip to content

Commit

Permalink
feat: create reneeDataSet class
Browse files Browse the repository at this point in the history
  • Loading branch information
kelly-sovacool committed Dec 29, 2023
1 parent 7eb8c7e commit 22f551a
Show file tree
Hide file tree
Showing 16 changed files with 232 additions and 34 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ exclude: |
(?x)(
^assets/|
^docs/.*.html|
^data-raw/*.txt|
^inst/extdata|
^man/
)
repos:
Expand Down
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ Imports:
assertthat,
DESeq2,
dplyr,
S7,
tidyr
Suggests:
readr,
Expand Down
6 changes: 5 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
# Generated by roxygen2: do not edit by hand

export("%>%")
export(counts_dat_to_matrix)
export(create_deseq_obj)
export(filter_low_counts)
export(read_raw_counts)
export(meta_tbl_to_dat)
export(reneeDataSetFromFiles)
export(run_deseq2)
if (getRversion() < "4.3.0") importFrom("S7", "@")
importFrom(dplyr,"%>%")
25 changes: 25 additions & 0 deletions R/counts.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#' Convert a data frame of gene counts to a matrix
#'
#' @param counts_tbl expected gene counts from RSEM as a data frame or tibble.
#'
#' @return matrix of gene counts with rows as gene IDs
#' @export
#'
#' @examples
#' counts_dat_to_matrix(head(gene_counts))
counts_dat_to_matrix <- function(counts_tbl) {
gene_id <- GeneName <- NULL
counts_dat <- counts_tbl %>%
# deseq2 requires integer counts
dplyr::mutate(dplyr::across(
dplyr::where(is.numeric),
\(x) as.integer(round(x, 0))
)) %>%
as.data.frame()
row.names(counts_dat) <- counts_dat %>% dplyr::pull("gene_id")
# convert counts tibble to matrix
counts_mat <- counts_dat %>%
dplyr::select(-c(gene_id, GeneName)) %>%
as.matrix()
return(counts_mat)
}
42 changes: 30 additions & 12 deletions R/deseq2.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,22 +16,40 @@
#' )
#' dds <- create_deseq_obj(gene_counts, sample_meta, ~condition)
create_deseq_obj <- function(counts_tbl, meta_dat, design) {
gene_id <- GeneName <- NULL
counts_dat <- counts_tbl %>%
# deseq2 requires integer counts
dplyr::mutate(dplyr::across(
dplyr::where(is.numeric),
\(x) as.integer(round(x, 0))
)) %>%
as.data.frame()
row.names(counts_dat) <- counts_dat %>% dplyr::pull("gene_id")
# convert counts tibble to matrix
counts_mat <- counts_dat %>%
dplyr::select(-c(gene_id, GeneName)) %>%
as.matrix()
counts_mat <- counts_dat_to_matrix(counts_tbl)

# sample IDs must be in the same order
assertthat::are_equal(colnames(counts_mat), rownames(meta_dat))

return(DESeq2::DESeqDataSetFromMatrix(counts_mat, meta_dat, design))
}

#' Run DESeq2 on a reneeDataSet
#'
#' @param renee_ds reneeDataSet object
#' @param design model formula for experimental design. Columns must exist in `meta_dat`.
#'
#' @return reneeDataSet object with DESeq2 slot filled
#' @export
#'
#' @examples
#' rds <- reneeDataSetFromFiles(
#' system.file("extdata",
#' "RSEM.genes.expected_count.all_samples.txt",
#' package = "reneeTools"
#' ),
#' system.file("extdata", "sample_metadata.tsv",
#' package = "reneeTools"
#' )
#' )
#' run_deseq2(rds, ~condition)
run_deseq2 <- function(renee_ds, design) {
dds <- DESeq2::DESeqDataSetFromMatrix(
renee_ds$counts,
renee_ds$sample_meta,
design
)
renee_ds$deseq2 <- DESeq2::DESeq(dds)
return(renee_ds)
}
22 changes: 22 additions & 0 deletions R/metadata.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#' Convert sample metadata from a tibble to a dataframe with sample IDs as row names
#'
#' @param meta_tbl tibble with `sample_id` column
#'
#' @return dataframe where row names are the sample IDs
#' @export
#'
#' @examples
#' sample_meta_tbl <- readr::read_tsv(system.file("extdata",
#' "sample_metadata.tsv",
#' package = "reneeTools"
#' ))
#' head(sample_meta_tbl)
#' meta_tbl_to_dat(sample_meta_tbl)
meta_tbl_to_dat <- function(meta_tbl) {
sample_id <- NULL
meta_dat <- meta_tbl %>%
as.data.frame() %>%
dplyr::select(-sample_id)
rownames(meta_dat) <- meta_tbl %>% dplyr::pull(sample_id)
return(meta_dat)
}
34 changes: 34 additions & 0 deletions R/renee-class.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
reneeDataSet <- S7::new_class("renee",
parent = S7::class_list,
constructor = function(count_matrix, sample_meta_dat) {
S7::new_object(list(
counts = count_matrix,
sample_meta = sample_meta_dat
))
}
)

#' Create a reneeDataSet object from TSV files
#'
#' @param gene_counts_filepath path to TSV file of expected gene counts from RSEM
#' @param sample_meta_filepath path to TSV file with sample IDs and metadata for differential analysis
#'
#' @return reneeDataSet object
#' @export
#'
#' @examples
#' reneeDataSetFromFiles(
#' system.file("extdata", "RSEM.genes.expected_count.all_samples.txt", package = "reneeTools"),
#' system.file("extdata", "sample_metadata.tsv", package = "reneeTools")
#' )
reneeDataSetFromFiles <- function(gene_counts_filepath, sample_meta_filepath) {
count_mat <- readr::read_tsv(gene_counts_filepath) %>%
counts_dat_to_matrix()
sample_meta_dat <- readr::read_tsv(sample_meta_filepath) %>%
meta_tbl_to_dat()

# sample IDs must be in the same order
assertthat::are_equal(colnames(count_mat), rownames(sample_meta_dat))

return(reneeDataSet(count_mat, sample_meta_dat))
}
9 changes: 9 additions & 0 deletions R/zzz.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# source: https://rconsortium.github.io/S7/articles/packages.html#method-registration
.onLoad <- function(...) {
S7::methods_register()
}

# enable usage of <S7_object>@name in package code
# source: https://rconsortium.github.io/S7/articles/packages.html#backward-compatibility
#' @rawNamespace if (getRversion() < "4.3.0") importFrom("S7", "@")
NULL
2 changes: 1 addition & 1 deletion data-raw/gene_counts.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
# WT_S1.RSEM.genes.results was generated from running RENEE v2.5.3 on the test dataset https://github.com/CCBR/RENEE/tree/e08f7db6c6e638cfd330caa182f64665d2ef37fa/.tests
gene_counts <- readr::read_tsv("data-raw/RSEM.genes.expected_count.all_samples.txt")
gene_counts <- readr::read_tsv(system.file("inst", "extdata", "RSEM.genes.expected_count.all_samples.txt", package = "reneeTools"))
usethis::use_data(gene_counts, overwrite = TRUE)
5 changes: 5 additions & 0 deletions inst/extdata/sample_metadata.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
sample_id condition
KO_S3 knockout
KO_S4 knockout
WT_S1 wildtype
WT_S2 wildtype
20 changes: 20 additions & 0 deletions man/counts_dat_to_matrix.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

25 changes: 25 additions & 0 deletions man/meta_tbl_to_dat.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 0 additions & 19 deletions man/read_raw_counts.Rd

This file was deleted.

25 changes: 25 additions & 0 deletions man/reneeDataSetFromFiles.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

29 changes: 29 additions & 0 deletions man/run_deseq2.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 22f551a

Please sign in to comment.