Skip to content

Commit

Permalink
Merge pull request #16 from CCBR/oop
Browse files Browse the repository at this point in the history
feat: create reneeDataSet class
  • Loading branch information
kelly-sovacool authored Dec 29, 2023
2 parents 7eb8c7e + b0739bc commit c8abe97
Show file tree
Hide file tree
Showing 21 changed files with 388 additions and 104 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ exclude: |
(?x)(
^assets/|
^docs/.*.html|
^data-raw/*.txt|
^inst/extdata|
^man/
)
repos:
Expand Down
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@ BugReports: https://github.com/CCBR/reneeTools/issues
Depends:
R (>= 2.10)
Imports:
assertthat,
DESeq2,
dplyr,
S7,
tidyr
Suggests:
readr,
Expand Down
8 changes: 6 additions & 2 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
# Generated by roxygen2: do not edit by hand

export("%>%")
export(create_deseq_obj)
export(counts_dat_to_matrix)
export(filter_low_counts)
export(read_raw_counts)
export(meta_tbl_to_dat)
export(reneeDataSetFromDataFrames)
export(reneeDataSetFromFiles)
export(run_deseq2)
if (getRversion() < "4.3.0") importFrom("S7", "@")
importFrom(dplyr,"%>%")
7 changes: 5 additions & 2 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,10 @@

- Create a `NEWS.md` file to track changes to the package.

## New functions
## Main functions & classes

- `reneeDataSet` (#16)
- `reneeDataSetFromFiles()` (#16)
- `reneeDataSetFromDataFrames()` (#16)
- `run_deseq2()` (#16)
- `filter_low_counts()` (#10)
- `create_deseq_obj()` (#13)
25 changes: 25 additions & 0 deletions R/counts.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#' Convert a data frame of gene counts to a matrix
#'
#' @param counts_tbl expected gene counts from RSEM as a data frame or tibble.
#'
#' @return matrix of gene counts with rows as gene IDs
#' @export
#'
#' @examples
#' counts_dat_to_matrix(head(gene_counts))
counts_dat_to_matrix <- function(counts_tbl) {
gene_id <- GeneName <- NULL
counts_dat <- counts_tbl %>%
# deseq2 requires integer counts
dplyr::mutate(dplyr::across(
dplyr::where(is.numeric),
\(x) as.integer(round(x, 0))
)) %>%
as.data.frame()
row.names(counts_dat) <- counts_dat %>% dplyr::pull("gene_id")
# convert counts tibble to matrix
counts_mat <- counts_dat %>%
dplyr::select(-c(gene_id, GeneName)) %>%
as.matrix()
return(counts_mat)
}
50 changes: 21 additions & 29 deletions R/deseq2.R
Original file line number Diff line number Diff line change
@@ -1,37 +1,29 @@
#' Create DESeq2 object from gene counts and sample metadata
#' Run DESeq2 on a reneeDataSet
#'
#' @param counts_tbl expected gene counts from RSEM as a data frame or tibble.
#' @param meta_dat sample metadata as a data frame with rownames as sample IDs.
#' @param design model formula for experimental design. Columns must exist in `meta_dat`.
#' @param renee_ds reneeDataSet object
#' @param design model formula for experimental design. Columns must exist in `meta_dat`.
#' @param ... remaining variables are forwarded to `DESeq2::DESeq()`.
#'
#' @return DESeqDataSet
#' @return reneeDataSet object with DESeq2 slot filled
#' @export
#'
#' @examples
#' sample_meta <- data.frame(
#' row.names = c("KO_S3", "KO_S4", "WT_S1", "WT_S2"),
#' condition = factor(c("knockout", "knockout", "wildtype", "wildtype"),
#' levels = c("wildtype", "knockout")
#' rds <- reneeDataSetFromFiles(
#' system.file("extdata",
#' "RSEM.genes.expected_count.all_samples.txt",
#' package = "reneeTools"
#' ),
#' system.file("extdata", "sample_metadata.tsv",
#' package = "reneeTools"
#' )
#' )
#' dds <- create_deseq_obj(gene_counts, sample_meta, ~condition)
create_deseq_obj <- function(counts_tbl, meta_dat, design) {
gene_id <- GeneName <- NULL
counts_dat <- counts_tbl %>%
# deseq2 requires integer counts
dplyr::mutate(dplyr::across(
dplyr::where(is.numeric),
\(x) as.integer(round(x, 0))
)) %>%
as.data.frame()
row.names(counts_dat) <- counts_dat %>% dplyr::pull("gene_id")
# convert counts tibble to matrix
counts_mat <- counts_dat %>%
dplyr::select(-c(gene_id, GeneName)) %>%
as.matrix()

# sample IDs must be in the same order
assertthat::are_equal(colnames(counts_mat), rownames(meta_dat))

return(DESeq2::DESeqDataSetFromMatrix(counts_mat, meta_dat, design))
#' run_deseq2(rds, ~condition)
run_deseq2 <- function(renee_ds, design, ...) {
dds <- DESeq2::DESeqDataSetFromMatrix(
renee_ds@counts,
renee_ds@sample_meta,
design
)
renee_ds@analyses$deseq2_ds <- DESeq2::DESeq(dds, ...)
return(renee_ds)
}
22 changes: 22 additions & 0 deletions R/metadata.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#' Convert sample metadata from a tibble to a dataframe with sample IDs as row names
#'
#' @param meta_tbl tibble with `sample_id` column
#'
#' @return dataframe where row names are the sample IDs
#' @export
#'
#' @examples
#' sample_meta_tbl <- readr::read_tsv(system.file("extdata",
#' "sample_metadata.tsv",
#' package = "reneeTools"
#' ))
#' head(sample_meta_tbl)
#' meta_tbl_to_dat(sample_meta_tbl)
meta_tbl_to_dat <- function(meta_tbl) {
sample_id <- NULL
meta_dat <- meta_tbl %>%
as.data.frame() %>%
dplyr::select(-sample_id)
rownames(meta_dat) <- meta_tbl %>% dplyr::pull(sample_id)
return(meta_dat)
}
64 changes: 64 additions & 0 deletions R/renee-class.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
reneeDataSet <- S7::new_class("renee",
properties = list(
counts = S7::new_S3_class("matrix"),
sample_meta = S7::class_data.frame,
analyses = S7::class_list
),
constructor = function(count_matrix, sample_meta_dat) {
S7::new_object(S7::S7_object(),
counts = count_matrix,
sample_meta = sample_meta_dat,
analyses = list()
)
}
)

#' Construct a reneeDataSet object from tsv files.
#'
#' @param gene_counts_filepath path to tsv file of expected gene counts from RSEM.
#' @param sample_meta_filepath path to tsv file with sample IDs and metadata for differential analysis.
#'
#' @return reneeDataSet object
#' @export
#'
#' @examples
#' reneeDataSetFromFiles(
#' system.file("extdata", "RSEM.genes.expected_count.all_samples.txt", package = "reneeTools"),
#' system.file("extdata", "sample_metadata.tsv", package = "reneeTools")
#' )
reneeDataSetFromFiles <- function(gene_counts_filepath, sample_meta_filepath) {
count_dat <- readr::read_tsv(gene_counts_filepath)
sample_meta_dat <- readr::read_tsv(sample_meta_filepath)
return(reneeDataSetFromDataFrames(count_dat, sample_meta_dat))
}

#' Construct a reneeDataSet object from data frames
#'
#' @param gene_counts_dat expected gene counts from RSEM as a data frame or tibble.
#' Must contain a `gene_id` column and a column for each sample ID in the metadata.
#' @param sample_meta_dat sample metadata as a data frame or tibble.
#' Must contain a `sample_ID` column.
#'
#' @return reneeDataSet object
#' @export
#'
#' @examples
#' sample_meta <- data.frame(
#' sample_id = c("KO_S3", "KO_S4", "WT_S1", "WT_S2"),
#' condition = factor(
#' c("knockout", "knockout", "wildtype", "wildtype"),
#' levels = c("wildtype", "knockout")
#' )
#' )
#' reneeDataSetFromDataFrames(gene_counts, sample_meta)
reneeDataSetFromDataFrames <- function(gene_counts_dat, sample_meta_dat) {
count_mat <- gene_counts_dat %>% counts_dat_to_matrix()
sample_meta_dat <- sample_meta_dat %>% meta_tbl_to_dat()

# sample IDs must be in the same order
if (!all(colnames(count_mat) == rownames(sample_meta_dat))) {
stop("Not all columns in the count matrix equal the rows in the sample metadata. Sample IDs must be in the same order.")
}

return(reneeDataSet(count_mat, sample_meta_dat))
}
9 changes: 9 additions & 0 deletions R/zzz.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# source: https://rconsortium.github.io/S7/articles/packages.html#method-registration
.onLoad <- function(...) {
S7::methods_register()
}

# enable usage of <S7_object>@name in package code
# source: https://rconsortium.github.io/S7/articles/packages.html#backward-compatibility
#' @rawNamespace if (getRversion() < "4.3.0") importFrom("S7", "@")
NULL
2 changes: 1 addition & 1 deletion data-raw/gene_counts.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
# WT_S1.RSEM.genes.results was generated from running RENEE v2.5.3 on the test dataset https://github.com/CCBR/RENEE/tree/e08f7db6c6e638cfd330caa182f64665d2ef37fa/.tests
gene_counts <- readr::read_tsv("data-raw/RSEM.genes.expected_count.all_samples.txt")
gene_counts <- readr::read_tsv(system.file("inst", "extdata", "RSEM.genes.expected_count.all_samples.txt", package = "reneeTools"))
usethis::use_data(gene_counts, overwrite = TRUE)
5 changes: 5 additions & 0 deletions inst/extdata/sample_metadata.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
sample_id condition
KO_S3 knockout
KO_S4 knockout
WT_S1 wildtype
WT_S2 wildtype
20 changes: 20 additions & 0 deletions man/counts_dat_to_matrix.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

30 changes: 0 additions & 30 deletions man/create_deseq_obj.Rd

This file was deleted.

25 changes: 25 additions & 0 deletions man/meta_tbl_to_dat.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 0 additions & 19 deletions man/read_raw_counts.Rd

This file was deleted.

31 changes: 31 additions & 0 deletions man/reneeDataSetFromDataFrames.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

25 changes: 25 additions & 0 deletions man/reneeDataSetFromFiles.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit c8abe97

Please sign in to comment.