diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 9693bf4..c2a3d98 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -4,7 +4,7 @@ exclude: | (?x)( ^assets/| ^docs/.*.html| - ^data-raw/*.txt| + ^inst/extdata| ^man/ ) repos: diff --git a/DESCRIPTION b/DESCRIPTION index 40c34ff..29853e6 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -19,6 +19,7 @@ Imports: assertthat, DESeq2, dplyr, + readr, + S7, tidyr Suggests: - readr, diff --git a/NAMESPACE b/NAMESPACE index 024a199..fd4a4c4 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,7 +1,11 @@ # Generated by roxygen2: do not edit by hand export("%>%") +export(counts_dat_to_matrix) export(create_deseq_obj) export(filter_low_counts) -export(read_raw_counts) +export(meta_tbl_to_dat) +export(reneeDataSetFromFiles) +export(run_deseq2) +if (getRversion() < "4.3.0") importFrom("S7", "@") importFrom(dplyr,"%>%") diff --git a/R/counts.R b/R/counts.R new file mode 100644 index 0000000..8bb3a9b --- /dev/null +++ b/R/counts.R @@ -0,0 +1,25 @@ +#' Convert a data frame of gene counts to a matrix +#' +#' @param counts_tbl expected gene counts from RSEM as a data frame or tibble. 
+#' +#' @return matrix of gene counts with rows as gene IDs +#' @export +#' +#' @examples +#' counts_dat_to_matrix(head(gene_counts)) +counts_dat_to_matrix <- function(counts_tbl) { + gene_id <- GeneName <- NULL + counts_dat <- counts_tbl %>% + # deseq2 requires integer counts + dplyr::mutate(dplyr::across( + dplyr::where(is.numeric), + \(x) as.integer(round(x, 0)) + )) %>% + as.data.frame() + row.names(counts_dat) <- counts_dat %>% dplyr::pull("gene_id") + # convert counts tibble to matrix + counts_mat <- counts_dat %>% + dplyr::select(-c(gene_id, GeneName)) %>% + as.matrix() + return(counts_mat) +} diff --git a/R/deseq2.R b/R/deseq2.R index c6459e8..ccf84bf 100644 --- a/R/deseq2.R +++ b/R/deseq2.R @@ -16,22 +16,40 @@ #' ) #' dds <- create_deseq_obj(gene_counts, sample_meta, ~condition) create_deseq_obj <- function(counts_tbl, meta_dat, design) { - gene_id <- GeneName <- NULL - counts_dat <- counts_tbl %>% - # deseq2 requires integer counts - dplyr::mutate(dplyr::across( - dplyr::where(is.numeric), - \(x) as.integer(round(x, 0)) - )) %>% - as.data.frame() - row.names(counts_dat) <- counts_dat %>% dplyr::pull("gene_id") # convert counts tibble to matrix - counts_mat <- counts_dat %>% - dplyr::select(-c(gene_id, GeneName)) %>% - as.matrix() + counts_mat <- counts_dat_to_matrix(counts_tbl) # sample IDs must be in the same order assertthat::are_equal(colnames(counts_mat), rownames(meta_dat)) return(DESeq2::DESeqDataSetFromMatrix(counts_mat, meta_dat, design)) } + +#' Run DESeq2 on a reneeDataSet +#' +#' @param renee_ds reneeDataSet object +#' @param design model formula for experimental design. Columns must exist in `renee_ds$sample_meta`. 
+#' +#' @return reneeDataSet object with DESeq2 slot filled +#' @export +#' +#' @examples +#' rds <- reneeDataSetFromFiles( +#' system.file("extdata", +#' "RSEM.genes.expected_count.all_samples.txt", +#' package = "reneeTools" +#' ), +#' system.file("extdata", "sample_metadata.tsv", +#' package = "reneeTools" +#' ) +#' ) +#' run_deseq2(rds, ~condition) +run_deseq2 <- function(renee_ds, design) { + dds <- DESeq2::DESeqDataSetFromMatrix( + renee_ds$counts, + renee_ds$sample_meta, + design + ) + renee_ds$deseq2 <- DESeq2::DESeq(dds) + return(renee_ds) +} diff --git a/R/metadata.R b/R/metadata.R new file mode 100644 index 0000000..730edad --- /dev/null +++ b/R/metadata.R @@ -0,0 +1,22 @@ +#' Convert sample metadata from a tibble to a dataframe with sample IDs as row names +#' +#' @param meta_tbl tibble with `sample_id` column +#' +#' @return dataframe where row names are the sample IDs +#' @export +#' +#' @examples +#' sample_meta_tbl <- readr::read_tsv(system.file("extdata", +#' "sample_metadata.tsv", +#' package = "reneeTools" +#' )) +#' head(sample_meta_tbl) +#' meta_tbl_to_dat(sample_meta_tbl) +meta_tbl_to_dat <- function(meta_tbl) { + sample_id <- NULL + meta_dat <- meta_tbl %>% + as.data.frame() %>% + dplyr::select(-sample_id) + rownames(meta_dat) <- meta_tbl %>% dplyr::pull(sample_id) + return(meta_dat) +} diff --git a/R/renee-class.R b/R/renee-class.R new file mode 100644 index 0000000..aab1ac0 --- /dev/null +++ b/R/renee-class.R @@ -0,0 +1,34 @@ +reneeDataSet <- S7::new_class("reneeDataSet", + parent = S7::class_list, + constructor = function(count_matrix, sample_meta_dat) { + S7::new_object(list( + counts = count_matrix, + sample_meta = sample_meta_dat + )) + } +) + +#' Create a reneeDataSet object from TSV files +#' +#' @param gene_counts_filepath path to TSV file of expected gene counts from RSEM +#' @param sample_meta_filepath path to TSV file with sample IDs and metadata for differential analysis +#' +#' @return reneeDataSet object +#' @export +#' +#' @examples 
+#' reneeDataSetFromFiles( +#' system.file("extdata", "RSEM.genes.expected_count.all_samples.txt", package = "reneeTools"), +#' system.file("extdata", "sample_metadata.tsv", package = "reneeTools") +#' ) +reneeDataSetFromFiles <- function(gene_counts_filepath, sample_meta_filepath) { + count_mat <- readr::read_tsv(gene_counts_filepath) %>% + counts_dat_to_matrix() + sample_meta_dat <- readr::read_tsv(sample_meta_filepath) %>% + meta_tbl_to_dat() + + # sample IDs must be in the same order + assertthat::assert_that(assertthat::are_equal(colnames(count_mat), rownames(sample_meta_dat))) + + return(reneeDataSet(count_mat, sample_meta_dat)) +} diff --git a/R/zzz.R b/R/zzz.R new file mode 100644 index 0000000..3a6f2a2 --- /dev/null +++ b/R/zzz.R @@ -0,0 +1,9 @@ +# source: https://rconsortium.github.io/S7/articles/packages.html#method-registration +.onLoad <- function(...) { + S7::methods_register() +} + +# enable usage of @name in package code +# source: https://rconsortium.github.io/S7/articles/packages.html#backward-compatibility +#' @rawNamespace if (getRversion() < "4.3.0") importFrom("S7", "@") +NULL diff --git a/data-raw/gene_counts.R b/data-raw/gene_counts.R index 6b68dc8..36d2942 100644 --- a/data-raw/gene_counts.R +++ b/data-raw/gene_counts.R @@ -1,3 +1,3 @@ # WT_S1.RSEM.genes.results was generated from running RENEE v2.5.3 on the test dataset https://github.com/CCBR/RENEE/tree/e08f7db6c6e638cfd330caa182f64665d2ef37fa/.tests -gene_counts <- readr::read_tsv("data-raw/RSEM.genes.expected_count.all_samples.txt") +gene_counts <- readr::read_tsv(system.file("extdata", "RSEM.genes.expected_count.all_samples.txt", package = "reneeTools")) usethis::use_data(gene_counts, overwrite = TRUE) diff --git a/data-raw/RSEM.genes.expected_count.all_samples.txt b/inst/extdata/RSEM.genes.expected_count.all_samples.txt similarity index 100% rename from data-raw/RSEM.genes.expected_count.all_samples.txt rename to inst/extdata/RSEM.genes.expected_count.all_samples.txt diff --git 
a/inst/extdata/sample_metadata.tsv b/inst/extdata/sample_metadata.tsv new file mode 100644 index 0000000..48f3de4 --- /dev/null +++ b/inst/extdata/sample_metadata.tsv @@ -0,0 +1,5 @@ +sample_id condition +KO_S3 knockout +KO_S4 knockout +WT_S1 wildtype +WT_S2 wildtype diff --git a/man/counts_dat_to_matrix.Rd b/man/counts_dat_to_matrix.Rd new file mode 100644 index 0000000..7f4917a --- /dev/null +++ b/man/counts_dat_to_matrix.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/counts.R +\name{counts_dat_to_matrix} +\alias{counts_dat_to_matrix} +\title{Convert a data frame of gene counts to a matrix} +\usage{ +counts_dat_to_matrix(counts_tbl) +} +\arguments{ +\item{counts_tbl}{expected gene counts from RSEM as a data frame or tibble.} +} +\value{ +matrix of gene counts with rows as gene IDs +} +\description{ +Convert a data frame of gene counts to a matrix +} +\examples{ +counts_dat_to_matrix(head(gene_counts)) +} diff --git a/man/meta_tbl_to_dat.Rd b/man/meta_tbl_to_dat.Rd new file mode 100644 index 0000000..2d043f5 --- /dev/null +++ b/man/meta_tbl_to_dat.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/metadata.R +\name{meta_tbl_to_dat} +\alias{meta_tbl_to_dat} +\title{Convert sample metadata from a tibble to a dataframe with sample IDs as row names} +\usage{ +meta_tbl_to_dat(meta_tbl) +} +\arguments{ +\item{meta_tbl}{tibble with \code{sample_id} column} +} +\value{ +dataframe where row names are the sample IDs +} +\description{ +Convert sample metadata from a tibble to a dataframe with sample IDs as row names +} +\examples{ +sample_meta_tbl <- readr::read_tsv(system.file("extdata", + "sample_metadata.tsv", + package = "reneeTools") +) +head(sample_meta_tbl) +meta_tbl_to_dat(sample_meta_tbl) +} diff --git a/man/read_raw_counts.Rd b/man/read_raw_counts.Rd deleted file mode 100644 index fb9fb09..0000000 --- a/man/read_raw_counts.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% 
Generated by roxygen2: do not edit by hand -% Please edit documentation in R/read_raw_counts.R -\name{read_raw_counts} -\alias{read_raw_counts} -\title{read_raw_counts} -\usage{ -read_raw_counts(pathtofile, delimiter = "\\t") -} -\arguments{ -\item{pathtofile}{string absolute file path to raw counts matrix} - -\item{delimiter}{string default is tab} -} -\value{ -raw_counts_matrix -} -\description{ -read_raw_counts -} diff --git a/man/reneeDataSetFromFiles.Rd b/man/reneeDataSetFromFiles.Rd new file mode 100644 index 0000000..fe98dcd --- /dev/null +++ b/man/reneeDataSetFromFiles.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/renee-class.R +\name{reneeDataSetFromFiles} +\alias{reneeDataSetFromFiles} +\title{Create a reneeDataSet object from TSV files} +\usage{ +reneeDataSetFromFiles(gene_counts_filepath, sample_meta_filepath) +} +\arguments{ +\item{gene_counts_filepath}{path to TSV file of expected gene counts from RSEM} + +\item{sample_meta_filepath}{path to TSV file with sample IDs and metadata for differential analysis} +} +\value{ +reneeDataSet object +} +\description{ +Create a reneeDataSet object from TSV files +} +\examples{ +reneeDataSetFromFiles( + system.file("extdata", "RSEM.genes.expected_count.all_samples.txt", package = "reneeTools"), + system.file("extdata", "sample_metadata.tsv", package = "reneeTools") +) +} diff --git a/man/run_deseq2.Rd b/man/run_deseq2.Rd new file mode 100644 index 0000000..a02842b --- /dev/null +++ b/man/run_deseq2.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/deseq2.R +\name{run_deseq2} +\alias{run_deseq2} +\title{Run DESeq2 on a reneeDataSet} +\usage{ +run_deseq2(renee_ds, design) +} +\arguments{ +\item{renee_ds}{reneeDataSet object} + +\item{design}{model formula for experimental design. 
Columns must exist in \code{renee_ds$sample_meta}.} +} +\value{ +reneeDataSet object with DESeq2 slot filled +} +\description{ +Run DESeq2 on a reneeDataSet +} +\examples{ +rds <- reneeDataSetFromFiles( + system.file("extdata", + "RSEM.genes.expected_count.all_samples.txt", + package = "reneeTools"), + system.file("extdata", "sample_metadata.tsv", + package = "reneeTools") +) +run_deseq2(rds, ~condition) +}