diff --git a/NAMESPACE b/NAMESPACE index a858874..c2bc909 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -13,6 +13,7 @@ export(nbGAM) export(plotClusteredGenes) export(plotModels) export(smoothedCountsMatrix) +export(sortGenesHeatmap) export(stripGLM) export(testDynamic) export(testSlope) @@ -48,6 +49,7 @@ importFrom(dplyr,rename) importFrom(dplyr,rowwise) importFrom(dplyr,sample_frac) importFrom(dplyr,select) +importFrom(dplyr,summarise) importFrom(dplyr,ungroup) importFrom(dplyr,with_groups) importFrom(foreach,"%dopar%") diff --git a/R/fitGLMM.R b/R/fitGLMM.R index 1d5e751..f9349b8 100644 --- a/R/fitGLMM.R +++ b/R/fitGLMM.R @@ -120,7 +120,7 @@ fitGLMM <- function(X_pred = NULL, } else { glmm_mod <- glmmTMB::glmmTMB(Y ~ X1 + X2 + X3 + X4 + (1 + X1 + X2 + X3 + X4 | subject), data = glmm_basis_df, - offset = log(Y.offset), + offset = log(1 / Y.offset), family = glmmTMB::nbinom2(link = "log"), se = TRUE, REML = FALSE) diff --git a/R/sortGenesHeatmap.R b/R/sortGenesHeatmap.R new file mode 100644 index 0000000..1a20711 --- /dev/null +++ b/R/sortGenesHeatmap.R @@ -0,0 +1,48 @@ +#' Generate a table of fitted values and celltype metadata for genes of interest. +#' +#' @name sortGenesHeatmap +#' @author Jack Leary +#' @import magrittr +#' @importFrom purrr map reduce +#' @importFrom dplyr filter distinct select with_groups summarise arrange pull +#' @description Sort genes such that genes with peak expression occurring earlier in pseudotime are first, and vice versa for genes with late peak expression. Useful for ordering genes in order to create heatmaps of expression cascades. +#' @param heatmap.mat A matrix of raw or smoothed expression values with genes as columns and cells as rows. Defaults to NULL. +#' @param pt.vec A numeric vector of pseudotime values for each cell i.e., for each row in the heatmap matrix. Defaults to NULL. +#' @return A character vector of genes sorted by their peak expression values over pseudotime. +#' @export +#' @examples +#' \dontrun{ +#' smoothed_counts <- smoothedCountsMatrix(gene_stats, pt = pt_df) +#' sortGenesheatmap(heatmap.mat = smoothed_counts$Lineage_A, +#' pt.vec = pt_df[!is.na(pt_df$Lineage_A), ]$Lineage_A) +#' } + +sortGenesHeatmap <- function(heatmap.mat = NULL, pt.vec = NULL) { + # check inputs + if (!inherits(heatmap.mat, "matrix")) { + heatmap.mat <- try(as.matrix(heatmap.mat), silent = TRUE) + if (inherits(heatmap.mat, "try-error")) { + stop("heatmap.mat must be coerceable to a matrix.") + } + } + if (!is.numeric(pt.vec) || any(is.na(pt.vec))) { stop("pt.vec must be a numeric vector with no NA values.") } + + # identify point at which peak expression occurs for each gene across pseudotime + gene_peak_order <- purrr::map(seq(ncol(heatmap.mat)), \(x) { + data.frame(gene = colnames(heatmap.mat)[x], + pt = pt.vec, + mRNA = heatmap.mat[, x]) %>% + dplyr::filter(mRNA == max(mRNA)) %>% + dplyr::distinct() %>% + dplyr::select(gene, + pt, + mRNA) + }) %>% + purrr::reduce(rbind) %>% + dplyr::with_groups(gene, + dplyr::summarise, + mu = mean(pt)) %>% + dplyr::arrange(mu) %>% + dplyr::pull(gene) + return(gene_peak_order) +} diff --git a/R/testDynamic.R b/R/testDynamic.R index eb05583..79b0867 100644 --- a/R/testDynamic.R +++ b/R/testDynamic.R @@ -512,7 +512,7 @@ testDynamic <- function(expr.mat = NULL, }, silent = TRUE) } else { null_sumy_df <- try({ - as.data.frame(broom.mixed::tidy(null_mod)) # saves a few bytes by converting from tibble + as.data.frame(broom.mixed::tidy(null_mod)) # saves a few bytes by converting from tibble }, silent = TRUE) null_pred_df <- try({ data.frame(stats::predict(null_mod, type = "link", se.fit = TRUE)[1:2]) %>% @@ -631,11 +631,11 @@ testDynamic <- function(expr.mat = NULL, total_time_numeric <- as.numeric(total_time) print(paste0("testDynamic evaluated ", length(genes), - " genes with ", + " genes across ", n_lineages, " ", ifelse(n_lineages == 1, "lineage ", "lineages "), - "apiece in ", + "in ", round(total_time_numeric, 3), " ", total_time_units)) diff --git a/man/sortGenesHeatmap.Rd b/man/sortGenesHeatmap.Rd new file mode 100644 index 0000000..712f3f1 --- /dev/null +++ b/man/sortGenesHeatmap.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sortGenesHeatmap.R +\name{sortGenesHeatmap} +\alias{sortGenesHeatmap} +\title{Generate a table of fitted values and celltype metadata for genes of interest.} +\usage{ +sortGenesHeatmap(heatmap.mat = NULL, pt.vec = NULL) +} +\arguments{ +\item{heatmap.mat}{A matrix of raw or smoothed expression values with genes as columns and cells as rows. Defaults to NULL.} + +\item{pt.vec}{A numeric vector of pseudotime values for each cell i.e., for each row in the heatmap matrix. Defaults to NULL.} +} +\value{ +A character vector of genes sorted by their peak expression values over pseudotime. +} +\description{ +Sort genes such that genes with peak expression occurring earlier in pseudotime are first, and vice versa for genes with late peak expression. Useful for ordering genes in order to create heatmaps of expression cascades. +} +\examples{ +\dontrun{ +smoothed_counts <- smoothedCountsMatrix(gene_stats, pt = pt_df) +sortGenesheatmap(heatmap.mat = smoothed_counts$Lineage_A, + pt.vec = pt_df[!is.na(pt_df$Lineage_A), ]$Lineage_A) +} +} +\author{ +Jack Leary +} diff --git a/tests/testthat/test_scLANE.R b/tests/testthat/test_scLANE.R index 9d29a43..74fa841 100644 --- a/tests/testthat/test_scLANE.R +++ b/tests/testthat/test_scLANE.R @@ -147,6 +147,8 @@ withr::with_output_sink(tempfile(), { size.factor.offset = cell_offset, parallel.exec = TRUE, n.cores = 2) + sorted_genes <- sortGenesHeatmap(heatmap.mat = smoothed_counts$Lineage_A, + pt.vec = pt_test$PT) fitted_values_table <- getFittedValues(test.dyn.res = glm_gene_stats, genes = names(glm_gene_stats), pt = pt_test, @@ -276,6 +278,11 @@ test_that("smoothedCountsMatrix() output", { expect_type(smoothed_counts$Lineage_A, "double") }) +test_that("sortGenesHeatmap() output", { + expect_type(sorted_genes, "character") + expect_length(sorted_genes, ncol(smoothed_counts$Lineage_A)) +}) + test_that("getFittedValues() output", { expect_s3_class(fitted_values_table, "data.frame") expect_equal(ncol(fitted_values_table), 17)