Skip to content

Commit

Permalink
added GAM-based fn to test relationship bt gene program and pseudotim…
Browse files Browse the repository at this point in the history
…e -- closes #183
  • Loading branch information
jr-leary7 committed Jan 3, 2024
1 parent 2e0eec5 commit 694d9dc
Show file tree
Hide file tree
Showing 9 changed files with 150 additions and 10 deletions.
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ export(extractBreakpoints)
export(fitGLMM)
export(geneProgramDrivers)
export(geneProgramScoring)
export(geneProgramSignificance)
export(getFittedValues)
export(getKnotDist)
export(getResultsDE)
Expand Down Expand Up @@ -71,6 +72,7 @@ importFrom(furrr,future_map)
importFrom(future,multisession)
importFrom(future,plan)
importFrom(future,sequential)
importFrom(gamlss,LR.test)
importFrom(gamlss,gamlss)
importFrom(gamlss,gamlss.control)
importFrom(gamlss,pb)
Expand Down
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

+ Added `geneProgramDrivers()` function to compute & test correlations of expression with gene module scores.
+ Updated documentation & unit tests.
+ Added `geneProgramSignificance` function to estimate associations between gene program module scores and pseudotime.

# Changes in version 0.7.8

Expand Down
5 changes: 3 additions & 2 deletions R/geneProgramDrivers.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
#' @importFrom doSNOW registerDoSNOW
#' @importFrom stats cor.test p.adjust
#' @importFrom dplyr arrange desc mutate filter
#' @description This function computes the correlation
#' @description This function computes the correlation between smoothed gene expression and gene program scores in order to identify genes are significantly associated with program scores i.e., the "drivers" of the gene program.
#' @param expr.mat Either a \code{SingleCellExperiment} or \code{Seurat} object from which counts can be extracted, or a matrix of normalized counts with genes as rows & cells as columns. Defaults to NULL.
#' @param genes A character vector of genes to test. Defaults to NULL.
#' @param gene.program A vector of program scores as returned by \code{\link{geneProgramScoring}}. Defaults to NULL.
Expand Down Expand Up @@ -39,7 +39,8 @@
#' program_drivers <- geneProgramDrivers(sim_counts,
#' genes = gene_embed$gene,
#' gene.program = sim_counts$cluster_0,
#' fdr.cutoff = 0.05)
#' fdr.cutoff = 0.05,
#' n.cores = 1L)

geneProgramDrivers <- function(expr.mat = NULL,
genes = NULL,
Expand Down
10 changes: 6 additions & 4 deletions R/geneProgramScoring.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@
#' @param genes A character vector of gene IDs. Defaults to NULL.
#' @param gene.clusters A factor containing the cluster assignment of each gene in \code{genes}. Defaults to NULL.
#' @param program.labels (Optional) A character vector specifying a label for each gene cluster. Defaults to NULL.
#' @param minmax.norm (Optional) Should each program's score be min-max normalized to be on [0, 1]? Defaults to FALSE.
#' @param minmax.norm (Optional) Should each program's score be min-max normalized to be on (0, 1)? Defaults to TRUE.
#' @param minmax.epsilon (Optional) The tolerance used to ensure that program scores equal to 0 or 1 do not occur. Defaults to 0.01.
#' @param n.cores (Optional) The number of cores used under the hood in \code{\link[UCell]{ScoreSignatures_UCell}}. Defaults to 2.
#' @return Either a \code{Seurat} or \code{SingleCellExperiment} object if \code{expr.mat} is in either form, or a data.frame containing per-cell program scores if \code{expr.mat} is a matrix.
#' @seealso \code{\link[UCell]{ScoreSignatures_UCell}}
Expand All @@ -31,7 +32,8 @@ geneProgramScoring <- function(expr.mat = NULL,
genes = NULL,
gene.clusters = NULL,
program.labels = NULL,
minmax.norm = FALSE,
minmax.norm = TRUE,
minmax.epsilon = 1e-2,
n.cores = 2L) {
# check inputs
if (is.null(expr.mat) || is.null(genes) || is.null(gene.clusters)) { stop("Arguments to geneProgramScoring() are missing.") }
Expand Down Expand Up @@ -65,11 +67,11 @@ geneProgramScoring <- function(expr.mat = NULL,
program_scores <- UCell::ScoreSignatures_UCell(counts_matrix,
features = program_list,
ncores = n.cores)
# min-max normalize if desired
# min-max normalize with tol to (0, 1) if desired
if (minmax.norm) {
program_scores <- purrr::map(seq(ncol(program_scores)), \(i) {
scores <- program_scores[, i]
normed_scores <- (scores - min(scores)) / (max(scores) - min(scores))
normed_scores <- minmax.epsilon + (((scores - min(scores)) * (1 - 2 * minmax.epsilon)) / (max(scores) - min(scores)))
return(normed_scores)
})
program_scores <- purrr::reduce(program_scores, cbind)
Expand Down
71 changes: 71 additions & 0 deletions R/geneProgramSignificance.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
#' Test significance of gene program enrichment across a trajectory.
#'
#' @name geneProgramSignificance
#' @author Jack Leary
#' @import magrittr
#' @importFrom purrr map reduce
#' @importFrom gamlss gamlss LR.test
#' @importFrom dplyr arrange desc mutate
#' @importFrom stats p.adjust
#' @description This function fits a Beta GAM with pseudotime as a covariate and gene program score as the response. The fitted model is then compared to a null, intercept-only model in order to determine the significance of the relationship between gene program and pseudotime.
#' @param gene.programs A list of vectors of program scores as returned by \code{\link{geneProgramScoring}}. Defaults to NULL.
#' @param pt A vector of pseudotime values for each cell. May contain NAs, which are handled internally. Defaults to NULL.
#' @param program.labels (Optional) A character vector specifying a label for each gene cluster. Defaults to NULL.
#' @param p.adj.method (Optional) The method used to adjust \emph{p}-values for multiple hypothesis testing. Defaults to "holm".
#' @return A table of statistical output showing the significance of the association between pseudotime and program scores.
#' @seealso \code{\link{geneProgramScoring}}
#' @export
#' @examples
#' data(sim_counts)
#' data(scLANE_models)
#' data(sim_pseudotime)
#' smoothed_dynamics <- smoothedCountsMatrix(scLANE_models,
#' pt = sim_pseudotime,
#' n.cores = 1L)
#' gene_embed <- embedGenes(smoothed_dynamics$Lineage_A, n.cores = 1L)
#' sim_counts <- geneProgramScoring(sim_counts,
#' genes = gene_embed$gene,
#' gene.clusters = gene_embed$leiden,
#' n.cores = 1L)
#' program_enrichment_stats <- geneProgramSignificance(list(sim_counts$cluster_0),
#' pt = sim_pseudotime$PT,
#' program.labels = c("Program Name"))

geneProgramSignificance <- function(gene.programs = NULL,
pt = NULL,
program.labels = NULL,
p.adj.method = "holm") {
# check inputs
if (is.null(gene.programs) || is.null(pt)) { stop("Inputs to geneProgramSignificance() are missing.") }
if (is.null(program.labels)) {
program.labels <- paste0("cluster_", seq(length(gene.programs)) - 1)
}
if (length(gene.programs) != length(program.labels)) { stop("program.labels must have the same length as gene.programs.") }
# iterate over gene programs and fit Beta GAMs
program_models <- purrr::map(seq(gene.programs), \(g) {
pseudotime_vec <- pt[!is.na(pt)]
program_score_vec <- gene.programs[[g]][!is.na(pt)]
alt_model <- gamlss::gamlss(program_score_vec ~ pseudotime_vec,
family = "BE",
data = NULL,
control = gamlss::gamlss.control(trace = FALSE))
null_model <- gamlss::gamlss(program_score_vec ~ 1,
family = "BE",
data = NULL,
control = gamlss::gamlss.control(trace = FALSE))
lr_test_res <- gamlss::LR.test(null = null_model,
alternative = alt_model,
print = FALSE)
res <- data.frame(Program = program.labels[g],
LRT_Stat = lr_test_res$chi,
DF = lr_test_res$df,
P_Value = lr_test_res$p.val)
return(res)
})
# prepare test results
program_sig_df <- purrr::reduce(program_models, rbind) %>%
dplyr::arrange(P_Value, dplyr::desc(LRT_Stat)) %>%
dplyr::mutate(P_Value = dplyr::if_else(P_Value == 0, 2e-16, P_Value),
P_Value_Adj = stats::p.adjust(P_Value, method = p.adj.method))
return(program_sig_df)
}
5 changes: 3 additions & 2 deletions man/geneProgramDrivers.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 5 additions & 2 deletions man/geneProgramScoring.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

50 changes: 50 additions & 0 deletions man/geneProgramSignificance.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 9 additions & 0 deletions tests/testthat/test_scLANE.R
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,10 @@ withr::with_output_sink(tempfile(), {
sim_data_seu <- geneProgramScoring(sim_data_seu,
genes = gene_embedding$gene,
gene.clusters = gene_embedding$leiden)
# gene program significance
progam_significance <- geneProgramSignificance(list(sim_data$cluster_0),
pt = pt_test$PT,
program.labels = c("Cluster0"))
# gene program drivers
program_drivers <- geneProgramDrivers(sim_data,
genes = gene_embedding$gene,
Expand Down Expand Up @@ -407,6 +411,11 @@ test_that("geneProgramScoring() output", {
expect_equal(colnames(sim_data_seu@meta.data)[11], "cluster_1")
})

test_that("geneProgramSignificance() output", {
expect_s3_class(program_significance, "data.frame")
expect_equal(ncol(program_significance), 5)
})

test_that("geneProgramDrivers() output", {
expect_s3_class(program_drivers, "data.frame")
expect_s3_class(program_drivers_seu, "data.frame")
Expand Down

0 comments on commit 694d9dc

Please sign in to comment.