diff --git a/.DS_Store b/.DS_Store deleted file mode 100644 index ee60853..0000000 Binary files a/.DS_Store and /dev/null differ diff --git a/.Rbuildignore b/.Rbuildignore deleted file mode 100644 index b46f55f..0000000 --- a/.Rbuildignore +++ /dev/null @@ -1,5 +0,0 @@ -^.*\.Rproj$ -^\.Rproj\.user$ -^\.github$ -^doc$ -^Meta$ diff --git a/.github/workflows/bioc-check.yaml b/.github/workflows/bioc-check.yaml index 163fbf1..96ec487 100644 --- a/.github/workflows/bioc-check.yaml +++ b/.github/workflows/bioc-check.yaml @@ -36,5 +36,6 @@ jobs: shell: Rscript {0} - name: Check BiocCheck results run: | + echo "total errors: $BC_ERRORS_TOTAL" if [[ $BC_ERRORS_TOTAL -gt 0 ]]; then exit 1; else echo "BiocCheck passed!"; fi shell: bash diff --git a/.gitignore b/.gitignore index 9e95317..0e6226c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,13 +1,19 @@ +scLANE.Rproj .Rproj.user .Rhistory .RData .Ruserdata .Rbuildignore inst/doc +inst/.DS_Store +inst/rmarkdown/.DS_Store +inst/rmarkdown/templates/.DS_Store +inst/rmarkdown/templates/Bacher_Group_HTML/.DS_Store codecov.yml /doc/ /Meta/ R/.DS_Store .Rproj -src/*.o -src/*.so +src/RcppExports.o +src/eigenMapMatMult.o +src/scLANE.so diff --git a/NAMESPACE b/NAMESPACE index 9bf61ff..b08da02 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -6,6 +6,7 @@ export(embedGenes) export(enrichDynamicGenes) export(extractBreakpoints) export(fitGLMM) +export(geneProgramDrivers) export(geneProgramScoring) export(getFittedValues) export(getKnotDist) @@ -114,6 +115,7 @@ importFrom(stats,as.dist) importFrom(stats,as.formula) importFrom(stats,coef) importFrom(stats,convolve) +importFrom(stats,cor.test) importFrom(stats,cutree) importFrom(stats,deviance) importFrom(stats,fitted) diff --git a/R/.DS_Store b/R/.DS_Store deleted file mode 100644 index 5008ddf..0000000 Binary files a/R/.DS_Store and /dev/null differ diff --git a/R/embedGenes.R b/R/embedGenes.R index e75afdc..1c2c0f5 100644 --- a/R/embedGenes.R +++ b/R/embedGenes.R @@ -56,7 +56,8 @@ embedGenes <- function(smoothed.counts = NULL, init = "spectral", nn_method = "annoy", seed = random.seed, - n_threads = n.cores) + n_threads = n.cores, + verbose = FALSE) } else { smoothed_counts_umap <- uwot::umap(smoothed.counts, n_components = 2, @@ -65,7 +66,8 @@ embedGenes <- function(smoothed.counts = NULL, init = "spectral", nn_method = "annoy", seed = random.seed, - n_threads = n.cores) + n_threads = n.cores, + verbose = FALSE) } # clustering w/ silhouette score parameter tuning if (cluster.genes) { @@ -74,13 +76,13 @@ embedGenes <- function(smoothed.counts = NULL, k = k.param, type = "jaccard", BNPARAM = BiocNeighbors::AnnoyParam(distance = "Cosine"), - BPPARAM = BiocParallel::SnowParam(workers = n.cores)) + BPPARAM = BiocParallel::SnowParam(workers = n.cores, RNGseed = random.seed)) } else { smoothed_counts_snn <- bluster::makeSNNGraph(smoothed.counts, k = k.param, type = "jaccard", BNPARAM = BiocNeighbors::AnnoyParam(distance = "Cosine"), - BPPARAM = BiocParallel::SnowParam(workers = n.cores)) + BPPARAM = BiocParallel::SnowParam(workers = n.cores, RNGseed = random.seed)) } if (is.null(resolution.param)) { if (pca.init) { diff --git a/R/geneProgramDrivers.R b/R/geneProgramDrivers.R new file mode 100644 index 0000000..736ee50 --- /dev/null +++ b/R/geneProgramDrivers.R @@ -0,0 +1,71 @@ +#' Identify driver genes for a given gene program. +#' +#' @name geneProgramDrivers +#' @author Jack Leary +#' @importFrom Matrix Matrix +#' @importFrom purrr map reduce +#' @importFrom stats cor.test p.adjust +#' @importFrom dplyr arrange desc mutate filter +#' @description This function computes the correlation +#' @param expr.mat Either a \code{SingleCellExperiment} or \code{Seurat} object from which counts can be extracted, or a matrix of normalized counts with genes as rows & cells as columns. Defaults to NULL. +#' @param genes A character vector of genes to test. Defaults to NULL. +#' @param gene.program A vector of program scores as returned by \code{\link{geneProgramScoring}}. Defaults to NULL. +#' @param cor.method (Optional) The correlation method to be used. Defaults to "spearman". +#' @param fdr.cutoff (Optional) The FDR threshold for determining statistical significance. Defaults to 0.01. +#' @return Either a \code{Seurat} or \code{SingleCellExperiment} object if \code{expr.mat} is in either form, or a data.frame containing per-cell program scores if \code{expr.mat} is a matrix. +#' @seealso \code{\link{geneProgramScoring}} +#' @seealso \code{\link[stats]{cor.test}} +#' @export +#' @examples +#' data(sim_counts) +#' data(scLANE_models) +#' data(sim_pseudotime) +#' smoothed_dynamics <- smoothedCountsMatrix(scLANE_models, +#' pt = sim_pseudotime, +#' n.cores = 1L) +#' gene_embed <- embedGenes(smoothed_dynamics$Lineage_A, n.cores = 1L) +#' sim_counts <- geneProgramScoring(sim_counts, +#' genes = gene_embed$gene, +#' gene.clusters = gene_embed$leiden, +#' n.cores = 1L) +#' program_drivers <- geneProgramDrivers(sim_counts, +#' genes = gene_embed$gene, +#' gene.program = sim_counts$cluster_0, +#' fdr.cutoff = 0.05) + +geneProgramDrivers <- function(expr.mat = NULL, + genes = NULL, + gene.program = NULL, + cor.method = "spearman", + fdr.cutoff = 0.01) { + # check inputs + if (is.null(expr.mat) || is.null(genes) || is.null(gene.program)) { stop("Arguments to geneProgramDrivers() are missing.") } + # set up counts matrix + if (inherits(expr.mat, "SingleCellExperiment")) { + counts_matrix <- SingleCellExperiment::logcounts(expr.mat) + } else if (inherits(expr.mat, "Seurat")) { + counts_matrix <- Seurat::GetAssayData(expr.mat, + slot = "data", + assay = Seurat::DefaultAssay(expr.mat)) + } else if (inherits(expr.mat, "dgCMatrix")) { + counts_matrix <- Matrix::Matrix(expr.mat, sparse = FALSE) + } + # iteratively compute correlations + cor_tests <- purrr::map(genes, \(g) { + cor_res <- stats::cor.test(counts_matrix[g, ], + gene.program, + method = "spearman", + exact = FALSE) + cor_df <- data.frame(gene = g, + corr = unname(cor_res$estimate), + pvalue = cor_res$p.value) + return(cor_df) + }) + cor_tests <- purrr::reduce(cor_tests, rbind) + cor_tests <- dplyr::arrange(cor_tests, + pvalue, + dplyr::desc(abs(corr))) %>% + dplyr::mutate(pvalue_adj = stats::p.adjust(pvalue, method = "holm")) %>% + dplyr::filter(pvalue_adj < fdr.cutoff) + return(cor_tests) +} diff --git a/R/geneProgramScoring.R b/R/geneProgramScoring.R index 5bd091b..c09f8f5 100644 --- a/R/geneProgramScoring.R +++ b/R/geneProgramScoring.R @@ -3,13 +3,14 @@ #' @name geneProgramScoring #' @author Jack Leary #' @importFrom Matrix Matrix -#' @description This function uses \code{\link[UCell]{ScoreSignatures_UCell}} to create a per-cell module score for each of the provided gene clusters. If the +#' @description This function uses \code{\link[UCell]{ScoreSignatures_UCell}} to create a per-cell module score for each of the provided gene clusters. If the input matrix is a \code{Seurat} or \code{SingleCellExperiment} object, then the resulting scores will be added to the \code{meta.data} or the \code{colData} slot, respectively. Otherwise, a data.frame of the per-program scores is returned. #' @param expr.mat Either a \code{SingleCellExperiment} or \code{Seurat} object from which counts can be extracted, or a matrix of integer-valued counts with genes as rows & cells as columns. Defaults to NULL. #' @param genes A character vector of gene IDs. Defaults to NULL. #' @param gene.clusters A factor containing the cluster assignment of each gene in \code{genes}. Defaults to NULL. #' @param program.labels (Optional) A character vector specifying a label for each gene cluster. Defaults to NULL. #' @param n.cores (Optional) The number of cores used under the hood in \code{\link[UCell]{ScoreSignatures_UCell}}. Defaults to 2. #' @return Either a \code{Seurat} or \code{SingleCellExperiment} object if \code{expr.mat} is in either form, or a data.frame containing per-cell program scores if \code{expr.mat} is a matrix. +#' @seealso \code{\link{geneProgramDrivers}} #' @export #' @examples #' data(sim_counts) diff --git a/R/GetResultsDE.R b/R/getResultsDE.R similarity index 100% rename from R/GetResultsDE.R rename to R/getResultsDE.R diff --git a/R/ModelLRT.R b/R/modelLRT.R similarity index 100% rename from R/ModelLRT.R rename to R/modelLRT.R diff --git a/codecov.yml b/codecov.yml deleted file mode 100644 index bb2358a..0000000 --- a/codecov.yml +++ /dev/null @@ -1,20 +0,0 @@ -comment: false -language: R -sudo: false -token: 22c87571-03cf-4f2e-a913-0bdf858e606b -cache: packages -after_success: -- Rscript -e 'covr::codecov()' - -coverage: - status: - project: - default: - target: auto - threshold: 1% - informational: true - patch: - default: - target: auto - threshold: 1% - informational: true diff --git a/inst/.DS_Store b/inst/.DS_Store deleted file mode 100644 index 366ba3e..0000000 Binary files a/inst/.DS_Store and /dev/null differ diff --git a/inst/rmarkdown/.DS_Store b/inst/rmarkdown/.DS_Store deleted file mode 100644 index 72fada9..0000000 Binary files a/inst/rmarkdown/.DS_Store and /dev/null differ diff --git a/inst/rmarkdown/templates/.DS_Store b/inst/rmarkdown/templates/.DS_Store deleted file mode 100644 index cde3e62..0000000 Binary files a/inst/rmarkdown/templates/.DS_Store and /dev/null differ diff --git a/inst/rmarkdown/templates/Bacher_Group_HTML/.DS_Store b/inst/rmarkdown/templates/Bacher_Group_HTML/.DS_Store deleted file mode 100644 index a5921d1..0000000 Binary files a/inst/rmarkdown/templates/Bacher_Group_HTML/.DS_Store and /dev/null differ diff --git a/man/geneProgramDrivers.Rd b/man/geneProgramDrivers.Rd new file mode 100644 index 0000000..6f18360 --- /dev/null +++ b/man/geneProgramDrivers.Rd @@ -0,0 +1,56 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/geneProgramDrivers.R +\name{geneProgramDrivers} +\alias{geneProgramDrivers} +\title{Identify driver genes for a given gene program.} +\usage{ +geneProgramDrivers( + expr.mat = NULL, + genes = NULL, + gene.program = NULL, + cor.method = "spearman", + fdr.cutoff = 0.01 +) +} +\arguments{ +\item{expr.mat}{Either a \code{SingleCellExperiment} or \code{Seurat} object from which counts can be extracted, or a matrix of normalized counts with genes as rows & cells as columns. Defaults to NULL.} + +\item{genes}{A character vector of genes to test. Defaults to NULL.} + +\item{gene.program}{A vector of program scores as returned by \code{\link{geneProgramScoring}}. Defaults to NULL.} + +\item{cor.method}{(Optional) The correlation method to be used. Defaults to "spearman".} + +\item{fdr.cutoff}{(Optional) The FDR threshold for determining statistical significance. Defaults to 0.01.} +} +\value{ +Either a \code{Seurat} or \code{SingleCellExperiment} object if \code{expr.mat} is in either form, or a data.frame containing per-cell program scores if \code{expr.mat} is a matrix. +} +\description{ +This function computes the correlation +} +\examples{ +data(sim_counts) +data(scLANE_models) +data(sim_pseudotime) +smoothed_dynamics <- smoothedCountsMatrix(scLANE_models, + pt = sim_pseudotime, + n.cores = 1L) +gene_embed <- embedGenes(smoothed_dynamics$Lineage_A, n.cores = 1L) +sim_counts <- geneProgramScoring(sim_counts, + genes = gene_embed$gene, + gene.clusters = gene_embed$leiden, + n.cores = 1L) +program_drivers <- geneProgramDrivers(sim_counts, + genes = gene_embed$gene, + gene.program = sim_counts$cluster_0, + fdr.cutoff = 0.05) +} +\seealso{ +\code{\link{geneProgramScoring}} + +\code{\link[stats]{cor.test}} +} +\author{ +Jack Leary +} diff --git a/man/geneProgramScoring.Rd b/man/geneProgramScoring.Rd index a807b12..e78a11f 100644 --- a/man/geneProgramScoring.Rd +++ b/man/geneProgramScoring.Rd @@ -27,7 +27,7 @@ geneProgramScoring( Either a \code{Seurat} or \code{SingleCellExperiment} object if \code{expr.mat} is in either form, or a data.frame containing per-cell program scores if \code{expr.mat} is a matrix. } \description{ -This function uses \code{\link[UCell]{ScoreSignatures_UCell}} to create a per-cell module score for each of the provided gene clusters. If the +This function uses \code{\link[UCell]{ScoreSignatures_UCell}} to create a per-cell module score for each of the provided gene clusters. If the input matrix is a \code{Seurat} or \code{SingleCellExperiment} object, then the resulting scores will be added to the \code{meta.data} or the \code{colData} slot, respectively. Otherwise, a data.frame of the per-program scores is returned. } \examples{ data(sim_counts) @@ -42,6 +42,9 @@ sim_counts <- geneProgramScoring(sim_counts, gene.clusters = gene_embed$leiden, n.cores = 1L) } +\seealso{ +\code{\link{geneProgramDrivers}} +} \author{ Jack Leary } diff --git a/scLANE.Rproj b/scLANE.Rproj deleted file mode 100644 index 497f8bf..0000000 --- a/scLANE.Rproj +++ /dev/null @@ -1,20 +0,0 @@ -Version: 1.0 - -RestoreWorkspace: Default -SaveWorkspace: Default -AlwaysSaveHistory: Default - -EnableCodeIndexing: Yes -UseSpacesForTab: Yes -NumSpacesForTab: 2 -Encoding: UTF-8 - -RnwWeave: Sweave -LaTeX: pdfLaTeX - -AutoAppendNewline: Yes -StripTrailingWhitespace: Yes - -BuildType: Package -PackageUseDevtools: Yes -PackageInstallArgs: --no-multiarch --with-keep.source diff --git a/tests/testthat/test_scLANE.R b/tests/testthat/test_scLANE.R index b97b424..39e65fc 100644 --- a/tests/testthat/test_scLANE.R +++ b/tests/testthat/test_scLANE.R @@ -226,6 +226,13 @@ withr::with_output_sink(tempfile(), { sim_data_seu <- geneProgramScoring(sim_data_seu, genes = gene_embedding$gene, gene.clusters = gene_embedding$leiden) + # gene program drivers + program_drivers <- geneProgramDrivers(sim_data, + genes = gene_embedding$gene, + gene.program = sim_data$cluster_0) + program_drivers_seu <- geneProgramDrivers(sim_data_seu, + genes = gene_embedding$gene, + gene.program = sim_data_seu$cluster_0) # enrichment analysis gsea_res <- enrichDynamicGenes(glm_test_results, species = "hsapiens") # coefficients @@ -399,6 +406,13 @@ test_that("geneProgramScoring() output", { expect_equal(colnames(sim_data_seu@meta.data)[11], "cluster_1") }) +test_that("geneProgramDrivers() output", { + expect_s3_class(program_drivers, "data.frame") + expect_s3_class(program_drivers_seu, "data.frame") + expect_equal(ncol(program_drivers), 4) + expect_equal(ncol(program_drivers_seu), 4) +}) + test_that("sortGenesHeatmap() output", { expect_type(sorted_genes, "character") expect_length(sorted_genes, ncol(smoothed_counts$Lineage_A))