Merge pull request #142 from jr-leary7/dev

Dev
jr-leary7 · Oct 18, 2023 · bf36806 · bf36806
2 parents 9246a99 + b38cdf2
commit bf36806
Show file tree

Hide file tree

Showing 10 changed files with 184 additions and 33 deletions.
diff --git a/.github/workflows/render-README.yaml b/.github/workflows/render-README.yaml
@@ -22,15 +22,13 @@ jobs:
       - uses: r-lib/actions/setup-pandoc@v2
 
       - name: install CRAN packages
-        run: Rscript -e 'install.packages(c("rmarkdown","ggplot2", "dplyr", "purrr", "remotes"))'
+        run: Rscript -e 'install.packages(c("rmarkdown","ggplot2", "dplyr", "purrr", "remotes", "BiocManager"))'
       - name: install BioConductor packages
-        run: Rscript -e 'install.packages("BiocManager"); BiocManager::install(c("SingleCellExperiment", "scater", "scran")'
+        run: Rscript -e 'BiocManager::install(c("SingleCellExperiment", "scater", "scran"))'
       - name: install GitHub packages
         run: Rscript -e 'remotes::install_github("jr-leary7/scLANE"); remotes::install_github("rhondabacher/scaffold")'
-
       - name: render README
         run: Rscript -e 'rmarkdown::render("README.Rmd", output_format = "md_document")'
-
       - name: commit rendered README
         run: |
           git add README.md man/figures/README-*

diff --git a/NAMESPACE b/NAMESPACE
@@ -12,6 +12,7 @@ export(getResultsDE)
 export(marge2)
 export(modelLRT)
 export(nbGAM)
+export(npConvolve)
 export(plotClusteredGenes)
 export(plotModels)
 export(smoothedCountsMatrix)
@@ -104,6 +105,7 @@ importFrom(splines,bs)
 importFrom(stats,as.dist)
 importFrom(stats,as.formula)
 importFrom(stats,coef)
+importFrom(stats,convolve)
 importFrom(stats,cutree)
 importFrom(stats,deviance)
 importFrom(stats,fitted)

diff --git a/R/embedGenes.R b/R/embedGenes.R
@@ -1,4 +1,4 @@
-#' Generate a table of fitted values and celltype metadata for genes of interest.
+#' Generate PCA & UMAP embeddings of fitted gene dynamics.
 #'
 #' @name embedGenes
 #' @author Jack Leary
@@ -9,8 +9,9 @@
 #' @description Embed genes in dimension-reduced space given a smoothed counts matrix.
 #' @param smoothed.counts The output from \code{\link{smoothedCountsMatrix}}. Defaults to NULL.
 #' @param genes A character vector of genes to embed. If not specified, all genes in \code{smoothed.counts} are used. Defaults to NULL.
+#' @param pca.init A boolean specifying whether the principal components, rather than the raw smoothed dynamics, should be used as the input to UMAP and (if enabled) to the SNN graph and cosine distance matrix used in clustering. Defaults to FALSE, in which case the raw dynamics are embedded and clustered.
 #' @param pc.embed (Optional) How many PCs should be used to cluster the genes and run UMAP? Defaults to 30.
-#' @param pcs.return (Optional) How many principal components should be included in the output? Defaults to 2.
+#' @param pc.return (Optional) How many principal components should be included in the output? Defaults to 2.
 #' @param cluster.genes (Optional) Should genes be clustered in PCA space using the Leiden algorithm? Defaults to TRUE.
 #' @param gene.meta.data (Optional) A data.frame of metadata values for each gene (HVG status, Ensembl ID, gene biotype, etc.) that will be included in the result table. Defaults to NULL.
 #' @param k.param (Optional) The value of nearest-neighbors used in creating the SNN graph prior to clustering & in running UMAP. Defaults to 20.
@@ -27,8 +28,9 @@
 
 embedGenes <- function(smoothed.counts = NULL,
                        genes = NULL,
+                       pca.init = FALSE,
                        pc.embed = 30,
-                       pcs.return = 2,
+                       pc.return = 2,
                        cluster.genes = TRUE,
                        gene.meta.data = NULL,
                        k.param = 20,
@@ -44,20 +46,41 @@ embedGenes <- function(smoothed.counts = NULL,
                                              n = pc.embed,
                                              center = TRUE,
                                              scale. = TRUE)
-  smoothed_counts_umap <- uwot::umap(smoothed_counts_pca$x,
-                                     n_components = 2,
-                                     metric = "cosine",
-                                     n_neighbors = k.param,
-                                     init = "spectral")
+  if (pca.init) {
+    smoothed_counts_umap <- uwot::umap(smoothed_counts_pca$x,
+                                       n_components = 2,
+                                       metric = "cosine",
+                                       n_neighbors = k.param,
+                                       init = "spectral",
+                                       nn_method = "annoy")
+  } else {
+    smoothed_counts_umap <- uwot::umap(smoothed.counts,
+                                       n_components = 2,
+                                       metric = "cosine",
+                                       n_neighbors = k.param,
+                                       init = "spectral",
+                                       nn_method = "annoy")
+  }
   # clustering w/ silhouette score parameter tuning
   if (cluster.genes) {
-    smoothed_counts_snn <- bluster::makeSNNGraph(smoothed_counts_pca$x,
-                                                 k = k.param,
-                                                 type = "jaccard",
-                                                 BNPARAM = BiocNeighbors::AnnoyParam(distance = "Cosine"))
+    if (pca.init) {
+      smoothed_counts_snn <- bluster::makeSNNGraph(smoothed_counts_pca$x,
+                                                   k = k.param,
+                                                   type = "jaccard",
+                                                   BNPARAM = BiocNeighbors::AnnoyParam(distance = "Cosine"))
+    } else {
+      smoothed_counts_snn <- bluster::makeSNNGraph(smoothed.counts,
+                                                   k = k.param,
+                                                   type = "jaccard",
+                                                   BNPARAM = BiocNeighbors::AnnoyParam(distance = "Cosine"))
+    }
     if (is.null(resolution.param)) {
-      dist_matrix <- stats::as.dist(1 - coop::tcosine(x = smoothed_counts_pca$x))
-      clust_runs <- purrr::map(c(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7), \(r) {
+      if (pca.init) {
+        dist_matrix <- stats::as.dist(1 - coop::tcosine(x = smoothed_counts_pca$x))
+      } else {
+        dist_matrix <- stats::as.dist(1 - coop::tcosine(x = smoothed.counts))
+      }
+      clust_runs <- purrr::map(seq(0.1, 0.7, by = 0.1), \(r) {
         smoothed_counts_clust <- igraph::cluster_leiden(smoothed_counts_snn,
                                                         objective_function = "modularity",
                                                         resolution_parameter = r)
@@ -83,8 +106,8 @@ embedGenes <- function(smoothed.counts = NULL,
     cluster_vec <- NA_integer_
   }
   # prepare results
-  pca_df <- as.data.frame(smoothed_counts_pca$x[, seq(pcs.return)])
-  colnames(pca_df) <- paste0("pc", seq(pcs.return))
+  pca_df <- as.data.frame(smoothed_counts_pca$x[, seq(pc.return)])
+  colnames(pca_df) <- paste0("pc", seq(pc.return))
   gene_df <- data.frame(gene = genes,
                         leiden = cluster_vec,
                         umap1 = smoothed_counts_umap[, 1],

diff --git a/R/npConvolve.R b/R/npConvolve.R
@@ -0,0 +1,44 @@
+#' Convolution that matches \code{np.convolve}.
+#'
+#' @name npConvolve
+#' @author Jack Leary
+#' @importFrom stats convolve
+#' @description Convolve a vector with a user-specified kernel. Can be useful for heatmap smoothing, weighted moving means, etc.
+#' @param x The vector to be convolved. Defaults to NULL.
+#' @param conv.kernel The kernel to be used in the convolution. If unspecified, a uniform kernel of \eqn{1 / n} repeated \eqn{n} times is used, where \eqn{n} is the length of the input vector. Defaults to NULL.
+#' @return A convolution with the same length as the input vector.
+#' @details
+#' \itemize{
+#' \item The convolution here uses \code{\link[stats]{convolve}}, but creates the kernel and padding in such a way that, for kernels no longer than the input, it matches the output from \code{np.convolve(x, conv.kernel, mode = "same")} in Python's \code{numpy} package.
+#' \item The kernel is reversed before being passed to \code{\link[stats]{convolve}}, because with \code{type = "open"} that function only computes the usual convolution when its second argument is reversed. Without the reversal, asymmetric kernels would be applied back-to-front.
+#' }
+#' @seealso \code{\link[stats]{convolve}}
+#' @export
+#' @examples
+#' npConvolve(x = rnorm(20), conv.kernel = rep(1/5, 5))
+#'
+
+npConvolve <- function(x = NULL, conv.kernel = NULL) {
+  # check inputs & generate kernel if necessary
+  if (is.null(x)) { stop("Cannot convolve with no input.") }
+  n <- length(x)
+  if (is.null(conv.kernel)) {
+    conv.kernel <- rep(1, n) / n
+  }
+  m <- length(conv.kernel)
+  # zero-pad both ends of x; the centered window of length n is extracted from the open convolution below
+  padded_x <- c(rep(0, m %/% 2), x, rep(0, m %/% 2))
+  # stats::convolve() yields the usual convolution only when the kernel is reversed
+  padded_conv <- stats::convolve(padded_x, rev(conv.kernel), type = "open")
+  start_idx <- (length(padded_conv) - n) %/% 2 + 1
+  end_idx <- start_idx + n - 1
+  filtered_conv <- padded_conv[start_idx:end_idx]
+  # check convolution output
+  if (length(filtered_conv) != n) {
+    warning("Filtered convolution does not match original vector length, returning unfiltered convolution.")
+    res <- padded_conv
+  } else {
+    res <- filtered_conv
+  }
+  return(res)
+}
diff --git a/R/smoothedCountsMatrix.R b/R/smoothedCountsMatrix.R
@@ -12,6 +12,7 @@
 #' @param pt A data.frame of pseudotime values for each cell. Defaults to NULL.
 #' @param size.factor.offset (Optional) An offset to be used to rescale the fitted values. Can be generated easily with \code{\link{createCellOffset}}. No need to provide if the GEE backend was used. Defaults to NULL.
 #' @param genes (Optional) A character vector of genes with which to subset the results. Defaults to NULL.
+#' @param log1p.norm A boolean specifying whether the smoothed counts should be log1p-transformed after depth normalization. Defaults to FALSE.
 #' @param parallel.exec Should \code{furrr} be used to speed up execution? Defaults to TRUE.
 #' @param n.cores If parallel execution is desired, how many cores should be utilized? Defaults to 2.
 #' @return A list of matrices of smoothed counts, with each element of the list being a single pseudotime lineage.
@@ -31,6 +32,7 @@ smoothedCountsMatrix <- function(test.dyn.res = NULL,
                                  size.factor.offset = NULL,
                                  pt = NULL,
                                  genes = NULL,
+                                 log1p.norm = FALSE,
                                  parallel.exec = TRUE,
                                  n.cores = 2) {
   # check inputs
@@ -65,6 +67,9 @@ smoothedCountsMatrix <- function(test.dyn.res = NULL,
                        }) %>%
                        purrr::reduce(cbind)
     colnames(fitted_vals_mat) <- names(fitted_vals_list)
+    if (log1p.norm) {
+      fitted_vals_mat <- log1p(fitted_vals_mat)
+    }
     return(fitted_vals_mat)
   })
   names(lineage_mat_list) <- paste0("Lineage_", lineages)

diff --git a/README.Rmd b/README.Rmd
@@ -34,7 +34,7 @@ knitr::opts_chunk$set(message = FALSE,
 
 ## Installation
 
-You can install the most recent version of `scLANE` with:
+You can install the most recent version of `scLANE` using:
 
 ```{r install-scLANE, eval=FALSE}
 remotes::install_github("jr-leary7/scLANE")

diff --git a/man/embedGenes.Rd b/man/embedGenes.Rd
diff --git a/man/npConvolve.Rd b/man/npConvolve.Rd
diff --git a/man/smoothedCountsMatrix.Rd b/man/smoothedCountsMatrix.Rd