Merge pull request #134 from NIDAP-Community/fix_sankey

Fix sankey
NIDAP-Community · Oct 23, 2023 · b96dba8 · b96dba8
2 parents b1719d0 + d69b216
commit b96dba8
Show file tree

Hide file tree

Showing 13 changed files with 195 additions and 103 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -31,8 +31,8 @@ Imports:
     cowplot (>= 1.1.1),
     dplyr (>= 1.0.9),
     GeomxTools (>= 3.1.1),
-    ggforce (== 0.3.4),
-    ggplot2 (== 3.3.6),
+    ggforce (>= 0.3.4),
+    ggplot2 (>= 3.3.6),
     gridExtra (>= 2.3),
     grid (>= 4.1.3),
     gtable (>= 0.3.0),

diff --git a/NAMESPACE b/NAMESPACE
@@ -45,6 +45,7 @@ importFrom(dplyr,count)
 importFrom(dplyr,filter)
 importFrom(dplyr,group_by)
 importFrom(dplyr,pull)
+importFrom(dplyr,rename)
 importFrom(dplyr,select)
 importFrom(ggforce,gather_set_data)
 importFrom(ggforce,geom_parallel_sets)

diff --git a/R/filtering.R b/R/filtering.R
@@ -10,6 +10,19 @@
 #' @details This function will run various filtering parameters for NanoStringGeoMxSet datasets
 #' 
 #' @param object A NanoStringGeoMxSet dataset
+#' @param loq.cutoff The number of standard deviations above the negative probe 
+#' geometric mean to use as a cutoff for the limit of quantification
+#' @param loq.min The minimum value for the limit of quantification
+#' @param segment.gene.rate.cutoff A decimal for the minimum cutoff for the 
+#' genes detected in a given segment over the total number of genes in the 
+#' probe set
+#' @param study.gene.rate.cutoff = A decimal for the minimum cutoff for the 
+#' average amount a given gene is detected in all segments
+#' @param sankey.exclude.slide A toggle for including the slide name in the 
+#' Sankey Plot
+#' @param goi A list of genes of interest to evaluate for their study-wide 
+#' detection rate
+#' 
 #' @importFrom scales percent
 #' @importFrom Biobase pData
 #' @importFrom Biobase fData
@@ -18,12 +31,6 @@
 #' @export
 #' @return A list containing the ....
 
-# To call function, must have data = raw object, dsp.obj = QC demoData, 
-# loq.cutoff 2 is recommended,
-# loq.min 2 is recommend, 
-# segment.gene.rate.cutoff = remove segments with less than x% of the gene set detected; .05-.1 recommended,
-# study.gene.rate.cutoff = remove genes detected in less than x% of segments; .05-.2 recommended,
-# goi = goi (genes of interest). Must be a vector of genes (i.e c("PDCD1", "CD274")),
 filtering <- function(object, 
                       loq.cutoff = 2, 
                       loq.min = 2, 
@@ -135,32 +142,46 @@ filtering <- function(object,
     stop(paste0("Error: You have the wrong data class, must be NanoStringGeoMxSet" ))
   }
 
-  # Gather the data and plot in order: class, slide name, region, segment
   # gather_set_data creates x, id, y, and n fields within sankey.count.data
   # Establish the levels of the Sankey with or without the slide name
   if(sankey.exclude.slide == TRUE){
+    # Create a dataframe used to make the Sankey plot
     sankey.count.data <- gather_set_data(count.mat, 1:3)
-    sankey.count.data$x <-
-      factor(
-        sankey.count.data$x,
-        levels = c("class", "region", "segment")
-      )
+
+    # Define the annotations to use for the Sankey x axis labels
+    sankey.count.data$x[sankey.count.data$x == 1] <- "class"
+    sankey.count.data$x[sankey.count.data$x == 2] <- "region"
+    sankey.count.data$x[sankey.count.data$x == 3] <- "segment"
+
+    factor(
+      sankey.count.data$x,
+      levels = c("class", "region", "segment")
+    )
+
     # For position of Sankey 100 segment scale
     adjust.scale.pos = 1
   } else {
+    # Create a dataframe used to make the Sankey plot
     sankey.count.data <- gather_set_data(count.mat, 1:4)
-    sankey.count.data$x <-
-      factor(
-        sankey.count.data$x,
-        levels = c("class", "slide_name", "region", "segment")
-      )
+
+    # Define the annotations to use for the Sankey x axis labels
+    sankey.count.data$x[sankey.count.data$x == 1] <- "slide_name"
+    sankey.count.data$x[sankey.count.data$x == 2] <- "class"
+    sankey.count.data$x[sankey.count.data$x == 3] <- "region"
+    sankey.count.data$x[sankey.count.data$x == 4] <- "segment"
+
+    factor(
+      sankey.count.data$x,
+      levels = c("class", "slide_name", "region", "segment")
+    )
+
     # For position of Sankey 100 segment scale
     adjust.scale.pos = 0
   }
 
   # plot Sankey
   sankey.plot <- ggplot(sankey.count.data, aes(x, id = id, split = y, value = n)) +
-    geom_parallel_sets(aes(fill = region), alpha = 0.5, axis.width = 0.1) +
+    geom_parallel_sets(aes(fill = class), alpha = 0.5, axis.width = 0.1) +
     geom_parallel_sets_axes(axis.width = 0.2) +
     geom_parallel_sets_labels(color = "gray", size = 5, angle = 0) +
     theme_classic(base_size = 17) + 

diff --git a/R/spatial_deconvolution.R b/R/spatial_deconvolution.R
@@ -1,8 +1,8 @@
-#' @title Spatial Deconvolution
-#'
 #' Helper functions comes from
 #' https://bioconductor.org/packages/release/bioc/vignettes/SpatialDecon/inst/doc/SpatialDecon_vignette_NSCLC.html
-#'
+#' 
+#' @title Spatial Deconvolution
+#' 
 #' @description spatialDeconvolution estimate cell composition across DSP
 #'              samples from reference expression matrix
 #'
@@ -41,18 +41,6 @@
 #' @importFrom ComplexHeatmap pheatmap
 #'
 #' @export
-#' @example Do not run: spatialDeconvolution(object = NanostringGeomx,
-#'                                           expr.type = "q_norm",
-#'                                           prof.mtx = profile_matrix,
-#'                                           clust.rows = TRUE,
-#'                                           clust.cols = TRUE,
-#'                                           group.by = "none",
-#'                                           plot.fontsize = 5,
-#'                                           use.custom.prof.mtx = FALSE,
-#'                                           ref.mtx = reference_matrix,
-#'                                           ref.annot = reference_annotation,
-#'                                           cell.id.col = "CellID",
-#'                                           celltype.col = "LabeledCellType")
 #'
 #' @return A list dsp.data containing the results of spatial deconvolution,
 #' res$beta: matrix of estimated cell abundances
@@ -66,7 +54,7 @@
 
 
 spatialDeconvolution <- function(object,
-                                 expr.type,
+                                 expr.type = "q_norm",
                                  prof.mtx,
                                  clust.rows = TRUE,
                                  clust.cols = TRUE,
@@ -79,8 +67,8 @@ spatialDeconvolution <- function(object,
                                  min.genes = 10,
                                  ref.mtx,
                                  ref.annot,
-                                 cell.id.col,
-                                 celltype.col) {
+                                 cell.id.col = "CellID",
+                                 celltype.col = "LabeledCellType") {
 
   # Check for Parameter Misspecification Error(s)
   if (!expr.type %in% names(object@assayData)) {

diff --git a/R/study_design.R b/R/study_design.R
@@ -27,10 +27,14 @@
 #' phenoDataFile containing data about the experiment's meta-data.
 #' @param slide.name.col The name of the field that contains the slide names
 #' @param class.col The name of the field that contains the class annotation
-#' @param region.col The name of the field that contains the class annotation
-#' @param segment.col The name of the field that contains the class annotation
-#' 
-#'
+#' @param region.col The name of the field that contains the region annotation
+#' @param segment.col The name of the field that contains the segment name
+#' @param area.col The name of the field that contains the segment area 
+#' @param nuclei.col The name of the field that contains the nuclei number
+#' @param sankey.exclude.slide A toggle for including the slide name in the
+#'  Sankey plot
+#' @param segment.id.length The number of characters to use from each of the 
+#' annotation fields class, region, and segment to create the segment ID
 #'
 #' @importFrom GeomxTools readNanoStringGeoMxSet
 #' @importFrom knitr kable
@@ -52,8 +56,6 @@
 #' @export
 #' @return A list containing the NanoString Object and the Sankey plot.
 
-
-
 studyDesign <- function(dcc.files,
                         pkc.files,
                         pheno.data.file,
@@ -177,26 +179,39 @@ studyDesign <- function(dcc.files,
     rownames(count.mat) <- 1:nrow(count.mat)
   }
 
-
-  # Gather the data and plot in order: class, slide name, region, segment
   # gather_set_data creates x, id, y, and n fields within sankey.count.data
   # Establish the levels of the Sankey with or without the slide name
   if(sankey.exclude.slide == TRUE){
+    # Create a dataframe used to make the Sankey plot
     sankey.count.data <- gather_set_data(count.mat, 1:3)
-    sankey.count.data$x <-
-      factor(
-        sankey.count.data$x,
-        levels = c("class", "region", "segment")
-      )
+
+    # Define the annotations to use for the Sankey x axis labels
+    sankey.count.data$x[sankey.count.data$x == 1] <- "class"
+    sankey.count.data$x[sankey.count.data$x == 2] <- "region"
+    sankey.count.data$x[sankey.count.data$x == 3] <- "segment"
+
+    factor(
+      sankey.count.data$x,
+      levels = c("class", "region", "segment")
+    )
+
     # For position of Sankey 100 segment scale
     adjust.scale.pos = 1
   } else {
+    # Create a dataframe used to make the Sankey plot
     sankey.count.data <- gather_set_data(count.mat, 1:4)
-    sankey.count.data$x <-
-      factor(
-        sankey.count.data$x,
-        levels = c("class", "slide_name", "region", "segment")
-      )
+
+    # Define the annotations to use for the Sankey x axis labels
+    sankey.count.data$x[sankey.count.data$x == 1] <- "slide_name"
+    sankey.count.data$x[sankey.count.data$x == 2] <- "class"
+    sankey.count.data$x[sankey.count.data$x == 3] <- "region"
+    sankey.count.data$x[sankey.count.data$x == 4] <- "segment"
+
+    factor(
+      sankey.count.data$x,
+      levels = c("class", "slide_name", "region", "segment")
+    )
+
     # For position of Sankey 100 segment scale
     adjust.scale.pos = 0
   }
@@ -210,7 +225,7 @@ studyDesign <- function(dcc.files,
              split = y,
              value = n
            )) +
-    geom_parallel_sets(aes(fill = region), alpha = 0.5, axis.width = 0.1) +
+    geom_parallel_sets(aes(fill = class), alpha = 0.5, axis.width = 0.1) +
     geom_parallel_sets_axes(axis.width = 0.2) +
     geom_parallel_sets_labels(color = "gray",
                               size = 5,

diff --git a/R/violin_plot.R b/R/violin_plot.R
@@ -19,18 +19,13 @@
 #' @importFrom gridExtra arrangeGrob
 #'
 #' @export
-#' @example Do not run: violinPlot(object = NanostringGeomx, 
-#'                                 expr.type = "q_norm", 
-#'                                 genes = c("FOXP3","CD4"), 
-#'                                 group = "CellType", 
-#'                                 facet.by = "segment")
 #'
 #' @return an arranged grob of violin plots
 
 violinPlot <- function(object,
-                       expr.type,
-                       genes,
-                       group,
+                       expr.type = "q_norm",
+                       genes = c("FOXP3","CD4"),
+                       group = "CellType",
                        facet.by = "none") {
 
   # Check for Parameter Misspecification Error(s)

diff --git a/man/diffExpr.Rd b/man/diffExpr.Rd
diff --git a/man/filtering.Rd b/man/filtering.Rd
diff --git a/man/geomxNorm.Rd b/man/geomxNorm.Rd