Merge pull request #102 from NIDAP-Community/dev

Dev
NIDAP-Community · May 26, 2023 · c7230f2 · c7230f2
2 parents eb070e9 + 35698a9
commit c7230f2
Show file tree

Hide file tree

Showing 94 changed files with 3,078,541 additions and 660 deletions.
diff --git a/R/differential_expression_analysis.R b/R/differential_expression_analysis.R
@@ -62,7 +62,7 @@ diffExpr <- function(object,
                      group.col,
                      regions,
                      region.col,
-                     slide.col = "slide name",
+                     slide.col = "slide_name",
                      element = "q_norm",
                      multi.core = TRUE,
                      n.cores = 1,
@@ -75,16 +75,12 @@ diffExpr <- function(object,
   # Check the number of cores available for the current machine
   available.cores <- detectCores()
 
+  # Adjust the number of cores selected within the machine's range
   if (n.cores > available.cores) {
     print(paste0("The number of cores selected is greater than the number of available cores, reducing number of cores to maximum of ", available.cores))
     n.cores <- available.cores
   }
 
-  # Adjust the number of cores selected within the machine's range
-
-
-
-
   testClass <- testRegion <- Gene <- Subset <- NULL
 
   # convert test variables to factors after checking input
@@ -126,25 +122,20 @@ diffExpr <- function(object,
         setdiff(unique(Biobase::pData(object)[[region.col]]),
                 unique(levels(Biobase::pData(object)$testRegion)))
       regdiff <- paste0(regdiff, collapse = ", ")
-      message(
-        paste0(
+      cat(
+        sprintf(
           "At least one of the regions within the Region Column was not selected
-            and is excluded:\n",
-          regdiff,
-          "\n"
-        )
+            and is excluded: %s\n", regdiff)
       )
-    }
-    else if (param.na[1] == "testClass") {
+    } else if (param.na[1] == "testClass") {
       classdiff <-
         setdiff(unique(Biobase::pData(object)[[group.col]]),
                 unique(levels(Biobase::pData(object)$testClass)))
       classdiff <- paste0(classdiff, collapse = ", ")
-      message(
-        "At least one of the groups within the Group Column was not selected and
-          is excluded:\n",
-        classdiff,
-        "\n"
+      cat(
+        sprintf(
+          "At least one of the groups within the Group Column was not selected and
+            is excluded: %s\n", classdiff)
       )
     }
   }
@@ -172,7 +163,7 @@ diffExpr <- function(object,
   options(digits = 3)
 
   if (analysis.type == "Within Groups") {
-    cat("Running Within Group Analysis between Regions")
+    cat("Running Within Group Analysis between Regions\n")
     if (reg.length < 2) {
       if ("testRegion" %in% param.na) {
         stop("Cannot run Within Group Analysis with 1 Region.\n")
@@ -185,17 +176,25 @@ diffExpr <- function(object,
       ind[is.na(ind)] <- FALSE
       ind2 <- Biobase::pData(object)$testRegion %in% regions
       ind2[is.na(ind2)] <- FALSE
-      mixed.out <- mixedModelDE(
-        object[, ind & ind2],
-        elt = element,
-        modelFormula = ~ testRegion + (1 + testRegion |
-                                         slide),
-        groupVar = "testRegion",
-        multiCore = multi.core,
-        nCores = n.cores,
-        pAdjust = p.adjust,
-        pairwise = pairwise
-      )
+      #Check to see if there are 2 regions for comparison:
+      object.sub <- object[, ind & ind2]
+      r <- length(unique(pData(object.sub)$testRegion))
+      cat(sprintf("Number of regions in group %s: %s \n", status,r))
+      if(r < 2){
+        stop("There are not enough sample regions to do the analysis")
+      } else {
+        mixed.out <- mixedModelDE(
+          object.sub,
+          elt = element,
+          modelFormula = ~ testRegion + (1 + testRegion |
+                                           slide),
+          groupVar = "testRegion",
+          multiCore = multi.core,
+          nCores = n.cores,
+          pAdjust = p.adjust,
+          pairwise = pairwise
+        )
+      }
 
       # format results as data.frame
       test.results <- do.call(rbind, mixed.out["lsmeans", ])
@@ -218,7 +217,7 @@ diffExpr <- function(object,
       results <- rbind(results, test.results)
     }
   } else {
-    cat("Running Between Group Analysis for Regions")
+    cat("Running Between Group Analysis for Regions\n")
     if (grp.length < 2) {
       if ("testClass" %in% param.na) {
         stop("Cannot run Between Group Analysis with 1 Group.\n")
@@ -231,17 +230,25 @@ diffExpr <- function(object,
       ind[is.na(ind)] <- FALSE
       ind2 <- Biobase::pData(object)$testClass %in% groups
       ind2[is.na(ind2)] <- FALSE
-      mixed.out <-
-        mixedModelDE(
-          object[, ind & ind2],
-          elt = element,
-          modelFormula = ~ testClass + (1 | slide),
-          groupVar = "testClass",
-          multiCore = multi.core,
-          nCores = n.cores,
-          pAdjust = p.adjust,
-          pairwise = pairwise
-        )
+      object.sub <- object[, ind & ind2]
+      #Check to see if there are 2 groups for comparison:
+      r <- length(unique(pData(object.sub)$testClass))
+      cat(sprintf("Number of groups in region %s: %s \n", region, r))
+      if(r < 2){
+        stop("There are not enough sample groups to do the analysis")
+      } else {
+        mixed.out <-
+          mixedModelDE(
+            object.sub,
+            elt = element,
+            modelFormula = ~ testClass + (1 | slide),
+            groupVar = "testClass",
+            multiCore = multi.core,
+            nCores = n.cores,
+            pAdjust = p.adjust,
+            pairwise = pairwise
+          )
+      }
 
       # format results as data.frame
       test.results <- do.call(rbind, mixed.out["lsmeans", ])

diff --git a/R/filtering.R b/R/filtering.R
@@ -115,15 +115,15 @@ filtering <- function(object, loq.cutoff, loq.min, cut.segment, goi) {
 
   # select the annotations we want to show, use `` to surround column names with
   # spaces or special symbols
-  count.mat <- count(pData(object), `slide name`, class, region, segment)
+  count.mat <- count(pData(object), `slide_name`, class, region, segment)
   if(class(object)[1] != "NanoStringGeoMxSet"){
     stop(paste0("Error: You have the wrong data class, must be NanoStringGeoMxSet" ))
   }
-  # simplify the slide names
-  count.mat$`slide name` <- gsub("disease", "d", gsub("normal", "n", count.mat$`slide name`))
-  # gather the data and plot in order: class, slide name, region, segment
+  # simplify the slide_names
+  count.mat$`slide_name` <- gsub("disease", "d", gsub("normal", "n", count.mat$`slide_name`))
+  # gather the data and plot in order: class, slide_name, region, segment
   test.gr <- gather_set_data(count.mat, 1:4)
-  test.gr$x <-factor(test.gr$x, levels = c("class", "slide name", "region", "segment"))
+  test.gr$x <-factor(test.gr$x, levels = c("class", "slide_name", "region", "segment"))
   # plot Sankey
   sankey.plot<- ggplot(test.gr, aes(x, id = id, split = y, value = n)) +
     geom_parallel_sets(aes(fill = region), alpha = 0.5, axis.width = 0.1) +
@@ -192,4 +192,4 @@ filtering <- function(object, loq.cutoff, loq.min, cut.segment, goi) {
   goi <- goi[goi %in% rownames(object)]
 
   return(list("stacked.bar.plot" = stacked.bar.plot, "tab" = tab, "sankey.plot" = sankey.plot, "genes.detected.plot" = genes.detected.plot, "object" = object))
-}
+}
diff --git a/R/normalization.R b/R/normalization.R
@@ -10,7 +10,7 @@
 #'          NanostringGeoMxSet, outputs a normalized NanostringGeoMxSet
 #'          
 #' @param object A NanoStringGeoMxSet dataset
-#' @param norm A vector with options of c(quant or neg)
+#' @param norm A vector with options of c(q3 or neg)
 #' @importFrom ggplot2 ggplot
 #' @importFrom ggplot2 geom_histogram
 #' @importFrom ggplot2 scale_x_continuous
@@ -32,7 +32,7 @@
 #' @return A list containing the ggplot grid, a boxplot, an normalized dataframe.
 
 
-# To call function, must have object = object; norm = c(quant or neg)
+# To call function, must have object = object; norm = c(q3 or neg)
 geomxNorm <- function(object, norm) {
 
   if(class(object)[1] != "NanoStringGeoMxSet"){
@@ -87,33 +87,50 @@ geomxNorm <- function(object, norm) {
                        rel_widths = c(0.43,0.57))
   multi.plot <- plot_grid(plt1, btm.row, ncol = 1, labels = c("A", ""))
 
-  if(norm == "quant"){
+  if(norm == "q3"){
     # Q3 norm (75th percentile) for WTA/CTA  with or without custom spike-ins
     object <- normalize(object,
                         norm_method = "quant", 
                         desiredQuantile = .75,
                         toElt = "q_norm")
 
-    transform1<- assayDataElement(object[,1:10], elt = "q_norm")
-    transform2<- as.data.frame(transform1)
-    transform3<- melt(transform2)
-    ggboxplot <- ggplot(transform3, aes(variable, value)) +
+    # The raw counts boxplot
+    transform1.raw<- exprs(object[,1:10])
+    transform2.raw<- as.data.frame(transform1.raw)
+    transform3.raw<- melt(transform2.raw)
+    ggboxplot.raw <- ggplot(transform3.raw, aes(variable, value)) +
+      stat_boxplot(geom = "errorbar") +
+      geom_boxplot(fill="#2CA02C") +
+      scale_y_log10() +
+      xlab("Segment") + 
+      ylab("Counts, Raw") +
+      ggtitle("Q3 Norm Counts") +
+      scale_x_discrete(labels=c(1:10))
+
+    # The normalized counts boxplot
+    transform1.norm<- assayDataElement(object[,1:10], elt = "q_norm")
+    transform2.norm<- as.data.frame(transform1.norm)
+    transform3.norm<- melt(transform2.norm)
+    ggboxplot.norm <- ggplot(transform3.norm, aes(variable, value)) +
       stat_boxplot(geom = "errorbar") +
       geom_boxplot(fill="#2CA02C") +
       scale_y_log10() +
       xlab("Segment") + 
-      ylab("Counts, Quant. Normailzed") +
+      ylab("Counts, Q3 Normalized") +
       ggtitle("Quant Norm Counts") +
       scale_x_discrete(labels=c(1:10))
   }
-  if(norm == "Quant"){
-    stop(paste0("Error: Quant needs to be quant" ))
+  if(norm == "Q3"){
+    stop(paste0("Error: Q3 needs to be q3" ))
   }
   if(norm == "quantile"){
-    stop(paste0("Error: quantile needs to be quant" ))
+    stop(paste0("Error: quantile needs to be q3" ))
   }
   if(norm == "Quantile"){
-    stop(paste0("Error: Quantile needs to be quant" ))
+    stop(paste0("Error: Quantile needs to be q3" ))
+  }
+  if(norm == "quant"){
+    stop(paste0("Error: quant needs to be q3" ))
   }
 
   if(norm == "neg"){
@@ -123,15 +140,29 @@ geomxNorm <- function(object, norm) {
                         fromElt = "exprs",
                         toElt = "neg_norm")
 
-    transform1<- assayDataElement(object[,1:10], elt = "neg_norm")
-    transform2<- as.data.frame(transform1)
-    transform3<- melt(transform2)
-    ggboxplot <- ggplot(transform3, aes(variable, value)) +
+    # The raw counts boxplot
+    transform1.raw<- exprs(object[,1:10])
+    transform2.raw<- as.data.frame(transform1.raw)
+    transform3.raw<- melt(transform2.raw)
+    ggboxplot.raw <- ggplot(transform3.raw, aes(variable, value)) +
+      stat_boxplot(geom = "errorbar") +
+      geom_boxplot(fill="#FF7F0E") +
+      scale_y_log10() +
+      xlab("Segment") + 
+      ylab("Counts, Raw") +
+      ggtitle("Neg Norm Counts") +
+      scale_x_discrete(labels=c(1:10))
+
+    # The normalized counts boxplot
+    transform1.norm<- assayDataElement(object[,1:10], elt = "neg_norm")
+    transform2.norm<- as.data.frame(transform1.norm)
+    transform3.norm<- melt(transform2.norm)
+    ggboxplot.norm <- ggplot(transform3.norm, aes(variable, value)) +
       stat_boxplot(geom = "errorbar") +
       geom_boxplot(fill="#FF7F0E") +
       scale_y_log10() +
       xlab("Segment") + 
-      ylab("Counts, Neg. Normailzed") +
+      ylab("Counts, Neg. Normalized") +
       ggtitle("Neg Norm Counts") +
       scale_x_discrete(labels=c(1:10))
   }
@@ -145,5 +176,5 @@ geomxNorm <- function(object, norm) {
     stop(paste0("Error: Negative needs to be neg" ))
   }
 
-  return(list("multi.plot" = multi.plot, "boxplot" = ggboxplot, "object" = object))
+  return(list("multi.plot" = multi.plot, "boxplot.raw" = ggboxplot.raw, "boxplot.norm" = ggboxplot.norm, "object" = object))
 }