Add prior count to CPM & add FDR order again

LUMC · Feb 26, 2021 · 0216e4d · 0216e4d
1 parent 85a3bfb
commit 0216e4d
Show file tree

Hide file tree

Showing 3 changed files with 15 additions and 18 deletions.
diff --git a/inst/src/markdown/analysisDESeq2.Rmd b/inst/src/markdown/analysisDESeq2.Rmd
@@ -114,7 +114,7 @@ dge <- DGEList(counts = assay(se),
 dge <- dge[rowSums(abs(dge$counts)) > 1,]
 
 tempDge <- dge
-tempDge$counts <- cpm(dge, log = TRUE)
+tempDge$counts <- cpm(dge, log = TRUE, prior.count = 1)
 countDistributionLinePlot(tempDge)
 
 dge <- DGEList(counts = assay(se), samples = colData(se))
@@ -137,7 +137,7 @@ analysis <- DESeqDataSet(se, design = design)
 analysis <- analysis[rowSums(abs(assay(analysis))) > 1,]
 
 # FILTER IF NECESSARY
-counts <- cpm(counts(analysis), log = TRUE)
+counts <- cpm(counts(analysis), log = TRUE, prior.count = 1)
 selectedFeatures <- rownames(analysis)[apply(counts, 1, function(v)
   sum(v >= cpm_value)) >= 1 / 4 * ncol(counts)]
 analysis <- analysis[selectedFeatures, ]
@@ -161,10 +161,8 @@ Extract the normalized data from the analysis results.
 # GET NORMALIZED COUNTS
 
 getSize <- estimateSizeFactors(analysis)
-normCounts <- cpm(data.frame(counts(getSize, normalized = TRUE)))
-normDge <- DGEList(counts = normCounts, samples = dge$samples)
-normDge$counts <- log2(normDge$counts)
-normDge$counts[is.infinite(normDge$counts)] <- 0
+normDge <- DGEList(counts = data.frame(counts(getSize, normalized = TRUE)), samples = dge$samples)
+normDge$counts <- cpm(normDge, log = TRUE, prior.count = 1)
 
 countDistributionLinePlot(normDge)
 samplePca2dPlot(normDge, design_base, "PC1", "PC2")
@@ -217,10 +215,6 @@ if (!is.null(data_annotation)) {
   deTab$Row.names <- NULL
 }
 
-# MAKE BOTH TABLES EQUAL
-deTab <- deTab[intersect(rownames(deTab), rownames(normDge$counts)), ]
-normDge$counts <- normDge$counts[intersect(rownames(deTab), rownames(normDge$counts)), ]
-
 #ORDER deTab TABLE
 deTab <- rename(deTab, "FDR" = "adj.P.Val")
 deOrder <- c("avgLog2CPM", "avgLog2FC", "P.Value", "FDR", "DE")
@@ -264,6 +258,7 @@ Results are saved, so they can be retrieved by the application.
 ```{r save}
 # SAVE ANALYSIS
 
+deTab <- deTab[order(deTab$FDR),]
 save(deTab, normDge, file = "analysis.RData")
 
 ```

diff --git a/inst/src/markdown/analysisEdgeR.Rmd b/inst/src/markdown/analysisEdgeR.Rmd
@@ -122,7 +122,7 @@ row.names(dge$genes) <- row.names(dge$counts)
 dge <- dge[rowSums(abs(dge$counts)) > 1,]
 
 tempDge <- dge
-tempDge$counts <- cpm(dge, log = TRUE)
+tempDge$counts <- cpm(dge, log = TRUE, prior.count = 1)
 countDistributionLinePlot(tempDge)
 
 ```
@@ -134,7 +134,7 @@ The raw data is filtered based on the input values provided.
 # GET SELECTED FEATURES
 
 edger <- calcNormFactors(dge, method = "TMM")
-counts <- cpm(edger, log = TRUE)
+counts <- cpm(edger, log = TRUE, prior.count = 1)
 selectedFeatures <- rownames(edger)[apply(counts, 1, function(v)
   sum(v >= cpm_value)) >= 1 / 4 * ncol(counts)]
 
@@ -157,7 +157,7 @@ The filtered data is normalized using TMM.
 normDge <- calcNormFactors(highExprDge, method = "TMM")
 
 tempDge <- normDge
-tempDge$counts <- cpm(normDge, log = TRUE)
+tempDge$counts <- cpm(normDge, log = TRUE, prior.count = 1)
 countDistributionLinePlot(tempDge)
 samplePca2dPlot(tempDge, design_base, "PC1", "PC2")
 
@@ -255,7 +255,8 @@ Results are saved, so they can be retrieved by the application.
 ```{r save}
 # SAVE ANALYSIS
 
-normDge$counts <- cpm(normDge, log = TRUE)
+deTab <- deTab[order(deTab$FDR),]
+normDge$counts <- cpm(normDge, log = TRUE, prior.count = 1)
 save(deTab, normDge, file = "analysis.RData")
 
 ```

diff --git a/inst/src/markdown/analysisLimma.Rmd b/inst/src/markdown/analysisLimma.Rmd
@@ -122,7 +122,7 @@ row.names(dge$genes) <- row.names(dge$counts)
 dge <- dge[rowSums(abs(dge$counts)) > 1,]
 
 tempDge <- dge
-tempDge$counts <- cpm(dge, log = TRUE)
+tempDge$counts <- cpm(dge, log = TRUE, prior.count = 1)
 countDistributionLinePlot(tempDge)
 
 ```
@@ -134,7 +134,7 @@ The raw data is filtered based on the input values provided.
 # GET SELECTED FEATURES
 
 limmaV <- calcNormFactors(dge, method = "TMM")
-counts <- cpm(limmaV, log = TRUE)
+counts <- cpm(limmaV, log = TRUE, prior.count = 1)
 selectedFeatures <- rownames(limmaV)[apply(counts, 1, function(v)
   sum(v >= cpm_value)) >= 1 / 4 * ncol(counts)]
 
@@ -157,7 +157,7 @@ The filtered data is normalized using TMM.
 normDge <- calcNormFactors(highExprDge, method = "TMM")
 
 tempDge <- normDge
-tempDge$counts <- cpm(normDge, log = TRUE)
+tempDge$counts <- cpm(normDge, log = TRUE, prior.count = 1)
 countDistributionLinePlot(tempDge)
 samplePca2dPlot(tempDge, design_base, "PC1", "PC2")
 
@@ -257,7 +257,8 @@ Results are saved, so they can be retrieved by the application.
 ```{r save}
 # SAVE ANALYSIS
 
-normDge$counts <- cpm(normDge, log = TRUE)
+deTab <- deTab[order(deTab$FDR),]
+normDge$counts <- cpm(normDge, log = TRUE, prior.count = 1)
 save(deTab, normDge, file = "analysis.RData")
 
 ```