diff --git a/R_scripts/General_function.R b/R_scripts/General_function.R
new file mode 100644
index 0000000..c988562
--- /dev/null
+++ b/R_scripts/General_function.R
@@ -0,0 +1,222 @@
+#################################################
+# Define functions used accross several scripts #
+#################################################
+
+#############################
+# List of required packages #
+#############################
+
+# Create a function to load or install (then load) the required packages
+loadpackage <- function(package) {
+  if (suppressWarnings(require(package=deparse(substitute(package)), character.only=TRUE, quietly=TRUE))) {
+    print(paste(deparse(substitute(package)), " is loaded correctly!", sep=""))
+  }
+  else {
+    print(paste("Trying to install ", deparse(substitute(package)), sep=""))
+    source(file="http://bioconductor.org/biocLite.R", verbose=FALSE)
+    biocLite(pkgs=deparse(substitute(package)), suppressUpdates=TRUE)
+    if(require(package=deparse(substitute(package)), character.only=TRUE, quietly=TRUE)) {
+      print(paste(deparse(substitute(package)), " is correctly installed and loaded!", sep=""))
+    }
+    else {
+      stop(paste('"', "Could not install ", deparse(substitute(package)), '"', sep=""))
+    }
+  }
+  print(paste(deparse(substitute(package)), " version: ", packageVersion(pkg=deparse(substitute(package))), sep=""))
+}
+
+################################
+# Differential expression call #
+################################
+
+# Create a function to perform the differential expression within edgeR according to provided parameters
+diff_expr_edgeR <- function(treat1, treat2, data, design, group, adjpvalue, method, LRTdata, DEdata, DEfile, Smearfile) {
+  if(length(grep(pattern=treat1, x=colnames(design)))==1 && length(grep(pattern=treat2, x=colnames(design)))==1) {
+    contr <- rep(x=0, times=length(colnames(design)))
+    contr[c(grep(pattern=treat1, x=colnames(design)), grep(pattern=treat2, x=colnames(design)))] <- c(1, -1)
+    lrt <- glmLRT(glmfit=data, contrast=contr)
+  }
+  else if(length(grep(pattern=treat1, x=colnames(design)))==0 && length(grep(pattern=treat1, x=levels(group)))==1) {
+    contr <- grep(pattern=treat2, x=colnames(design))
+    lrt <- glmLRT(glmfit=data, coef=contr)
+  }
+  else if(length(grep(pattern=treat2, x=colnames(design)))==0 && length(grep(pattern=treat2, x=levels(group)))==1) {
+    contr <- grep(pattern=treat1, x=colnames(design))
+    lrt <- glmLRT(glmfit=data, coef=contr)
+  }
+  else {
+    stop("Error: Check that the treatments provided are in group table!")
+  }
+  de <- topTags(object=lrt, n="inf", adjust.method=method)
+  print("Names of the edgeR likeli-hood ratio test dataframe:")
+  print(names(lrt))
+  print("Comparison perfomed in the edgeR likeli-hood ratio test:")
+  print(lrt$comparison)
+  print("Heading of the edgeR likeli-hood ratio test dataframe:")
+  print(head(lrt$table))
+  print("Summary of the number of edgeR DEG:")
+  print(summary(decideTestsDGE(lrt, p.value=adjpvalue)))
+  print("Names of the edgeR multiple correction test dataframe:")
+  print(names(de))
+  print("Heading of the edgeR multiple correction test dataframe:")
+  print(head(de$table))
+  if(length(grep(pattern="antisense", x=DEfile))==1) {
+    write.table(x=de$table[,c("sense_ensembl_gene_id", "external_gene_id","description","logFC", "logCPM", "LR", "PValue", "FDR")], file=paste(DEfile, "txt", sep="."), sep="\t", quote=FALSE, row.names=TRUE, col.names=TRUE)
+  }
+  else if(length(grep(pattern="novel", x=DEfile))==1) {
+    write.table(x=de$table[,c("Hsapiens_ensembl_gene_id", "external_gene_id","description","logFC", "logCPM", "LR", "PValue", "FDR")], file=paste(DEfile, "txt", sep="."), sep="\t", quote=FALSE, row.names=TRUE, col.names=TRUE)
+  }
+  else {
+    write.table(x=de$table[,c("external_gene_id","description","logFC", "logCPM", "LR", "PValue", "FDR")], file=paste(DEfile, "txt", sep="."), sep="\t", quote=FALSE, row.names=TRUE, col.names=TRUE)
+  }
+  png(filename=paste(Smearfile, "png", sep="."), width=1366, height=768, units="px")
+  plotSmear(object=lrt, de.tags=(rownames(lrt$table)[as.logical(decideTestsDGE(lrt, p.value = 0.05))]))
+  abline(h=c(-1, 1), col="blue")
+  dev.off()
+  assign(x=LRTdata, value=lrt, envir=.GlobalEnv)
+  assign(x=DEdata, value=de, envir=.GlobalEnv)
+}
+
+################################
+# Significance label for graph #
+################################
+
+# Create a function to add significance label
+sig_label <- function(arg1, arg2) {
+  Significance_label <- vector()
+  for (j in 1:length(arg1[,arg2])) {
+    if (arg1[j,arg2] < 0.001) {
+      Significance_label <- c(Significance_label, "***")
+    }
+    else if (arg1[j,arg2] < 0.01) {
+      Significance_label <- c(Significance_label, "**")
+    }
+    else if (arg1[j,arg2] < 0.05) {
+      Significance_label <- c(Significance_label, "*")
+    }
+    else {
+      Significance_label <- c(Significance_label, "")
+    }
+  }
+  arg1 <- cbind(arg1, Significance_label)
+  return(arg1)
+}
+
+##########################################
+# Differential expression call for miRNA #
+##########################################
+
+# Create a function to perform differential expression of miRNA genes
+miR.DE <- function(..., data, design, group, adjpvalue, method, lrtdata, dedata,
+                   smearfile) {
+  comparison <- matrix(data = unlist(list(...)), ncol = 2, byrow = TRUE)
+  contr <- rep(x = 0, times = length(colnames(design)))
+  for (i in 1:length(comparison[, 1])) {
+    if (length(grep(pattern = paste('^', comparison[i, 1], '$', sep = ""),
+                    x = colnames(design), perl = TRUE)) == 1) {
+      contr[grep(pattern = paste('^', comparison[i, 1], '$', sep = ""),
+                 x = colnames(design), perl = TRUE)] <-
+        as.numeric(comparison[i, 2])
+    }
+    else if (length(grep(pattern = paste('^', comparison[i, 1], '$', sep = ""),
+                         x = colnames(design), perl = TRUE)) == 0 && length(
+                           grep(pattern = paste('^', comparison[i, 1], '$',
+                                                sep = ""), x = levels(group),
+                                perl = TRUE)) == 1) {
+      print(x = "Intersect value used!")
+    }
+    else {
+      stop("Error: Check that the comparison values are in group table!")
+    }
+  }
+  lrt <- glmLRT(glmfit = data, contrast = contr)
+  print("Names in the edgeR likeli-hood ratio test dataframe:")
+  print(names(lrt))
+  print("Comparison perfomed in the edgeR likeli-hood ratio test:")
+  print(lrt$comparison)
+  print(x = contr)
+  print("Heading of the edgeR likeli-hood ratio test table:")
+  print(head(lrt$table))
+  print("Summary of the number of DE gene:")
+  print(summary(decideTestsDGE(object = lrt, p.value = adjpvalue,
+                               adjust.method = "BH")))
+  de <- topTags(object = lrt, n = "inf", adjust.method = method)
+  print("Names in the edgeR multiple correction test dataframe:")
+  print(names(de))
+  print("Heading of the edgeR multiple correction test table:")
+  print(head(de$table))
+  png(filename = paste(smearfile, "png", sep="."), width = 1366, height = 768,
+      units = "px")
+  plotSmear(object = lrt, de.tags = (rownames(lrt$table)[as.logical(
+    decideTestsDGE(object = lrt, p.value = 0.05, adjust.method = "BH"))]))
+  abline(h = c(-0.5, 0.5), col = "blue")
+  dev.off()
+  assign(x = lrtdata, value = lrt, envir = .GlobalEnv)
+  assign(x = dedata, value = de, envir = .GlobalEnv)
+}
+
+############################################################
+# Merge differential expression tables obtained from edgeR #
+############################################################
+
+# Create a function to merge the differential expression tables
+# obtained in edgeR
+DEtable.merge <- function(..., output, pattern) {
+  var <- c(...)
+  suffix <- gsub(pattern = pattern, replacement = '_', x = var, perl = TRUE)
+  for (i in 1:length(var)) {
+    tomerge <- eval(parse(text = var[i]))$table
+    if (i == 1) {
+      data <- tomerge[,]
+      colnames(data)[(ncol(data) - 4):ncol(data)] <- c(gsub(
+        pattern = "$", replacement = suffix[i], x = colnames(data)[(
+          ncol(data) - 4):ncol(data)], perl = TRUE))
+    }
+    else {
+      data <- merge(x = data, y = tomerge[, (ncol(tomerge) - 4):ncol(tomerge)],
+                    by = "row.names")
+      rownames(data) <- data[, "Row.names"]
+      data <- data[, -1]
+      colnames(data)[(ncol(data) - 4):ncol(data)] <- c(gsub(
+        pattern = "$", replacement = suffix[i], x = colnames(data)[(
+          ncol(data) - 4):ncol(data)], perl = TRUE))
+    }
+  }
+  data <- cbind(rownames(data), data)
+  colnames(data)[1] <- "gene_id"
+  assign(x = output, value = data, envir = .GlobalEnv)
+}
+
+############################################################
+# Merge differential expression tables obtained from DESeq #
+############################################################
+
+# Create a function to merge the differential expression tables
+# obtained in DESeq
+DESeq.merge <- function(..., output, pattern) {
+  var <- c(...)
+  suffix <- gsub(pattern = pattern, replacement = '_', x = var, perl = TRUE)
+  for (i in 1:length(var)) {
+    tomerge <- as.data.frame(eval(parse(text = var[i])))
+    if (i == 1) {
+      data <- tomerge[,]
+      colnames(data)<- c(gsub(pattern = "$", replacement = suffix[i],
+                              x = colnames(data), perl = TRUE))
+    }
+    else {
+      data <- merge(x = data, y = tomerge, by = "row.names")
+      rownames(data) <- data[, "Row.names"]
+      data <- data[, -1]
+      colnames(data)[(ncol(data) - 5):ncol(data)] <- c(gsub(
+        pattern = "$", replacement = suffix[i], x = colnames(data)[(
+          ncol(data) - 5):ncol(data)], perl = TRUE))
+    }
+  }
+  data <- cbind(rownames(data), data)
+  colnames(data)[1] <- "gene_id"
+  assign(x = output, value = data, envir = .GlobalEnv)
+}
+
+#######
+# END #
+#######
\ No newline at end of file
diff --git a/R_scripts/miRNAseq_edgeR_pipeline.R b/R_scripts/miRNAseq_edgeR_pipeline.R
new file mode 100644
index 0000000..af6660a
--- /dev/null
+++ b/R_scripts/miRNAseq_edgeR_pipeline.R
@@ -0,0 +1,1503 @@
+#############################################################
+#############################################################
+##                                                         ##
+##  miRNA-seq data analysis of sense counts (paired data)  ##
+##                                                         ##
+#############################################################
+#############################################################
+
+# Analysis of 10 animals infected over a 15 weeks time course for which 
+# miRNA-seq libraries where prepared from serum samples
+
+#############################
+# List of required packages #
+#############################
+
+# Source the common functions used across this script
+source(file = paste("C:/Users/nnalpas/Dropbox/Home_work_sync/Work", 
+                     "/Bioinformatics/R/General_function.R", sep = ""))
+source(file = paste("C:/Users/Nicolas Nalpas/Dropbox/Home_work_sync/Work", 
+                    "/Bioinformatics/R/General_function.R", sep = ""))
+
+# Load the required packages
+loadpackage(package = edgeR)
+loadpackage(package = MASS)
+loadpackage(package = ggplot2)
+loadpackage(package = PerformanceAnalytics)
+loadpackage(package = VennDiagram)
+loadpackage(package = reshape2)
+
+#########################################################################
+# Analysis of counts data obtained via Novoalign-featureCounts pipeline #
+#########################################################################
+
+################################################
+# Read in and concatenate input files within R #
+################################################
+
+# Reads and merges a set of files containing counts
+Count <- readDGE(files = list.files(
+                 path = paste("C:/Users/nnalpas/Dropbox/Home_work_sync/Work/",
+                              "TIDA/miRNA-seq/Results/Counts/Novo-feature/", 
+                              "mature_miRNA", sep = ""), pattern = "6"),
+                 path = paste("C:/Users/nnalpas/Dropbox/Home_work_sync/Work/",
+                              "TIDA/miRNA-seq/Results/Counts/Novo-feature/", 
+                              "mature_miRNA", sep = ""), columns = c(1,7), 
+                 skip = 1, header = TRUE)
+
+# Check the readDGE variable output
+names(Count)
+Count$samples
+head(Count$counts)
+
+############################################################
+# Gene annotation using information obtained from GTF file #
+############################################################
+
+# Read in the annotation information
+miRNA.info <- read.table(file = "../miRNA_Btaurus.txt", header = TRUE,
+                        sep = "\t", quote = "")
+
+# Determine which miRNA have identical mature sequence
+miRNA.duplicate <- aggregate(gene_id ~ sequence, FUN = "as.vector",
+                             data = miRNA.info, na.action = "as.vector")
+
+# Combine new info with miRNA annotation
+miRNA.info <- merge(x = miRNA.info, y = miRNA.duplicate, by.x = "sequence",
+                   by.y = "sequence", all = TRUE)
+miRNA.info <- cbind(miRNA.info[, 2:7], miRNA.info[, 1], 
+                    miRNA.info[, 8:ncol(miRNA.info)])
+colnames(miRNA.info)[c(1,7,12)] <- c("gene_id", "sequence",
+                                     "identical_sequence")
+head(miRNA.info)
+dim(miRNA.info)
+
+# Merge the annotation information with the count table
+Count <- merge(x = miRNA.info, y = Count$counts, 
+                         by.x = "gene_id", by.y = "row.names", all = TRUE)
+head(Count)
+
+# Check the merged matrix in terms of size and content
+dim(Count)
+table(duplicated(Count[, "gene_id"]))
+length(unique(Count[, "gene_id"], incomparables = FALSE))
+
+# Ouptut samples data
+write.matrix(x = Count, file = "Novo-feature_counts.txt",  sep = "\t")
+
+###############################
+# Create groups and a DGElist #
+###############################
+
+# Create a target matrix and a experimental group vector and animal block
+target <- colnames(Count[, (ncol(Count)-69):ncol(Count)])
+target <- cbind(target, matrix(data = unlist(strsplit(x = target, split = "_",
+                                                      fixed = TRUE)),
+                               nrow = 70, ncol = 2, byrow = TRUE))
+colnames(target) = c("sample", "animal", "time_point")
+group <- factor(x = target[, "time_point"])
+animal <- factor(x = target[, "animal"])
+
+# Create a DGElist containing the group information
+dgelist <- DGEList(counts = Count[, (ncol(Count)-69):ncol(Count)], 
+                         lib.size = NULL, norm.factors = NULL, group = group, 
+                         genes = Count[, 1:(ncol(Count)-70)], 
+                         remove.zeros = FALSE)
+rownames(dgelist$counts) <- rownames(
+  dgelist$genes) <- dgelist$genes[, "gene_id"]
+dgelist$genes["gene_id"] <- NULL
+names(dgelist)
+head(dgelist$samples)
+head(dgelist$counts)
+head(dgelist$genes)
+
+# Ouptut samples data
+write.table(x = dgelist$samples, file = "Novo-feature_samples.txt", 
+            sep = "\t", quote = FALSE, row.names = TRUE, col.names = TRUE)
+
+###########################################################
+# Quality check of libraries by plotting density of count #
+###########################################################
+
+# Log2 transform the count data for better visualization
+count.log2 <- log(x = (dgelist$counts[,]+1), base = 2)
+
+# Plot density of count for all libraries
+png(filename = "Novo-feature_Dens_all_miRNA.png", width = 1500, height = 1300,
+    units = "px")
+plot(x = density(count.log2[, 1]), main = "Density plot of count per gene", 
+     lty =  1, xlab = "Log2 of count per gene", ylab = "Density", col = 1, 
+     xlim = c(-0.5, 6), ylim = c(0.0, 0.8), lwd = 2 , cex.axis = 1.5, 
+     cex.lab = 1.5, cex.main = 2)
+for (i in 2:ncol(count.log2)) {
+  lines(x = density(count.log2[, i]), lty = 1, col = i)
+}
+dev.off()
+
+#####################################
+# Filtering of lowly expressed tags #
+#####################################
+
+# Identify genes with zero counts across all samples
+dim(dgelist[rowSums(dgelist$counts) == 0, ])
+head(dgelist[rowSums(dgelist$counts) == 0, ])
+
+# Filter lowly expressed tags, retaining only tags with at least 50 counts per 
+# million in 10 or more libraries (10 libraries correspond to one time point)
+dgelist.filt <- dgelist[rowSums(cpm(dgelist$counts) > 50) >= 10, ]
+dim(dgelist.filt$counts)
+
+# Compute the library size
+dgelist.filt$samples$lib.size <- colSums(dgelist.filt$counts)
+head(dgelist.filt$samples)
+head(dgelist$samples)
+
+###########################################################################
+# Quality check of libraries by plotting density of count after filtering #
+###########################################################################
+
+# Log2 transform the count data for better visualization
+count.log2 <- log(x = (dgelist.filt$counts[,]+1), base = 2)
+
+# Plot density of count for all libraries
+png(filename = "Novo-feature_Dens_filt_miRNA.png", width = 1366, height = 768,
+    units = "px")
+plot(x = density(count.log2[, 1]), main = "Density plot of count per gene", 
+     lty =  1, xlab = "Log2 of count per gene", ylab = "Density", col = 1, 
+     xlim = c(-0.5, 15), ylim = c(0.0, 0.2), lwd = 2 , cex.axis = 1.5, 
+     cex.lab = 1.5, cex.main = 2)
+for (i in 2:ncol(count.log2)) {
+  lines(x = density(count.log2[, i]), lty = 1, col = i)
+}
+dev.off()
+
+############################################
+# Histogram of CPM per expression category #
+############################################
+
+# Prepare the histogram picture output
+png(filename = "Novo-feature_Hist_miRNA.png", width = 1500, height = 1500,
+    units = "px")
+par(mfrow = c(2,2), cex = 3, cex.axis = 0.7)
+
+# Plot histogram of number of gene per CPM category for all microRNA
+hist_plot <- hist(x = rowMeans(x = cpm(dgelist$counts), na.rm = FALSE), 
+                  breaks = c(
+                    0, 0.01, 0.1, 1, 10, 100, 1000, 10000, 100000, 1000000), 
+                  plot = FALSE)
+plot(x = hist_plot$counts, log = "x", type = "h", axes = FALSE, 
+     xlab = "Average CPM", ylab = "Number of gene", lwd = 10, 
+     col = "lightblue", main = "All miRNA")
+axis(side = 1, at = c(1:10), labels = c(
+  "0", "0.01", "0.1", "1", "10", "100", "1000", "10000", "100000", "1000000"))
+axis(side = 2)
+
+# Plot histogram of number of gene per CPM category for microRNA with 
+# at least one count
+hist_plot <- hist(x = rowMeans(x = cpm(dgelist$counts[rowSums(
+  dgelist$counts) > 0, ]), na.rm = FALSE), breaks = c(
+    0, 0.01, 0.1, 1, 10, 100, 1000, 10000, 100000, 1000000), plot = FALSE)
+plot(hist_plot$counts, log = "x", type = "h", axes = FALSE, 
+     xlab = "Average CPM", ylab = "Number of gene", lwd = 10, 
+     col = "lightgreen", main = "Min one count miRNA")
+axis(side = 1, at = c(1:10), labels = c(
+  "0", "0.01", "0.1", "1", "10", "100", "1000", "10000", "100000", "1000000"))
+axis(side = 2)
+
+# Plot histogram of number of gene per CPM category for microRNA 
+# filtered for low expression
+hist_plot <- hist(x = rowMeans(x = cpm(dgelist.filt$counts), na.rm = FALSE), 
+                  breaks = c(
+                    0, 0.1, 1, 10, 100, 1000, 10000, 100000, 1000000), 
+                  plot = FALSE)
+plot(hist_plot$counts, log = "x", type = "h", axes = FALSE, 
+     xlab = "Average CPM", ylab = "Number of gene", lwd = 10, 
+     col = "lightcoral", main = "Low expression filtered miRNA")
+axis(side = 1, at = c(1:10), labels = c(
+  "0", "0.01", "0.1", "1", "10", "100", "1000", "10000", "100000", "1000000"))
+axis(side = 2)
+
+# Close graphic device
+dev.off()
+
+#################################################
+# Gene expression correlation between libraries #
+#################################################
+
+# Note that CPM values are not required with the use of Spearman rank test
+
+# Perform gene expression correlation between libraries at pre 2 weeks to 
+# identify outlier library
+png(filename = "Novo-feature_Pre2_cor.png", width = 1500, height = 1500,
+    units = "px")
+chart.Correlation(R = log(x = (dgelist.filt$counts[, grep(
+  pattern = "_pre2", x = colnames(dgelist.filt$counts))] + 1), base = 2), 
+  histogram = TRUE, method = "spearman", 
+  main = "Pre 2 weeks time point expression correlation")
+dev.off()
+
+# Perform gene expression correlation between libraries at pre 1 weeks to 
+# identify outlier library
+png(filename = "Novo-feature_Pre1_cor.png", width = 1500, height = 1500,
+    units = "px")
+chart.Correlation(R = log(x = (dgelist.filt$counts[, grep(
+  pattern = "_pre1", x = colnames(dgelist.filt$counts))] + 1), base = 2), 
+  histogram = TRUE, method = "spearman", 
+  main = "Pre 1 weeks time point expression correlation")
+dev.off()
+
+# Perform gene expression correlation between libraries at post 1 weeks to 
+# identify outlier library
+png(filename = "Novo-feature_Post1_cor.png", width = 1500, height = 1500,
+    units = "px")
+chart.Correlation(R = log(x = (dgelist.filt$counts[, grep(
+  pattern = "_1$", x = colnames(dgelist.filt$counts), perl = TRUE)] + 1),
+  base = 2), histogram = TRUE, method = "spearman", 
+  main = "Post 1 weeks time point expression correlation")
+dev.off()
+
+# Perform gene expression correlation between libraries at post 2 weeks to 
+# identify outlier library
+png(filename = "Novo-feature_Post2_cor.png", width = 1500, height = 1500,
+    units = "px")
+chart.Correlation(R = log(x = (dgelist.filt$counts[, grep(
+  pattern = "_2", x = colnames(dgelist.filt$counts))] + 1), base = 2), 
+  histogram = TRUE, method = "spearman", 
+  main = "Post 2 weeks time point expression correlation")
+dev.off()
+
+# Perform gene expression correlation between libraries at post 6 weeks to 
+# identify outlier library
+png(filename = "Novo-feature_Post6_cor.png", width = 1500, height = 1500,
+    units = "px")
+chart.Correlation(R = log(x = (dgelist.filt$counts[, grep(
+  pattern = "_6", x = colnames(dgelist.filt$counts))] + 1), base = 2), 
+  histogram = TRUE, method = "spearman", 
+  main = "Post 6 weeks time point expression correlation")
+dev.off()
+
+# Perform gene expression correlation between libraries at post 10 weeks to 
+# identify outlier library
+png(filename = "Novo-feature_Post10_cor.png", width = 1500, height = 1500,
+    units = "px")
+chart.Correlation(R = log(x = (dgelist.filt$counts[, grep(
+  pattern = "_10", x = colnames(dgelist.filt$counts))] + 1), base = 2), 
+  histogram = TRUE, method = "spearman", 
+  main = "Post 10 weeks time point expression correlation")
+dev.off()
+
+# Perform gene expression correlation between libraries at post 12 weeks to 
+# identify outlier library
+png(filename = "Novo-feature_Post12_cor.png", width = 1500, height = 1500,
+    units = "px")
+chart.Correlation(R = log(x = (dgelist.filt$counts[, grep(
+  pattern = "_12", x = colnames(dgelist.filt$counts))] + 1), base = 2), 
+  histogram = TRUE, method = "spearman", 
+  main = "Post 12 weeks time point expression correlation")
+dev.off()
+
+########################################################
+# Normalization of data using trimmed mean of M-values #
+########################################################
+
+# Calculate normalisation factor for our DGElist, note that with edgeR 
+# the counts are not transformed in any way after normalization, instead 
+# normalization will modify library size
+dgelist.norm <- calcNormFactors(dgelist.filt)
+dgelist.norm$samples
+
+################################################
+# Multidimensional scaling plot of all samples #
+################################################
+
+# Output value for MDS plot (dimension 1 and 2 in this case)
+MDS <- plotMDS(x = dgelist.norm, top = 1000000, gene.selection = "pairwise", 
+               xlab = "Dimension 1", ylab = "Dimension 2", dim.plot = c(1, 2), 
+               cex = 1)
+
+# MDS values are then plotted with ggplot2
+MDS.ggplot <- data.frame(target, MDS$x, MDS$y)
+tiff(filename = "Novo-feature_MDS_miRNA.tif", width = (
+  (range(MDS.ggplot$MDS.x)[2]-range(MDS.ggplot$MDS.x)[1])*11.5), height = (
+    (range(MDS.ggplot$MDS.y)[2]-range(MDS.ggplot$MDS.y)[1])*10), units = "in",
+     res = 600, compression = "lzw")
+ggplot(data = MDS.ggplot, aes(x = MDS.ggplot$MDS.x, y = MDS.ggplot$MDS.y, 
+                              colour = MDS.ggplot$time_point)) + 
+  geom_point(size = 4.5) + theme(panel.background = element_rect(
+    fill = 'wheat'), legend.title = element_text(size = 12, face = "bold"), 
+    legend.text = element_text(size = 10, face = "bold"), 
+    axis.title = element_text(face = "bold", size = 12), 
+    axis.text = element_text(face = "bold", size = 10), 
+    plot.title = element_text(face = "bold", size = 15)) + 
+  ggtitle("MDS plot") + xlab("Dimension 1") + ylab("Dimension 2") + 
+  scale_colour_discrete(name = "Time point\npost-infection", breaks = c(
+    "pre2", "pre1", "1", "2", "6", "10", "12"), 
+    labels = c("pre 2w", "pre 1w", "1w", "2w", "6w", "10w", "12w"))
+dev.off()
+
+# Write into a table the coordinates of each library for the MDS plot
+write.table(x = MDS.ggplot, file = "Novo-feature_MDS_xy.txt", sep = "\t",
+            quote = FALSE, row.names = TRUE, col.names = TRUE)
+
+#########################################################################
+# Determine if there has been inversion between 6522_pre2 and 6526_pre2 #
+#########################################################################
+
+# Output value for MDS plot (dimension 1 and 2) for animals 6522 and 6526
+MDS <- plotMDS(x = dgelist.norm[, grep(pattern = "652(2|6)",
+                                       x = colnames(dgelist.norm),
+                                       perl = TRUE)], top = 1000000,
+               gene.selection = "pairwise", xlab = "Dimension 1",
+               ylab = "Dimension 2", dim.plot = c(1, 2), cex = 1)
+
+# MDS values are then plotted with ggplot2
+MDS.ggplot <- data.frame(target[grep(pattern = "652(2|6)",
+                                     x = target[, "sample"], perl = TRUE),],
+                         MDS$x, MDS$y)
+ggplot(data = MDS.ggplot, aes(x = MDS.ggplot$MDS.x, y = MDS.ggplot$MDS.y, 
+                              colour = MDS.ggplot$time_point,
+                              shape = MDS.ggplot$animal)) + 
+  geom_point(size = 8) + theme(panel.background = element_rect(
+    fill = 'wheat'), legend.title = element_text(size = 20, face = "bold"), 
+    legend.text = element_text(size = 15, face = "bold"), 
+    axis.title = element_text(face = "bold", size = 30), 
+    axis.text = element_text(face = "bold", size = 20), 
+    plot.title = element_text(face = "bold", size = 40)) + 
+  ggtitle("MDS plot") + xlab("Dimension 1") + ylab("Dimension 2")
+
+##############################################
+# Create a design matrix for paired analysis #
+##############################################
+
+# Create a design matrix
+design <- model.matrix(~ animal + group)
+rownames(design) <- rownames(dgelist.norm$samples)
+colnames(design) <- gsub(pattern = "(animal)|(group)", replacement = "",
+                         x = colnames(design), perl = TRUE)
+design
+
+########################################################################
+# Estimate the dispersion parameter for each tag using Cox-Reid method #
+########################################################################
+
+# Calculate the dispersion (common, trended and tagwise)
+dgelist.disp <- estimateGLMCommonDisp(y = dgelist.norm, design = design,
+                                      verbose = TRUE)
+dgelist.disp <- estimateGLMTrendedDisp(y = dgelist.disp, design = design)
+dgelist.disp <- estimateGLMTagwiseDisp(y = dgelist.disp, design = design)
+names(dgelist.disp)
+
+# Plot the dispersion
+png(filename = "Novo-feature_BCV.png", width = 1366, height = 768,
+    units = "px")
+plotBCV(dgelist.disp)
+dev.off()
+
+# Show the calculated dispersion and the coefficient of biological variation
+dgelist.disp$common.dispersion
+sqrt(dgelist.disp$common.dispersion)
+
+##################################################################
+# Determine differential expression using negative binomial GLMs #
+##################################################################
+
+# Fit a negative binomial generalized linear model for each tag using
+# the design matrix and calculated dispersion
+dgeglm.fit <- glmFit(y = dgelist.disp, design = design)
+names(dgeglm.fit)
+
+################################
+# Differential expression call #
+################################
+
+# Test for differential expression for -1 week versus -2 week
+miR.DE("pre1", 1, "pre2", -1, data = dgeglm.fit, design = design,
+       group = group, adjpvalue = 0.05, method = "BH",
+       lrtdata = "Novofeature.dgelrt.pre1w", dedata = "Novofeature.de.pre1w",
+       smearfile = "Novo-feature_smear_pre1w")
+
+# Test for differential expression for 1 week versus -2 week and -1 week
+miR.DE(1, 1, "pre1", -1, "pre2", -1, data = dgeglm.fit, design = design,
+       group = group, adjpvalue = 0.05, method = "BH",
+       lrtdata = "Novofeature.dgelrt.1w", dedata = "Novofeature.de.1w",
+       smearfile = "Novo-feature_smear_1w")
+
+# Test for differential expression for 2 week versus -2 week and -1 week
+miR.DE(2, 1, "pre1", -1, "pre2", -1, data = dgeglm.fit, design = design,
+       group = group, adjpvalue = 0.05, method = "BH",
+       lrtdata = "Novofeature.dgelrt.2w", dedata = "Novofeature.de.2w",
+       smearfile = "Novo-feature_smear_2w")
+
+# Test for differential expression for 6 week versus -2 week and -1 week
+miR.DE(6, 1, "pre1", -1, "pre2", -1, data = dgeglm.fit, design = design,
+       group = group, adjpvalue = 0.05, method = "BH",
+       lrtdata = "Novofeature.dgelrt.6w", dedata = "Novofeature.de.6w",
+       smearfile = "Novo-feature_smear_6w")
+
+# Test for differential expression for 10 week versus -2 week and -1 week
+miR.DE(10, 1, "pre1", -1, "pre2", -1, data = dgeglm.fit, design = design,
+       group = group, adjpvalue = 0.05, method = "BH",
+       lrtdata = "Novofeature.dgelrt.10w", dedata = "Novofeature.de.10w",
+       smearfile = "Novo-feature_smear_10w")
+
+# Test for differential expression for 12 week versus -2 week and -1 week
+miR.DE(12, 1, "pre1", -1, "pre2", -1, data = dgeglm.fit, design = design,
+       group = group, adjpvalue = 0.05, method = "BH",
+       lrtdata = "Novofeature.dgelrt.12w", dedata = "Novofeature.de.12w",
+       smearfile = "Novo-feature_smear_12w")
+
+#########################################################
+# Merge all DE call data from the different time points #
+#########################################################
+
+# Merge all DE table for the different time points into a single dataframe
+DEtable.merge("Novofeature.de.1w", "Novofeature.de.2w", "Novofeature.de.6w",
+              "Novofeature.de.10w", "Novofeature.de.12w",
+              output = "Novofeature.DE", pattern = "^Novofeature.de.")
+head(Novofeature.DE)
+
+# Write into a table the full DE call data
+write.matrix(x = Novofeature.DE, file = "Novo-feature_full_DE.txt", sep = "\t")
+
+##############################################
+# Comparison of DE genes between time points #
+##############################################
+
+# Identify as a vector list the significant DE genes per time point
+sig.1w <- as.character(Novofeature.DE[!is.na(Novofeature.DE$FDR_1w) & (
+  Novofeature.DE$FDR_1w < 0.05), "gene_id"])
+sig.2w <- as.character(Novofeature.DE[!is.na(Novofeature.DE$FDR_2w) & (
+  Novofeature.DE$FDR_2w < 0.05), "gene_id"])
+sig.6w <- as.character(Novofeature.DE[!is.na(Novofeature.DE$FDR_6w) & (
+  Novofeature.DE$FDR_6w < 0.05), "gene_id"])
+sig.10w <- as.character(Novofeature.DE[!is.na(Novofeature.DE$FDR_10w) & (
+  Novofeature.DE$FDR_10w < 0.05), "gene_id"])
+sig.12w <- as.character(Novofeature.DE[!is.na(Novofeature.DE$FDR_12w) & (
+  Novofeature.DE$FDR_12w < 0.05), "gene_id"])
+
+# Create the Venn diagram of overlapping DE genes between time points
+venn.diagram(x = list("1w" = sig.1w, "2w" = sig.2w, "6w" = sig.6w,
+                      "10w" = sig.10w, "12w" = sig.12w),
+             filename = "Novo-feature_Venn_All.tiff", na="remove")
+
+# Identify the DE miRNA common to all post-infection time points
+Novofeature.overlap <- Reduce(intersect, list(sig.1w, sig.2w, sig.6w, sig.10w, sig.12w))
+Novofeature.overlap
+
+############################
+# Clean up the R workspace #
+############################
+
+# Rename variables to be kept according to the dataset used for generation
+Novofeature.dgelist.disp <- dgelist.disp
+
+# Remove temporary and unrequired variables
+rm(miRNA.duplicate, target, animal, group, Count, i, count.log2, hist_plot,
+   MDS, MDS.ggplot, dgelist, dgelist.filt, design, dgelist.norm, dgeglm.fit, 
+   Novofeature.dgelrt.pre1w, Novofeature.de.pre1w, sig.1w, sig.2w, sig.6w,
+   sig.10w, sig.12w, dgelist.disp)
+
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  
+##########################################################
+# Analysis of counts data obtained via miRdeep2 pipeline #
+##########################################################
+
+################################################
+# Read in and concatenate input files within R #
+################################################
+
+# Read in the count files via the read.table function (cannot use readDGE)
+files <- list.files(path = paste("C:/Users/nnalpas/Dropbox/Home_work_sync/",
+                                 "Work/TIDA/miRNA-seq/Results/Counts/mirdeep2",
+                                 sep = ""), pattern = "6")
+for (i in 1:length(files)) {
+  Dat <- read.table(file = paste("C:/Users/nnalpas/Dropbox/Home_work_sync/",
+                                  "Work/TIDA/miRNA-seq/Results/Counts/",
+                                  "mirdeep2", files[i], sep = "/"), quote = "")
+  Dat <- Dat[, c(1:3)]
+  sample <- gsub(pattern = "_expressed.csv", replacement = "", x = files[i])
+  colnames(Dat) <- c("gene_name", sample, "precursor_name")
+  Dat <- merge(x = Dat, y = miRNA.info, by = c("gene_name", "precursor_name"))
+  Dat <- Dat[, c("gene_id", sample)]
+  if (i == 1) {
+    Count <- Dat
+  }
+  else {
+    Count <- merge(x = Count, y = Dat, by = "gene_id")
+  }
+}
+dim(Count)
+head(Count)
+tail(Count)
+
+######################################################
+# Merge the annotation information with counts table #
+######################################################
+
+# Merge previously imported annotation information with the count table
+Count <- merge(x = miRNA.info, y = Count, by = "gene_id", all = TRUE)
+head(Count)
+
+# Check the merged matrix in terms of size and content
+dim(Count)
+table(duplicated(Count[, "gene_id"]))
+length(unique(Count[, "gene_id"], incomparables = FALSE))
+
+# Ouptut samples data
+write.matrix(x = Count, file = "miRdeep2_counts.txt",  sep = "\t")
+
+###############################
+# Create groups and a DGElist #
+###############################
+
+# Create a target matrix and a experimental group vector and animal block
+target <- colnames(Count[, (ncol(Count)-69):ncol(Count)])
+target <- cbind(target, matrix(data = unlist(strsplit(x = target, split = "_",
+                                                      fixed = TRUE)),
+                               nrow = 70, ncol = 2, byrow = TRUE))
+colnames(target) = c("sample", "animal", "time_point")
+group <- factor(x = target[, "time_point"])
+animal <- factor(x = target[, "animal"])
+
+# Create a DGElist containing the group information
+dgelist <- DGEList(counts = Count[, (ncol(Count)-69):ncol(Count)], 
+                   lib.size = NULL, norm.factors = NULL, group = group, 
+                   genes = Count[, 1:(ncol(Count)-70)], 
+                   remove.zeros = FALSE)
+rownames(dgelist$counts) <- rownames(
+  dgelist$genes) <- dgelist$genes[, "gene_id"]
+dgelist$genes["gene_id"] <- NULL
+names(dgelist)
+head(dgelist$samples)
+head(dgelist$counts)
+head(dgelist$genes)
+
+# Ouptut samples data
+write.table(x = dgelist$samples, file = "miRdeep2_samples.txt", 
+            sep = "\t", quote = FALSE, row.names = TRUE, col.names = TRUE)
+
+###########################################################
+# Quality check of libraries by plotting density of count #
+###########################################################
+
+# Log2 transform the count data for better visualization
+count.log2 <- log(x = (dgelist$counts[,]+1), base = 2)
+
+# Plot density of count for all libraries
+png(filename = "miRdeep2_Dens_all_miRNA.png", width = 1500, height = 1300,
+    units = "px")
+plot(x = density(count.log2[, 1]), main = "Density plot of count per gene", 
+     lty =  1, xlab = "Log2 of count per gene", ylab = "Density", col = 1, 
+     xlim = c(-0.5, 5), ylim = c(0.0, 1), lwd = 2 , cex.axis = 1.5, 
+     cex.lab = 1.5, cex.main = 2)
+for (i in 2:ncol(count.log2)) {
+  lines(x = density(count.log2[, i]), lty = 1, col = i)
+}
+dev.off()
+
+#####################################
+# Filtering of lowly expressed tags #
+#####################################
+
+# Identify genes with zero counts across all samples
+dim(dgelist[rowSums(dgelist$counts) == 0, ])
+head(dgelist[rowSums(dgelist$counts) == 0, ])
+
+# Filter lowly expressed tags, retaining only tags with at least 50 counts per 
+# million in 10 or more libraries (10 libraries correspond to one time point)
+dgelist.filt <- dgelist[rowSums(cpm(dgelist$counts) > 50) >= 10, ]
+dim(dgelist.filt$counts)
+
+# Compute the library size
+dgelist.filt$samples$lib.size <- colSums(dgelist.filt$counts)
+head(dgelist.filt$samples)
+head(dgelist$samples)
+
+###########################################################################
+# Quality check of libraries by plotting density of count after filtering #
+###########################################################################
+
+# Log2 transform the count data for better visualization
+count.log2 <- log(x = (dgelist.filt$counts[,]+1), base = 2)
+
+# Plot density of count for all libraries
+png(filename = "miRdeep2_Dens_filt_miRNA.png", width = 1366, height = 768,
+    units = "px")
+plot(x = density(count.log2[, 1]), main = "Density plot of count per gene", 
+     lty =  1, xlab = "Log2 of count per gene", ylab = "Density", col = 1, 
+     xlim = c(-0.5, 15), ylim = c(0.0, 0.2), lwd = 2 , cex.axis = 1.5, 
+     cex.lab = 1.5, cex.main = 2)
+for (i in 2:ncol(count.log2)) {
+  lines(x = density(count.log2[, i]), lty = 1, col = i)
+}
+dev.off()
+
+############################################
+# Histogram of CPM per expression category #
+############################################
+
+# Prepare the histogram picture output
+png(filename = "miRdeep2_Hist_miRNA.png", width = 1500, height = 1500,
+    units = "px")
+par(mfrow = c(2,2), cex = 3, cex.axis = 0.7)
+
+# Plot histogram of number of gene per CPM category for all microRNA
+hist_plot <- hist(x = rowMeans(x = cpm(dgelist$counts), na.rm = FALSE), 
+                  breaks = c(
+                    0, 0.01, 0.1, 1, 10, 100, 1000, 10000, 100000, 1000000), 
+                  plot = FALSE)
+plot(x = hist_plot$counts, log = "x", type = "h", axes = FALSE, 
+     xlab = "Average CPM", ylab = "Number of gene", lwd = 10, 
+     col = "lightblue", main = "All miRNA")
+axis(side = 1, at = c(1:10), labels = c(
+  "0", "0.01", "0.1", "1", "10", "100", "1000", "10000", "100000", "1000000"))
+axis(side = 2)
+
+# Plot histogram of number of gene per CPM category for microRNA with 
+# at least one count
+hist_plot <- hist(x = rowMeans(x = cpm(dgelist$counts[rowSums(
+  dgelist$counts) > 0, ]), na.rm = FALSE), breaks = c(
+    0, 0.01, 0.1, 1, 10, 100, 1000, 10000, 100000, 1000000), plot = FALSE)
+plot(hist_plot$counts, log = "x", type = "h", axes = FALSE, 
+     xlab = "Average CPM", ylab = "Number of gene", lwd = 10, 
+     col = "lightgreen", main = "Min one count miRNA")
+axis(side = 1, at = c(1:10), labels = c(
+  "0", "0.01", "0.1", "1", "10", "100", "1000", "10000", "100000", "1000000"))
+axis(side = 2)
+
+# Plot histogram of number of gene per CPM category for microRNA 
+# filtered for low expression
+hist_plot <- hist(x = rowMeans(x = cpm(dgelist.filt$counts), na.rm = FALSE), 
+                  breaks = c(
+                    0, 0.1, 1, 10, 100, 1000, 10000, 100000, 1000000), 
+                  plot = FALSE)
+plot(hist_plot$counts, log = "x", type = "h", axes = FALSE, 
+     xlab = "Average CPM", ylab = "Number of gene", lwd = 10, 
+     col = "lightcoral", main = "Low expression filtered miRNA")
+axis(side = 1, at = c(1:10), labels = c(
+  "0", "0.01", "0.1", "1", "10", "100", "1000", "10000", "100000", "1000000"))
+axis(side = 2)
+
+# Close graphic device
+dev.off()
+
+#################################################
+# Gene expression correlation between libraries #
+#################################################
+
+# Note that CPM values are not required with the use of Spearman rank test
+
+# Perform gene expression correlation between libraries at pre 2 weeks to 
+# identify outlier library
+png(filename = "miRdeep2_Pre2_cor.png", width = 1500, height = 1500,
+    units = "px")
+chart.Correlation(R = log(x = (dgelist.filt$counts[, grep(
+  pattern = "_pre2", x = colnames(dgelist.filt$counts))] + 1), base = 2), 
+  histogram = TRUE, method = "spearman", 
+  main = "Pre 2 weeks time point expression correlation")
+dev.off()
+
+# Perform gene expression correlation between libraries at pre 1 weeks to 
+# identify outlier library
+png(filename = "miRdeep2_Pre1_cor.png", width = 1500, height = 1500,
+    units = "px")
+chart.Correlation(R = log(x = (dgelist.filt$counts[, grep(
+  pattern = "_pre1", x = colnames(dgelist.filt$counts))] + 1), base = 2), 
+  histogram = TRUE, method = "spearman", 
+  main = "Pre 1 weeks time point expression correlation")
+dev.off()
+
+# Perform gene expression correlation between libraries at post 1 weeks to 
+# identify outlier library
+png(filename = "miRdeep2_Post1_cor.png", width = 1500, height = 1500,
+    units = "px")
+chart.Correlation(R = log(x = (dgelist.filt$counts[, grep(
+  pattern = "_1$", x = colnames(dgelist.filt$counts), perl = TRUE)] + 1),
+  base = 2), histogram = TRUE, method = "spearman", 
+  main = "Post 1 weeks time point expression correlation")
+dev.off()
+
+# Perform gene expression correlation between libraries at post 2 weeks to 
+# identify outlier library
+png(filename = "miRdeep2_Post2_cor.png", width = 1500, height = 1500,
+    units = "px")
+chart.Correlation(R = log(x = (dgelist.filt$counts[, grep(
+  pattern = "_2", x = colnames(dgelist.filt$counts))] + 1), base = 2), 
+  histogram = TRUE, method = "spearman", 
+  main = "Post 2 weeks time point expression correlation")
+dev.off()
+
+# Perform gene expression correlation between libraries at post 6 weeks to 
+# identify outlier library
+png(filename = "miRdeep2_Post6_cor.png", width = 1500, height = 1500,
+    units = "px")
+chart.Correlation(R = log(x = (dgelist.filt$counts[, grep(
+  pattern = "_6", x = colnames(dgelist.filt$counts))] + 1), base = 2), 
+  histogram = TRUE, method = "spearman", 
+  main = "Post 6 weeks time point expression correlation")
+dev.off()
+
+# Perform gene expression correlation between libraries at post 10 weeks to 
+# identify outlier library
+png(filename = "miRdeep2_Post10_cor.png", width = 1500, height = 1500,
+    units = "px")
+chart.Correlation(R = log(x = (dgelist.filt$counts[, grep(
+  pattern = "_10", x = colnames(dgelist.filt$counts))] + 1), base = 2), 
+  histogram = TRUE, method = "spearman", 
+  main = "Post 10 weeks time point expression correlation")
+dev.off()
+
+# Perform gene expression correlation between libraries at post 12 weeks to 
+# identify outlier library
+png(filename = "miRdeep2_Post12_cor.png", width = 1500, height = 1500,
+    units = "px")
+chart.Correlation(R = log(x = (dgelist.filt$counts[, grep(
+  pattern = "_12", x = colnames(dgelist.filt$counts))] + 1), base = 2), 
+  histogram = TRUE, method = "spearman", 
+  main = "Post 12 weeks time point expression correlation")
+dev.off()
+
+########################################################
+# Normalization of data using trimmed mean of M-values #
+########################################################
+
+# Calculate normalisation factor for our DGElist, note that with edgeR 
+# the counts are not transformed in any way after normalization, instead 
+# normalization will modify library size
+dgelist.norm <- calcNormFactors(dgelist.filt)
+dgelist.norm$samples
+
+################################################
+# Multidimensional scaling plot of all samples #
+################################################
+
+# Output value for MDS plot (dimension 1 and 2 in this case)
+MDS <- plotMDS(x = dgelist.norm, top = 1000000, gene.selection = "pairwise", 
+               xlab = "Dimension 1", ylab = "Dimension 2", dim.plot = c(1, 2), 
+               cex = 1)
+
+# MDS values are then plotted with ggplot2
+MDS.ggplot <- data.frame(target, MDS$x, MDS$y)
+tiff(filename = "miRdeep2_MDS_miRNA.tif", width = (
+  (range(MDS.ggplot$MDS.x)[2]-range(MDS.ggplot$MDS.x)[1])*11.5), height = (
+    (range(MDS.ggplot$MDS.y)[2]-range(MDS.ggplot$MDS.y)[1])*10), units = "in",
+  res = 600, compression = "lzw")
+ggplot(data = MDS.ggplot, aes(x = MDS.ggplot$MDS.x, y = MDS.ggplot$MDS.y, 
+                              colour = MDS.ggplot$time_point)) + 
+  geom_point(size = 4.5) + theme(panel.background = element_rect(
+    fill = 'wheat'), legend.title = element_text(size = 12, face = "bold"), 
+    legend.text = element_text(size = 10, face = "bold"), 
+    axis.title = element_text(face = "bold", size = 12), 
+    axis.text = element_text(face = "bold", size = 10), 
+    plot.title = element_text(face = "bold", size = 15)) + 
+  ggtitle("MDS plot") + xlab("Dimension 1") + ylab("Dimension 2") + 
+  scale_colour_discrete(name = "Time point\npost-infection", breaks = c(
+    "pre2", "pre1", "1", "2", "6", "10", "12"), 
+    labels = c("pre 2w", "pre 1w", "1w", "2w", "6w", "10w", "12w"))
+dev.off()
+
+# Write into a table the coordinates of each library for the MDS plot
+write.table(x = MDS.ggplot, file = "miRdeep2_MDS_xy.txt", sep = "\t",
+            quote = FALSE, row.names = TRUE, col.names = TRUE)
+
+#########################################################################
+# Determine if there has been inversion between 6522_pre2 and 6526_pre2 #
+#########################################################################
+
+# Output value for MDS plot (dimension 1 and 2) for animals 6522 and 6526
+MDS <- plotMDS(x = dgelist.norm[, grep(pattern = "652(2|6)",
+                                       x = colnames(dgelist.norm),
+                                       perl = TRUE)], top = 1000000,
+               gene.selection = "pairwise", xlab = "Dimension 1",
+               ylab = "Dimension 2", dim.plot = c(1, 2), cex = 1)
+
+# MDS values are then plotted with ggplot2
+MDS.ggplot <- data.frame(target[grep(pattern = "652(2|6)",
+                                     x = target[, "sample"], perl = TRUE),],
+                         MDS$x, MDS$y)
+ggplot(data = MDS.ggplot, aes(x = MDS.ggplot$MDS.x, y = MDS.ggplot$MDS.y, 
+                              colour = MDS.ggplot$time_point,
+                              shape = MDS.ggplot$animal)) + 
+  geom_point(size = 8) + theme(panel.background = element_rect(
+    fill = 'wheat'), legend.title = element_text(size = 20, face = "bold"), 
+    legend.text = element_text(size = 15, face = "bold"), 
+    axis.title = element_text(face = "bold", size = 30), 
+    axis.text = element_text(face = "bold", size = 20), 
+    plot.title = element_text(face = "bold", size = 40)) + 
+  ggtitle("MDS plot") + xlab("Dimension 1") + ylab("Dimension 2")
+
+##############################################
+# Create a design matrix for paired analysis #
+##############################################
+
+# Create a design matrix
+design <- model.matrix(~ animal + group)
+rownames(design) <- rownames(dgelist.norm$samples)
+colnames(design) <- gsub(pattern = "(animal)|(group)", replacement = "",
+                         x = colnames(design), perl = TRUE)
+design
+
+########################################################################
+# Estimate the dispersion parameter for each tag using Cox-Reid method #
+########################################################################
+
+# Calculate the dispersion (common, trended and tagwise)
+dgelist.disp <- estimateGLMCommonDisp(y = dgelist.norm, design = design,
+                                      verbose = TRUE)
+dgelist.disp <- estimateGLMTrendedDisp(y = dgelist.disp, design = design)
+dgelist.disp <- estimateGLMTagwiseDisp(y = dgelist.disp, design = design)
+names(dgelist.disp)
+
+# Plot the dispersion
+png(filename = "miRdeep2_BCV.png", width = 1366, height = 768,
+    units = "px")
+plotBCV(dgelist.disp)
+dev.off()
+
+# Show the calculated dispersion and the coefficient of biological variation
+dgelist.disp$common.dispersion
+sqrt(dgelist.disp$common.dispersion)
+
+##################################################################
+# Determine differential expression using negative binomial GLMs #
+##################################################################
+
+# Fit a negative binomial generalized linear model for each tag using
+# the design matrix and calculated dispersion
+dgeglm.fit <- glmFit(y = dgelist.disp, design = design)
+names(dgeglm.fit)
+
+################################
+# Differential expression call #
+################################
+
+# Test for differential expression for -1 week versus -2 week
+miR.DE("pre1", 1, "pre2", -1, data = dgeglm.fit, design = design,
+       group = group, adjpvalue = 0.05, method = "BH",
+       lrtdata = "miRdeep2.dgelrt.pre1w", dedata = "miRdeep2.de.pre1w",
+       smearfile = "miRdeep2_smear_pre1w")
+
+# Test for differential expression for 1 week versus -2 week and -1 week
+miR.DE(1, 1, "pre1", -1, "pre2", -1, data = dgeglm.fit, design = design,
+       group = group, adjpvalue = 0.05, method = "BH",
+       lrtdata = "miRdeep2.dgelrt.1w", dedata = "miRdeep2.de.1w",
+       smearfile = "miRdeep2_smear_1w")
+
+# Test for differential expression for 2 week versus -2 week and -1 week
+miR.DE(2, 1, "pre1", -1, "pre2", -1, data = dgeglm.fit, design = design,
+       group = group, adjpvalue = 0.05, method = "BH",
+       lrtdata = "miRdeep2.dgelrt.2w", dedata = "miRdeep2.de.2w",
+       smearfile = "miRdeep2_smear_2w")
+
+# Test for differential expression for 6 week versus -2 week and -1 week
+miR.DE(6, 1, "pre1", -1, "pre2", -1, data = dgeglm.fit, design = design,
+       group = group, adjpvalue = 0.05, method = "BH",
+       lrtdata = "miRdeep2.dgelrt.6w", dedata = "miRdeep2.de.6w",
+       smearfile = "miRdeep2_smear_6w")
+
+# Test for differential expression for 10 week versus -2 week and -1 week
+miR.DE(10, 1, "pre1", -1, "pre2", -1, data = dgeglm.fit, design = design,
+       group = group, adjpvalue = 0.05, method = "BH",
+       lrtdata = "miRdeep2.dgelrt.10w", dedata = "miRdeep2.de.10w",
+       smearfile = "miRdeep2_smear_10w")
+
+# Test for differential expression for 12 week versus -2 week and -1 week
+miR.DE(12, 1, "pre1", -1, "pre2", -1, data = dgeglm.fit, design = design,
+       group = group, adjpvalue = 0.05, method = "BH",
+       lrtdata = "miRdeep2.dgelrt.12w", dedata = "miRdeep2.de.12w",
+       smearfile = "miRdeep2_smear_12w")
+
+#########################################################
+# Merge all DE call data from the different time points #
+#########################################################
+
+# Merge all DE table for the different time points into a single dataframe
+DEtable.merge("miRdeep2.de.1w", "miRdeep2.de.2w", "miRdeep2.de.6w",
+              "miRdeep2.de.10w", "miRdeep2.de.12w",
+              output = "miRdeep2.DE", pattern = "^miRdeep2.de.")
+head(miRdeep2.DE)
+
+# Write into a table the full DE call data
+write.matrix(x = miRdeep2.DE, file = "miRdeep2_full_DE.txt", sep = "\t")
+
+##############################################
+# Comparison of DE genes between time points #
+##############################################
+
+# Identify as a vector list the significant DE genes per time point
+sig.1w <- as.character(miRdeep2.DE[!is.na(miRdeep2.DE$FDR_1w) & (
+  miRdeep2.DE$FDR_1w < 0.05), "gene_id"])
+sig.2w <- as.character(miRdeep2.DE[!is.na(miRdeep2.DE$FDR_2w) & (
+  miRdeep2.DE$FDR_2w < 0.05), "gene_id"])
+sig.6w <- as.character(miRdeep2.DE[!is.na(miRdeep2.DE$FDR_6w) & (
+  miRdeep2.DE$FDR_6w < 0.05), "gene_id"])
+sig.10w <- as.character(miRdeep2.DE[!is.na(miRdeep2.DE$FDR_10w) & (
+  miRdeep2.DE$FDR_10w < 0.05), "gene_id"])
+sig.12w <- as.character(miRdeep2.DE[!is.na(miRdeep2.DE$FDR_12w) & (
+  miRdeep2.DE$FDR_12w < 0.05), "gene_id"])
+
+# Create the Venn diagram of overlapping DE genes between time points
+venn.diagram(x = list("1w" = sig.1w, "2w" = sig.2w, "6w" = sig.6w,
+                      "10w" = sig.10w, "12w" = sig.12w),
+             filename = "miRdeep2_Venn_All.tiff", na="remove")
+
+# Identify the DE miRNA common to all post-infection time points
+miRdeep2.overlap <- Reduce(intersect, list(sig.1w, sig.2w, sig.6w, sig.10w, sig.12w))
+miRdeep2.overlap
+
+############################
+# Clean up the R workspace #
+############################
+
+# Rename variables to be kept according to the dataset used for generation
+miRdeep2.dgelist.disp <- dgelist.disp
+
+# Remove temporary and unrequired variables
+rm(target, animal, group, Count, i, count.log2, hist_plot, MDS, MDS.ggplot,
+   dgelist, dgelist.filt, design, dgelist.norm, dgeglm.fit,
+   miRdeep2.dgelrt.pre1w, miRdeep2.de.pre1w, sig.1w, sig.2w, sig.6w,
+   sig.10w, sig.12w, dgelist.disp, files, Dat, sample)
+
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  
+##########################################################
+# Analysis of counts data obtained via miRdeep* pipeline #
+##########################################################
+
+################################################
+# Read in and concatenate input files within R #
+################################################
+
+# Read in the count files via the read.table function (cannot use readDGE)
+files <- list.files(path = paste("C:/Users/nnalpas/Dropbox/Home_work_sync/",
+                                 "Work/TIDA/miRNA-seq/Results/Counts/",
+                                 "miRdeep-star", sep = ""), pattern = "6")
+for (i in 1:length(files)) {
+  Dat <- read.table(file = paste("C:/Users/nnalpas/Dropbox/Home_work_sync/",
+                                 "Work/TIDA/miRNA-seq/Results/Counts/",
+                                 "miRdeep-star", files[i], sep = "/"),
+                    quote = "", sep = "\t", header = TRUE)
+  Dat <- Dat[, c(1,3,4,6,7)]
+  sample <- gsub(pattern = "_result.txt", replacement = "", x = files[i])
+  colnames(Dat)[1:4] <- c("precursor_name", "chromosome", "strand", sample)
+  Dat <- cbind(Dat[,1:4], colsplit(string = as.character(
+    Dat[,5]), pattern = "-", names = c("start_position", "end_position")))
+  Dat <- merge(x = Dat, y = miRNA.info, by = c(
+    "precursor_name", "chromosome", "strand", "start_position"), all = TRUE)
+  Dat <- Dat[, c("gene_id", sample)]
+  Dat[is.na(x = Dat)] <- 0
+  if (i == 1) {
+    Count <- Dat
+  }
+  else {
+    Count <- merge(x = Count, y = Dat, by = "gene_id", all = TRUE)
+  }
+}
+dim(Count)
+head(Count)
+tail(Count)
+
+######################################################
+# Merge the annotation information with counts table #
+######################################################
+
+# Merge previously imported annotation information with the count table
+Count <- merge(x = miRNA.info, y = Count, by = "gene_id", all = TRUE)
+head(Count)
+
+# Check the merged matrix in terms of size and content
+dim(Count)
+table(duplicated(Count[, "gene_id"]))
+length(unique(Count[, "gene_id"], incomparables = FALSE))
+
+# Ouptut samples data
+write.matrix(x = Count, file = "miRdeepstar_counts.txt",  sep = "\t")
+
+###############################
+# Create groups and a DGElist #
+###############################
+
+# Create a target matrix and a experimental group vector and animal block
+target <- colnames(Count[, (ncol(Count)-69):ncol(Count)])
+target <- cbind(target, matrix(data = unlist(strsplit(x = target, split = "_",
+                                                      fixed = TRUE)),
+                               nrow = 70, ncol = 2, byrow = TRUE))
+colnames(target) = c("sample", "animal", "time_point")
+group <- factor(x = target[, "time_point"])
+animal <- factor(x = target[, "animal"])
+
+# Create a DGElist containing the group information
+dgelist <- DGEList(counts = Count[, (ncol(Count)-69):ncol(Count)], 
+                   lib.size = NULL, norm.factors = NULL, group = group, 
+                   genes = Count[, 1:(ncol(Count)-70)], 
+                   remove.zeros = FALSE)
+rownames(dgelist$counts) <- rownames(
+  dgelist$genes) <- dgelist$genes[, "gene_id"]
+dgelist$genes["gene_id"] <- NULL
+names(dgelist)
+head(dgelist$samples)
+head(dgelist$counts)
+head(dgelist$genes)
+
+# Ouptut samples data
+write.table(x = dgelist$samples, file = "miRdeepstar_samples.txt", 
+            sep = "\t", quote = FALSE, row.names = TRUE, col.names = TRUE)
+
+###########################################################
+# Quality check of libraries by plotting density of count #
+###########################################################
+
+# Log2 transform the count data for better visualization
+count.log2 <- log(x = (dgelist$counts[,]+1), base = 2)
+
+# Plot density of count for all libraries
+png(filename = "miRdeepstar_Dens_all_miRNA.png", width = 1500, height = 1300,
+    units = "px")
+plot(x = density(count.log2[, 1]), main = "Density plot of count per gene", 
+     lty =  1, xlab = "Log2 of count per gene", ylab = "Density", col = 1, 
+     xlim = c(-0.5, 5), ylim = c(0.0, 0.9), lwd = 2 , cex.axis = 1.5, 
+     cex.lab = 1.5, cex.main = 2)
+for (i in 2:ncol(count.log2)) {
+  lines(x = density(count.log2[, i]), lty = 1, col = i)
+}
+dev.off()
+
+#####################################
+# Filtering of lowly expressed tags #
+#####################################
+
+# Identify genes with zero counts across all samples
+dim(dgelist[rowSums(dgelist$counts) == 0, ])
+head(dgelist[rowSums(dgelist$counts) == 0, ])
+
+# Filter lowly expressed tags, retaining only tags with at least 50 counts per 
+# million in 10 or more libraries (10 libraries correspond to one time point)
+dgelist.filt <- dgelist[rowSums(cpm(dgelist$counts) > 50) >= 10, ]
+dim(dgelist.filt$counts)
+
+# Compute the library size
+dgelist.filt$samples$lib.size <- colSums(dgelist.filt$counts)
+head(dgelist.filt$samples)
+head(dgelist$samples)
+
+###########################################################################
+# Quality check of libraries by plotting density of count after filtering #
+###########################################################################
+
+# Log2 transform the count data for better visualization
+count.log2 <- log(x = (dgelist.filt$counts[,]+1), base = 2)
+
+# Plot density of count for all libraries
+png(filename = "miRdeepstar_Dens_filt_miRNA.png", width = 1366, height = 768,
+    units = "px")
+plot(x = density(count.log2[, 1]), main = "Density plot of count per gene", 
+     lty =  1, xlab = "Log2 of count per gene", ylab = "Density", col = 1, 
+     xlim = c(-0.5, 15), ylim = c(0.0, 0.2), lwd = 2 , cex.axis = 1.5, 
+     cex.lab = 1.5, cex.main = 2)
+for (i in 2:ncol(count.log2)) {
+  lines(x = density(count.log2[, i]), lty = 1, col = i)
+}
+dev.off()
+
+############################################
+# Histogram of CPM per expression category #
+############################################
+
+# Prepare the histogram picture output
+png(filename = "miRdeepstar_Hist_miRNA.png", width = 1500, height = 1500,
+    units = "px")
+par(mfrow = c(2,2), cex = 3, cex.axis = 0.7)
+
+# Plot histogram of number of gene per CPM category for all microRNA
+hist_plot <- hist(x = rowMeans(x = cpm(dgelist$counts), na.rm = FALSE), 
+                  breaks = c(
+                    0, 0.01, 0.1, 1, 10, 100, 1000, 10000, 100000, 1000000), 
+                  plot = FALSE)
+plot(x = hist_plot$counts, log = "x", type = "h", axes = FALSE, 
+     xlab = "Average CPM", ylab = "Number of gene", lwd = 10, 
+     col = "lightblue", main = "All miRNA")
+axis(side = 1, at = c(1:10), labels = c(
+  "0", "0.01", "0.1", "1", "10", "100", "1000", "10000", "100000", "1000000"))
+axis(side = 2)
+
+# Plot histogram of number of gene per CPM category for microRNA with 
+# at least one count
+hist_plot <- hist(x = rowMeans(x = cpm(dgelist$counts[rowSums(
+  dgelist$counts) > 0, ]), na.rm = FALSE), breaks = c(
+    0, 0.01, 0.1, 1, 10, 100, 1000, 10000, 100000, 1000000), plot = FALSE)
+plot(hist_plot$counts, log = "x", type = "h", axes = FALSE, 
+     xlab = "Average CPM", ylab = "Number of gene", lwd = 10, 
+     col = "lightgreen", main = "Min one count miRNA")
+axis(side = 1, at = c(1:10), labels = c(
+  "0", "0.01", "0.1", "1", "10", "100", "1000", "10000", "100000", "1000000"))
+axis(side = 2)
+
+# Plot histogram of number of gene per CPM category for microRNA 
+# filtered for low expression
+hist_plot <- hist(x = rowMeans(x = cpm(dgelist.filt$counts), na.rm = FALSE), 
+                  breaks = c(
+                    0, 0.1, 1, 10, 100, 1000, 10000, 100000, 1000000), 
+                  plot = FALSE)
+plot(hist_plot$counts, log = "x", type = "h", axes = FALSE, 
+     xlab = "Average CPM", ylab = "Number of gene", lwd = 10, 
+     col = "lightcoral", main = "Low expression filtered miRNA")
+axis(side = 1, at = c(1:10), labels = c(
+  "0", "0.01", "0.1", "1", "10", "100", "1000", "10000", "100000", "1000000"))
+axis(side = 2)
+
+# Close graphic device
+dev.off()
+
+#################################################
+# Gene expression correlation between libraries #
+#################################################
+
+# Note that CPM values are not required with the use of Spearman rank test
+
+# Perform gene expression correlation between libraries at pre 2 weeks to 
+# identify outlier library
+png(filename = "miRdeepstar_Pre2_cor.png", width = 1500, height = 1500,
+    units = "px")
+chart.Correlation(R = log(x = (dgelist.filt$counts[, grep(
+  pattern = "_pre2", x = colnames(dgelist.filt$counts))] + 1), base = 2), 
+  histogram = TRUE, method = "spearman", 
+  main = "Pre 2 weeks time point expression correlation")
+dev.off()
+
+# Perform gene expression correlation between libraries at pre 1 weeks to 
+# identify outlier library
+png(filename = "miRdeepstar_Pre1_cor.png", width = 1500, height = 1500,
+    units = "px")
+chart.Correlation(R = log(x = (dgelist.filt$counts[, grep(
+  pattern = "_pre1", x = colnames(dgelist.filt$counts))] + 1), base = 2), 
+  histogram = TRUE, method = "spearman", 
+  main = "Pre 1 weeks time point expression correlation")
+dev.off()
+
+# Perform gene expression correlation between libraries at post 1 weeks to 
+# identify outlier library
+png(filename = "miRdeepstar_Post1_cor.png", width = 1500, height = 1500,
+    units = "px")
+chart.Correlation(R = log(x = (dgelist.filt$counts[, grep(
+  pattern = "_1$", x = colnames(dgelist.filt$counts), perl = TRUE)] + 1),
+  base = 2), histogram = TRUE, method = "spearman", 
+  main = "Post 1 weeks time point expression correlation")
+dev.off()
+
+# Perform gene expression correlation between libraries at post 2 weeks to 
+# identify outlier library
+png(filename = "miRdeepstar_Post2_cor.png", width = 1500, height = 1500,
+    units = "px")
+chart.Correlation(R = log(x = (dgelist.filt$counts[, grep(
+  pattern = "_2", x = colnames(dgelist.filt$counts))] + 1), base = 2), 
+  histogram = TRUE, method = "spearman", 
+  main = "Post 2 weeks time point expression correlation")
+dev.off()
+
+# Perform gene expression correlation between libraries at post 6 weeks to 
+# identify outlier library
+png(filename = "miRdeepstar_Post6_cor.png", width = 1500, height = 1500,
+    units = "px")
+chart.Correlation(R = log(x = (dgelist.filt$counts[, grep(
+  pattern = "_6", x = colnames(dgelist.filt$counts))] + 1), base = 2), 
+  histogram = TRUE, method = "spearman", 
+  main = "Post 6 weeks time point expression correlation")
+dev.off()
+
+# Perform gene expression correlation between libraries at post 10 weeks to 
+# identify outlier library
+png(filename = "miRdeepstar_Post10_cor.png", width = 1500, height = 1500,
+    units = "px")
+chart.Correlation(R = log(x = (dgelist.filt$counts[, grep(
+  pattern = "_10", x = colnames(dgelist.filt$counts))] + 1), base = 2), 
+  histogram = TRUE, method = "spearman", 
+  main = "Post 10 weeks time point expression correlation")
+dev.off()
+
+# Perform gene expression correlation between libraries at post 12 weeks to 
+# identify outlier library
+png(filename = "miRdeepstar_Post12_cor.png", width = 1500, height = 1500,
+    units = "px")
+chart.Correlation(R = log(x = (dgelist.filt$counts[, grep(
+  pattern = "_12", x = colnames(dgelist.filt$counts))] + 1), base = 2), 
+  histogram = TRUE, method = "spearman", 
+  main = "Post 12 weeks time point expression correlation")
+dev.off()
+
+########################################################
+# Normalization of data using trimmed mean of M-values #
+########################################################
+
+# Calculate normalisation factor for our DGElist, note that with edgeR 
+# the counts are not transformed in any way after normalization, instead 
+# normalization will modify library size
+dgelist.norm <- calcNormFactors(dgelist.filt)
+dgelist.norm$samples
+
+################################################
+# Multidimensional scaling plot of all samples #
+################################################
+
+# Output value for MDS plot (dimension 1 and 2 in this case)
+MDS <- plotMDS(x = dgelist.norm, top = 1000000, gene.selection = "pairwise", 
+               xlab = "Dimension 1", ylab = "Dimension 2", dim.plot = c(1, 2), 
+               cex = 1)
+
+# MDS values are then plotted with ggplot2
+MDS.ggplot <- data.frame(target, MDS$x, MDS$y)
+tiff(filename = "miRdeepstar_MDS_miRNA.tif", width = (
+  (range(MDS.ggplot$MDS.x)[2]-range(MDS.ggplot$MDS.x)[1])*11.5), height = (
+    (range(MDS.ggplot$MDS.y)[2]-range(MDS.ggplot$MDS.y)[1])*10), units = "in",
+  res = 600, compression = "lzw")
+ggplot(data = MDS.ggplot, aes(x = MDS.ggplot$MDS.x, y = MDS.ggplot$MDS.y, 
+                              colour = MDS.ggplot$time_point)) + 
+  geom_point(size = 4.5) + theme(panel.background = element_rect(
+    fill = 'wheat'), legend.title = element_text(size = 12, face = "bold"), 
+    legend.text = element_text(size = 10, face = "bold"), 
+    axis.title = element_text(face = "bold", size = 12), 
+    axis.text = element_text(face = "bold", size = 10), 
+    plot.title = element_text(face = "bold", size = 15)) + 
+  ggtitle("MDS plot") + xlab("Dimension 1") + ylab("Dimension 2") + 
+  scale_colour_discrete(name = "Time point\npost-infection", breaks = c(
+    "pre2", "pre1", "1", "2", "6", "10", "12"), 
+    labels = c("pre 2w", "pre 1w", "1w", "2w", "6w", "10w", "12w"))
+dev.off()
+
+# Write into a table the coordinates of each library for the MDS plot
+write.table(x = MDS.ggplot, file = "miRdeepstar_MDS_xy.txt", sep = "\t",
+            quote = FALSE, row.names = TRUE, col.names = TRUE)
+
+#########################################################################
+# Determine if there has been inversion between 6522_pre2 and 6526_pre2 #
+#########################################################################
+
+# Output value for MDS plot (dimension 1 and 2) for animals 6522 and 6526
+MDS <- plotMDS(x = dgelist.norm[, grep(pattern = "652(2|6)",
+                                       x = colnames(dgelist.norm),
+                                       perl = TRUE)], top = 1000000,
+               gene.selection = "pairwise", xlab = "Dimension 1",
+               ylab = "Dimension 2", dim.plot = c(1, 2), cex = 1)
+
+# MDS values are then plotted with ggplot2
+MDS.ggplot <- data.frame(target[grep(pattern = "652(2|6)",
+                                     x = target[, "sample"], perl = TRUE),],
+                         MDS$x, MDS$y)
+ggplot(data = MDS.ggplot, aes(x = MDS.ggplot$MDS.x, y = MDS.ggplot$MDS.y, 
+                              colour = MDS.ggplot$time_point,
+                              shape = MDS.ggplot$animal)) + 
+  geom_point(size = 8) + theme(panel.background = element_rect(
+    fill = 'wheat'), legend.title = element_text(size = 20, face = "bold"), 
+    legend.text = element_text(size = 15, face = "bold"), 
+    axis.title = element_text(face = "bold", size = 30), 
+    axis.text = element_text(face = "bold", size = 20), 
+    plot.title = element_text(face = "bold", size = 40)) + 
+  ggtitle("MDS plot") + xlab("Dimension 1") + ylab("Dimension 2")
+
+##############################################
+# Create a design matrix for paired analysis #
+##############################################
+
+# Create a design matrix
+design <- model.matrix(~ animal + group)
+rownames(design) <- rownames(dgelist.norm$samples)
+colnames(design) <- gsub(pattern = "(animal)|(group)", replacement = "",
+                         x = colnames(design), perl = TRUE)
+design
+
+########################################################################
+# Estimate the dispersion parameter for each tag using Cox-Reid method #
+########################################################################
+
+# Calculate the dispersion (common, trended and tagwise)
+dgelist.disp <- estimateGLMCommonDisp(y = dgelist.norm, design = design,
+                                      verbose = TRUE)
+dgelist.disp <- estimateGLMTrendedDisp(y = dgelist.disp, design = design)
+dgelist.disp <- estimateGLMTagwiseDisp(y = dgelist.disp, design = design)
+names(dgelist.disp)
+
+# Plot the dispersion
+png(filename = "miRdeepstar_BCV.png", width = 1366, height = 768,
+    units = "px")
+plotBCV(dgelist.disp)
+dev.off()
+
+# Show the calculated dispersion and the coefficient of biological variation
+dgelist.disp$common.dispersion
+sqrt(dgelist.disp$common.dispersion)
+
+##################################################################
+# Determine differential expression using negative binomial GLMs #
+##################################################################
+
+# Fit a negative binomial generalized linear model for each tag using
+# the design matrix and calculated dispersion
+dgeglm.fit <- glmFit(y = dgelist.disp, design = design)
+names(dgeglm.fit)
+
+################################
+# Differential expression call #
+################################
+
+# Test for differential expression for -1 week versus -2 week
+miR.DE("pre1", 1, "pre2", -1, data = dgeglm.fit, design = design,
+       group = group, adjpvalue = 0.05, method = "BH",
+       lrtdata = "miRdeepstar.dgelrt.pre1w", dedata = "miRdeepstar.de.pre1w",
+       smearfile = "miRdeepstar_smear_pre1w")
+
+# Test for differential expression for 1 week versus -2 week and -1 week
+miR.DE(1, 1, "pre1", -1, "pre2", -1, data = dgeglm.fit, design = design,
+       group = group, adjpvalue = 0.05, method = "BH",
+       lrtdata = "miRdeepstar.dgelrt.1w", dedata = "miRdeepstar.de.1w",
+       smearfile = "miRdeepstar_smear_1w")
+
+# Test for differential expression for 2 week versus -2 week and -1 week
+miR.DE(2, 1, "pre1", -1, "pre2", -1, data = dgeglm.fit, design = design,
+       group = group, adjpvalue = 0.05, method = "BH",
+       lrtdata = "miRdeepstar.dgelrt.2w", dedata = "miRdeepstar.de.2w",
+       smearfile = "miRdeepstar_smear_2w")
+
+# Test for differential expression for 6 week versus -2 week and -1 week
+miR.DE(6, 1, "pre1", -1, "pre2", -1, data = dgeglm.fit, design = design,
+       group = group, adjpvalue = 0.05, method = "BH",
+       lrtdata = "miRdeepstar.dgelrt.6w", dedata = "miRdeepstar.de.6w",
+       smearfile = "miRdeepstar_smear_6w")
+
+# Test for differential expression for 10 week versus -2 week and -1 week
+miR.DE(10, 1, "pre1", -1, "pre2", -1, data = dgeglm.fit, design = design,
+       group = group, adjpvalue = 0.05, method = "BH",
+       lrtdata = "miRdeepstar.dgelrt.10w", dedata = "miRdeepstar.de.10w",
+       smearfile = "miRdeepstar_smear_10w")
+
+# Test for differential expression for 12 week versus -2 week and -1 week
+miR.DE(12, 1, "pre1", -1, "pre2", -1, data = dgeglm.fit, design = design,
+       group = group, adjpvalue = 0.05, method = "BH",
+       lrtdata = "miRdeepstar.dgelrt.12w", dedata = "miRdeepstar.de.12w",
+       smearfile = "miRdeepstar_smear_12w")
+
+#########################################################
+# Merge all DE call data from the different time points #
+#########################################################
+
+# Merge all DE table for the different time points into a single dataframe
+DEtable.merge("miRdeepstar.de.1w", "miRdeepstar.de.2w", "miRdeepstar.de.6w",
+              "miRdeepstar.de.10w", "miRdeepstar.de.12w",
+              output = "miRdeepstar.DE", pattern = "^miRdeepstar.de.")
+head(miRdeepstar.DE)
+
+# Write into a table the full DE call data
+write.matrix(x = miRdeepstar.DE, file = "miRdeepstar_full_DE.txt", sep = "\t")
+
+##############################################
+# Comparison of DE genes between time points #
+##############################################
+
+# Identify as a vector list the significant DE genes per time point
+sig.1w <- as.character(miRdeepstar.DE[!is.na(miRdeepstar.DE$FDR_1w) & (
+  miRdeepstar.DE$FDR_1w < 0.05), "gene_id"])
+sig.2w <- as.character(miRdeepstar.DE[!is.na(miRdeepstar.DE$FDR_2w) & (
+  miRdeepstar.DE$FDR_2w < 0.05), "gene_id"])
+sig.6w <- as.character(miRdeepstar.DE[!is.na(miRdeepstar.DE$FDR_6w) & (
+  miRdeepstar.DE$FDR_6w < 0.05), "gene_id"])
+sig.10w <- as.character(miRdeepstar.DE[!is.na(miRdeepstar.DE$FDR_10w) & (
+  miRdeepstar.DE$FDR_10w < 0.05), "gene_id"])
+sig.12w <- as.character(miRdeepstar.DE[!is.na(miRdeepstar.DE$FDR_12w) & (
+  miRdeepstar.DE$FDR_12w < 0.05), "gene_id"])
+
+# Create the Venn diagram of overlapping DE genes between time points
+venn.diagram(x = list("1w" = sig.1w, "2w" = sig.2w, "6w" = sig.6w,
+                      "10w" = sig.10w, "12w" = sig.12w),
+             filename = "miRdeepstar_Venn_All.tiff", na="remove")
+
+# Identify the DE miRNA common to all post-infection time points
+miRdeepstar.overlap <- Reduce(intersect, list(sig.1w, sig.2w, sig.6w, sig.10w, sig.12w))
+miRdeepstar.overlap
+
+############################
+# Clean up the R workspace #
+############################
+
+# Rename variables to be kept according to the dataset used for generation
+miRdeepstar.dgelist.disp <- dgelist.disp
+
+# Remove temporary and unrequired variables
+rm(target, animal, group, Count, i, count.log2, hist_plot, MDS, MDS.ggplot,
+   dgelist, dgelist.filt, design, dgelist.norm, dgeglm.fit,
+   miRdeepstar.dgelrt.pre1w, miRdeepstar.de.pre1w, sig.1w, sig.2w, sig.6w,
+   sig.10w, sig.12w, miR.DE, DEtable.merge, DESeq.merge, diff_expr_edgeR,
+   loadpackage, sig_label, dgelist.disp, files, Dat, sample)
+
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+#############################################################################
+# Comparison of time overlapping DE genes between various analysis pipeline #
+#############################################################################
+
+# Create the Venn diagram of overlapping DE genes between time points
+venn.diagram(x = list("Novo-feature" = Novofeature.overlap,
+                      "miRdeep2" = miRdeep2.overlap,
+                      "miRdeep-star" = miRdeepstar.overlap),
+             filename = "Venn_3pipelines.tiff", na="remove")
+
+# Get all the gene expression information for the candidate biomarkers miRNA
+candidates <- merge(x = miRNA.info[(miRNA.info$gene_id %in% unique(c(
+  Novofeature.overlap, miRdeep2.overlap, miRdeepstar.overlap))),],
+  y = Novofeature.DE[, (ncol(Novofeature.DE)-24):ncol(Novofeature.DE)],
+  by.x = "gene_id", by.y = "row.names", all.x = TRUE)
+candidates <- merge(x = candidates, y = miRdeep2.DE[, (ncol(miRdeep2.DE)-24):ncol(
+  miRdeep2.DE)], by.x = "gene_id", by.y = "row.names", all.x = TRUE)
+candidates <- merge(x = candidates, y = miRdeepstar.DE[, (ncol(
+  miRdeepstar.DE)-24):ncol(miRdeepstar.DE)], by.x = "gene_id",
+  by.y = "row.names", all.x = TRUE)
+
+# Write into a table all the time overlapping DE genes
+write.matrix(x = candidates, file = "Candidate_biomarkers.txt", sep = "\t")
+
+# Remove temporary and unrequired variables
+rm(Novofeature.overlap, miRdeep2.overlap, miRdeepstar.overlap)
+
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+##########################################################
+# Identification of reference genes for RT-qPCR analysis #
+##########################################################
+
+# Calculate average CPM and Standard deviation for each gene
+mean <- as.matrix(apply(X = cpm(x = Novofeature.dgelist.disp), MARGIN = 1,
+                        FUN = function(x) mean(x)))
+std.dev <- as.matrix(apply(X = cpm(x = Novofeature.dgelist.disp), MARGIN = 1,
+                           FUN = function(x) sd(x)))
+
+# Merge results together
+gene.stable <- merge(x = mean, y = std.dev, by = "row.names", all = TRUE)
+gene.stable <- merge(x = miRNA.info[, 1:2], y = gene.stable, by.x = "gene_id",
+                     by.y = "Row.names")
+rownames(gene.stable) <- gene.stable[, "gene_id"]
+gene.stable <- gene.stable[, -1]
+colnames(gene.stable)[2:3] <- c("meanCPM", "SdevCPM")
+head(gene.stable)
+
+# Calculate percentage of Standard deviation in order to compare all genes
+gene.stable <- cbind(gene.stable, (
+  gene.stable[, "SdevCPM"]*100/gene.stable[, "meanCPM"]))
+colnames(gene.stable)[4] <- "Perc_Sdev"
+gene.stable <- gene.stable[order(gene.stable$Perc_Sdev),]
+
+# Plot the mean CPM and Standard deviation CPM of all genes
+ggplot(data = gene.stable, aes(x = log(x = gene.stable$meanCPM, base = 2),
+                               y = log(x = gene.stable$SdevCPM,
+                                       base = 2))) + geom_point(size = 4.5)
+
+# Write into a table the analysis for reference gene discovery
+write.matrix(x = gene.stable, file = "Potential_reference_gene.txt", sep = "\t")
+
+# Remove temporary and unrequired variables
+rm(mean, std.dev, gene.stable)
+
+#######
+# END #
+#######
\ No newline at end of file