-
Notifications
You must be signed in to change notification settings - Fork 2
/
audic.R
98 lines (81 loc) · 2.62 KB
/
audic.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
#This is differential expression analysis for RNA sequencing data with no replicates using Audic and Claverie method 1997
# Input file
#The input file should comtain Genenames followed by counts and length columns separated by <TAB> space.
#Note length column should be at the end.
# To run from command line
# use R CMD BATCH '--args ctrl_column_no tmt_column_no input_file outfolder_name Length_column_no audic.R audic.Rout
require(stringr)
# Total number of reads
rm(list = ls())
# Read feature count
args = commandArgs()
ix = grep("--args", args, fixed = TRUE)
if (ix < length(args))
{
for (i in (ix+1):length(args))
{
print(paste("argument #",i-ix,"-->",args[i], sep=" "), sep = "\n")
}
#Parameters passed
ctl<-args[8]
ctl
tmt <- args[9]
tmt
infile <- args[10]
infile
outfolder <- args[11]
outfolder
lenth <- args[12]
}
library(edgeR)
infile
ctl
tmt
data <- read.table(infile, header=TRUE,sep="\t")
outfolder
ctcol<-strtoi(ctl)
tmcol <- strtoi(tmt)
lencol <- strtoi(lenth)
f1<-names(data)[ctcol]
f2<-names(data)[tmcol]
len <- names(data)[lencol]
ofile <- paste(outfolder,str_trim(f1),"_",str_trim(f2),sep="")
newdata <- data[,c(f1,f2,len)]
row.names(newdata) <- data$Geneid
N1 <- sum(newdata[[f1]])
N2 <- sum(newdata[[f2]])
x <- newdata[[f1]]
y <- newdata[[f2]]
newdata$csh <- newdata[[f1]]
newdata$tmt <- newdata[[f2]]
# Calculte RPKM
colno <- ncol(data)
rownames(data) <- data[,1]
RPKM <- NULL
for(i in 2:ncol(data)-1)
{
d <-data[,i]
l <- data[[len]] #Accessing the length column
cS <- sum(as.numeric(data[,i])) #Total mapped reads per sample
RPKM[[i]] <- (10^9)*(as.numeric(data[,i]))/(as.numeric(l)*cS)
RPKM[[1]] <- data[[1]]
}
RPKM <- as.data.frame(RPKM)
colnames(RPKM) <- colnames(data[ 2:ncol(data)-1)
#------------------------------------Precalculations--------------------------------------------------------------------
d <- x+y
a <- factorial(x)*factorial(y)
b <- 1+(N2/N1)
c <- (x+y+1)
A <- (N2/N1)^y
C <- a*b^c
B <- factorial(x+y)
#----------------------------------------Audic----------------------------------------------------------------------------------
Pvalue <- exp(lchoose(x+y,x)-(x+y+1)*log(2))
FDR <- p.adjust(Pvalue,method="BH")
#---------------------FC----calculation-------------------------------------------------------------------------------------------
log2FC <- log2(RPKM[[f2]]/RPKM[[f1]])
#----------------------------------------Master file----------------------------------------------------------------------------------
acr <-data.frame(data$Geneid,log2FC,Pvalue,FDR)
file1 <- paste(ofile,"Master.txt",sep="")
write.table(acr,file1,sep="\t",row.names=F,quote=F)