-
Notifications
You must be signed in to change notification settings - Fork 0
/
fig4b.R
122 lines (96 loc) · 5.95 KB
/
fig4b.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
library(data.table)
library(ggplot2)
library(ggpubr)
results <- data.table()
for(cancer in c("BLCA","BRCA","HNSC","CESC","LUAD","LUSC")){
dataLoopC <- read.csv(paste0("/Users/mar/BIO/PROJECTS/APOBEC/NONBDNA/IRloopstem/irloop_",cancer,".txt"), sep='\t')
dataLoopC <- data.table(dataLoopC)
dataNotLoopC <- read.csv(paste0("/Users/mar/BIO/PROJECTS/APOBEC/NONBDNA/IRloopstem/irstem_",cancer,".txt"), sep='\t')
dataNotLoopC <- data.table(dataNotLoopC)
dataBDNA <- read.csv("/Users/mar/BIO/PROJECTS/APOBEC/NONBDNA/Denek3/united_X0.txt", sep='\t')
dataBDNA <- data.table(dataBDNA)
setnames(dataBDNA,c("Cancer","structure","isAPOBEC","sample","trgIn","cntIn","trgOut","cntOut","trgOutOld","cntOutOld","sign"))
dataBDNA <- dataBDNA[Cancer == cancer & structure == "ir" & isAPOBEC == 1]
activity <- read.csv("/Users/mar/BIO/PROJECTS/PCAWG_APOBEC/PCAWG_enrichment_6cancers.txt",sep='\t',header=FALSE,strip.white =TRUE)
activity <- data.table(activity)
setnames(activity,c("project","sample","enrichment"))
resultsNT <- data.table()
samples <- unique(dataLoopC$sample)
for(s in samples){
dt1 <- dataLoopC[sample == s]
dt2 <- dataNotLoopC[sample == s]
dt3 <- dataBDNA[sample == s]
enrich <- activity[sample == s, enrichment]
totalTrgs1 <- dt1[pC == "T",sum(Cnt)]
totalMuts1 <- dt1[pC == "T" & mut %in% c("G","T"),sum(Cnt)]
totalTrgAPBloop <- dt2[LOOP==1 & pC == "T" & nuc == "C", sum(Cnt)]
totalMutsAPBloop <- dt2[LOOP==1 & pC == "T" & nuc == "C" & mut %in% c("G","T"), sum(Cnt)]
totalTrgAPBstem <- dt2[LOOP==0 & pC == "T" & nuc == "C", sum(Cnt)]
totalMutsAPBstem <- dt2[LOOP==0 & pC == "T" & nuc == "C" & mut %in% c("G","T"), sum(Cnt)]
totalTrgBDNA <- dt3$trgOut
totalMutsBDNA <- dt3$cntOut
loopCdensity <- totalMuts1 / totalTrgs1
loopAPBdensity <- totalMutsAPBloop / totalTrgAPBloop
stemAPBdensity <- totalMutsAPBstem / totalTrgAPBstem
DBNAdensity <- totalMutsBDNA / totalTrgBDNA
results <- rbind(results, data.table("cancer"=cancer, "sample"=s, "type"="1LoopC", "density"=log(loopCdensity/DBNAdensity), "enrich"=enrich))
results <- rbind(results, data.table("cancer"=cancer, "sample"=s, "type"="2loopAPB", "density"=log(loopAPBdensity/DBNAdensity), "enrich"=enrich))
results <- rbind(results, data.table("cancer"=cancer, "sample"=s, "type"="3stemAPB", "density"=log(stemAPBdensity/DBNAdensity), "enrich"=enrich))
resultsNT <- rbind(resultsNT, data.table("sample"=s, "enrich"=enrich, "type"="1LoopC", "cnt"=totalMuts1))
resultsNT <- rbind(resultsNT, data.table("sample"=s, "enrich"=enrich, "type"="2loopAPB", "cnt"=totalMutsAPBloop))
resultsNT <- rbind(resultsNT, data.table("sample"=s, "enrich"=enrich, "type"="3stemAPB", "cnt"=totalMutsAPBstem))
}
resultsNT[, sampleEnrich := paste0(sample,'__',round(enrich,2))]
sampleLevels <- unique(resultsNT[order(enrich),sampleEnrich])
resultsNT$sampleEnrich <- factor(resultsNT$sampleEnrich,levels=sampleLevels)
ggplot(resultsNT, aes(x=sampleEnrich,y=cnt,fill=type,labels=cnt)) + geom_bar(position="fill", stat="identity") +
geom_text(aes(label = resultsNT$cnt),size = 3, position = position_fill(vjust = .5)) +
theme(panel.background = element_blank(),
axis.text.x = element_text(angle = 90))
ggsave(paste0("/Users/mar/BIO/PROJECTS/APOBEC/NONBDNA/IRloopstem/pics/ir_samples_",cancer,".tiff"),units="mm",width=300,height=150,dpi=300)
}
results <- results[!is.na(enrich)]
resultsHigh <- results[enrich >= 2.0]
resultsLow <- results[enrich < 2.0]
ggplot(resultsHigh, aes(x=cancer,y=density,fill=type,color=type)) +
# ggplot(dtm, aes(x=cancer,y=value,fill=isAPOBEC,color=isAPOBEC)) +
geom_boxplot(position=position_dodge(1)) +
scale_fill_manual(name= "Clarity", values = c(rgb(253,190,147,maxColorValue = 255),
rgb(233,148,151,maxColorValue = 255),
rgb(146,187,216,maxColorValue = 255))) +
scale_color_manual(name = "Clarity", values = c(rgb(253,127,40,maxColorValue = 255),
rgb(212,42,47,maxColorValue = 255),
rgb(38,120,178,maxColorValue = 255))) +
geom_hline(yintercept = 0) +
theme(panel.background = element_blank(),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
axis.line.x = element_blank(),
axis.ticks.x=element_blank(),
axis.line.y = element_line(color="black"),
panel.grid.major.y = element_line(size = rel(0.5), colour='grey92'),
legend.position = "none"
) +
geom_vline(xintercept=c(0.5,1.5,2.5,3.5,4.5,5.5),color="grey92")
ggsave(paste0("/Users/mar/BIO/PROJECTS/APOBEC/NONBDNA/IRloopstem/pics/ir_high.tiff"),units="mm",dpi=300,width=125,height=80)
ggplot(resultsLow, aes(x=cancer,y=density,fill=type,color=type)) +
# ggplot(dtm, aes(x=cancer,y=value,fill=isAPOBEC,color=isAPOBEC)) +
geom_boxplot(position=position_dodge(1)) +
scale_fill_manual(name= "Clarity", values = c(rgb(253,190,147,maxColorValue = 255),
rgb(233,148,151,maxColorValue = 255),
rgb(146,187,216,maxColorValue = 255))) +
scale_color_manual(name = "Clarity", values = c(rgb(253,127,40,maxColorValue = 255),
rgb(212,42,47,maxColorValue = 255),
rgb(38,120,178,maxColorValue = 255))) +
geom_hline(yintercept = 0) +
theme(panel.background = element_blank(),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
axis.line.x = element_blank(),
axis.ticks.x=element_blank(),
axis.line.y = element_line(color="black"),
panel.grid.major.y = element_line(size = rel(0.5), colour='grey92'),
legend.position = "none"
) +
geom_vline(xintercept=c(0.5,1.5,2.5,3.5,4.5,5.5),color="grey92")
ggsave(paste0("/Users/mar/BIO/PROJECTS/APOBEC/NONBDNA/IRloopstem/pics/ir_low.tiff"),units="mm",dpi=300,width=125,height=80)