-
Notifications
You must be signed in to change notification settings - Fork 0
/
peakAnno.R
40 lines (34 loc) · 1.23 KB
/
peakAnno.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
library(doParallel)
library(tidyverse)
library(ChIPseeker)
library(org.Hs.eg.db)
dir <- system("echo $SCRATCH/cobinding/beds/", intern = TRUE)
setwd(dir)
baseOutDir <- system("echo $SCRATCH/cobinding/annotated/", intern = TRUE)
factors <- list.files()
# exclude informational files
factors <- factors[!(factors %in% c("encodeKey.csv", "nodata.csv", "key.csv"))]
print("Starting annotation")
# 36 threads, ~70 GB memory
cl <- makeCluster(36, type = "PSOCK")
registerDoParallel(cl)
factors <- foreach(factor = factors, .packages = c("ChIPseeker", "org.Hs.eg.db")) %dopar% {
setwd(file.path(dir, factor))
beds <- list.files()
outDir <- file.path(baseOutDir, factor)
dir.create(outDir, showWarnings = FALSE, recursive = T)
# load GENCODE V29 database
annoFile <- system("echo $SCRATCH/data/gencode_v29.sqlite", intern = TRUE)
txdb <- loadDb(annoFile)
# annotate and output to new directory
for(bed in beds){
setwd(file.path(dir, factor))
peak <- ChIPseeker::readPeakFile(bed)
suppressWarnings(peakAnno <- ChIPseeker::annotatePeak(peak, TxDb = txdb, annoDb = "org.Hs.eg.db", verbose = FALSE))
peakAnno <- as.data.frame(peakAnno)
setwd(outDir)
write.csv(peakAnno, bed, row.names = F)
}
factor
}
stopCluster(cl)