-
Notifications
You must be signed in to change notification settings - Fork 12
/
dbscan-swa.smk
114 lines (97 loc) · 3.25 KB
/
dbscan-swa.smk
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
"""
DBSCAN-SWA
Manuscript: https://www.biorxiv.org/content/10.1101/2020.07.12.199018v1
Software: https://github.com/HIT-ImmunologyLab/DBSCAN-SWA
"""
import os
import sys
# CONFIG
# Name of this tool's result directory.
# NOTE(review): presumably read by the included preflight.smk to derive `outputdir` — confirm.
outDirName = 'dbscan-swa'
# Build area, addressed relative to the directory holding this workflow file.
dbsBuild = os.path.join(workflow.basedir, "../build/")
# Directory inside the build area that holds the DBSCAN-SWA installation.
dbsHome = os.path.join(dbsBuild, 'DBSCAN')
# Entry-point script of the installation; rule build_dbscan_swa declares it as its output.
dbsRun = os.path.join(dbsHome, 'bin/dbscan-swa.py')
# Git repository cloned by rule build_dbscan_swa.
dlUrl = 'https://github.com/beardymcjohnface/DBSCAN-SWA-1.git'
# GENERIC CONFIG/RECIPES
# Shared preflight rules/settings; names such as `outputdir`, `GENOMES`, `test_genomes`
# and `scripts` used below are not defined in this file and presumably come from here.
include: os.path.join(workflow.basedir, "../rules/preflight.smk")
# TARGETS
rule all:
input:
expand(os.path.join(outputdir, "{genome}_dbscan-swa_tptn.tsv"), genome=GENOMES)
rule build_dbscan_swa:
"""
clone git, make executable, run test to do the first-time database installation, delete test
"""
output:
dbsRun
conda:
"../conda_environments/dbscan-swa.yaml"
shell:
"""
cd {dbsBuild}
git clone {dlUrl}
cd DBSCAN-SWA-1/
cd {dbsBuild}
rm -rf DBSCAN/ && mkdir DBSCAN
mv DBSCAN-SWA-1/* DBSCAN/
rm -rf DBSCAN-SWA-1/
cd DBSCAN/
wget -O db.tar.gz https://cloudstor.aarnet.edu.au/plus/s/2nfXwDm9YuWNy5C/download
tar xvf db.tar.gz
cd {dbsHome}
chmod u+x -R bin/
chmod u+x -R software/
cd {dbsHome}/test
# sed 's/num_threads = 20/num_threads = thread_num/' {dbsRun} \
# | sed 's/thread_num <- 10//' \
# | sed "s/.*0 prophage region was detected in the query bacterial genome.*/\\t\\topen(save_prophage_summary_file,'w').close()\\n\\t\\tsys.exit(0)/" \
# > tmp.py && mv tmp.py {dbsRun}
python {dbsRun} --input NC_007054.fasta --output yeet --thread_num 1
rm -r yeet
"""
rule run_dbscan_swa:
input:
fa = os.path.join(outputdir,"{genome}.fna"),
req = dbsRun
output:
os.path.join(outputdir, '{genome}/bac_DBSCAN-SWA_prophage_summary.txt')
conda:
"../conda_environments/dbscan-swa.yaml"
params:
os.path.join(outputdir, '{genome}')
benchmark:
os.path.join(outputdir, "benchmarks", "{genome}_dbscan-swa.txt")
log:
os.path.join(outputdir, '{genome}/dbscan.log')
resources:
mem_mb = 8000
shell:
"python {dbsRun} --input {input.fa} --output {params} --thread_num 1 &> {log}"
rule dbscan_swa_2_tbl:
input:
os.path.join(outputdir,'{genome}/bac_DBSCAN-SWA_prophage_summary.txt')
output:
os.path.join(outputdir,"{genome}","locs.tsv")
run:
infh = open(input[0],'r')
outfh = open(output[0], 'w')
line = infh.readline()
for line in infh:
l = line.split('\t')
id = l[1].split()
outfh.write(f'{id[0]}\t{l[3]}\t{l[4]}\n')
outfh.close()
rule count_tp_tn:
input:
gen = os.path.join(test_genomes, "{genome}.gb.gz"),
tbl = os.path.join(outputdir, "{genome}", "locs.tsv")
output:
tp = os.path.join(outputdir, "{genome}_dbscan-swa_tptn.tsv")
params:
os.path.join(workflow.basedir,'../')
conda:
"../conda_environments/roblib.yaml"
shell:
"""
export PYTHONPATH={params};
python3 {scripts}/compare_predictions_to_phages.py -t {input.gen} -r {input.tbl} > {output.tp}
"""