-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSnakefile
executable file
·84 lines (67 loc) · 3.28 KB
/
Snakefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import os
import sys
import pandas as pd
from scripts import geneviking as gv
configfile: 'config.yaml'

# Load the region table. Rows are indexed by accession, but the 'acc'
# column itself is retained because it is needed to build file names.
df = pd.read_table(config['input_file'])
df = df.set_index("acc", drop=False)

# Per-region file stem used by every rule: <acc>_<start>-<end>
df['files'] = (
    df['acc']
    + '_' + df['start'].astype(str)
    + '-' + df['end'].astype(str)
)

# HMM model setting from the config; interpolated verbatim into the
# hmmscan command line of the hmmer rule.
hmms = config['hmm_models']

# Download format selector; the download rule only handles 'fasta'.
download_fmt = 'fasta'
# pipeline
rule all:
    # Default target: the final annotated GenBank file for every file stem
    # listed in df['files'], placed under the configured output directory.
    input:
        expand(
            '{direc}/viking/gb/{sample}.gbk',
            sample=df['files'],
            direc=config['output_dir']
        )
rule download:
    # Download the sequence for one region as FASTA.
    # The wildcards acc, start and end are parsed by Snakemake from the
    # requested output file name (<acc>_<start>-<end>.fasta).
    output:
        '{direc}/fasta/{acc}_{start}-{end}.fasta'
    run:
        # Fail loudly on an unsupported format. Merely printing a message
        # would let the job finish without creating its output, and the
        # real cause would be buried under a missing-output error.
        if download_fmt != 'fasta':
            raise ValueError(
                f'Unrecognized format {download_fmt!r}. Please indicate <fasta>.'
            )

        # Wildcard values are strings; the coordinates are converted to int.
        # The fourth argument is the 'neighborhood' setting from the config
        # (its exact meaning is defined by geneviking.NCBIQuery).
        query = gv.NCBIQuery(
            wildcards.acc,
            int(wildcards.start),
            int(wildcards.end),
            config['params']['neighborhood'],
        )

        # Write to the declared output instead of re-assembling the path;
        # str() hands the library a plain string, as the f-string did before.
        query.download_fasta(output=str(output[0]))
rule prokka:
    # Annotate a downloaded region with Prokka. Of the files Prokka writes
    # into its output directory, the GenBank (.gbk) and protein FASTA (.faa)
    # files are tracked as outputs for the downstream rules.
    input:
        '{direc}/fasta/{acc}_{start}-{end}.fasta'
    output:
        '{direc}/prokka/{acc}_{start}-{end}/{acc}_{start}-{end}.gbk',
        '{direc}/prokka/{acc}_{start}-{end}/{acc}_{start}-{end}.faa'
    params:
        # Wildcards in params strings are filled in by Snakemake, so the
        # directory and prefix are spelled once here rather than rebuilt
        # from individual wildcards inside the command.
        outdir='{direc}/prokka/{acc}_{start}-{end}',
        prefix='{acc}_{start}-{end}'
    shell:
        # --force: Snakemake creates the output directory before the job
        # starts, and Prokka will not write into an existing directory
        # without it.
        'prokka '
        '--outdir {params.outdir} '
        '--force '
        '--quiet '
        '--prefix {params.prefix} '
        '--compliant '
        '--kingdom Bacteria '
        '{input}'
rule hmmer:
    # Scan the Prokka-predicted proteins against the configured HMM models.
    input:
        '{direc}/prokka/{acc}_{start}-{end}/{acc}_{start}-{end}.faa'
    output:
        # Named so the command can refer to each file by role; positional
        # access (output[0], output[1]) keeps working as before.
        out='{direc}/hmmscan/{acc}_{start}-{end}/{acc}_{start}-{end}.out',
        tbl='{direc}/hmmscan/{acc}_{start}-{end}/{acc}_{start}-{end}.tbl'
    shell:
        # -o receives hmmscan's main report, --domtblout the per-domain
        # table. {hmms} is the module-level value read from
        # config['hmm_models'].
        'hmmscan '
        '-o {output.out} '
        '--domtblout {output.tbl} '
        '{hmms} '
        '{input}'
rule replace_annotations:
    # Combine the Prokka GenBank record with the hmmscan domain table via
    # geneviking.update_gb, writing an updated GenBank file and a TSV table.
    input:
        gbk='{direc}/prokka/{acc}_{start}-{end}/{acc}_{start}-{end}.gbk',
        tbl='{direc}/hmmscan/{acc}_{start}-{end}/{acc}_{start}-{end}.tbl'
    output:
        gbk='{direc}/viking/gb/{acc}_{start}-{end}.gbk',
        table='{direc}/viking/tables/{acc}_{start}-{end}.tsv'
    run:
        # update_gb takes the hmmscan table first and the GenBank file
        # second, the reverse of the declaration order above. Passing the
        # files by name keeps that mapping explicit. str() hands the library
        # plain strings, matching what the former f-string paths provided.
        gv.update_gb(
            str(input.tbl),
            str(input.gbk),
            save_gb=str(output.gbk),
            save_table=str(output.table),
        )