-
Notifications
You must be signed in to change notification settings - Fork 0
/
Snakefile
101 lines (94 loc) · 3.05 KB
/
Snakefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
configfile: 'config.yaml'
IDS = ['WT', 'LMA', 'LME', 'LMH', 'dHsdS']
rule all:
input:
expand("unicycler_out/{sample}", sample=IDS),
expand("pbmm2_out/{sample}_pbmm2.bam", sample=IDS),
expand("ipdsummary_out/{sample}_modifications.gff", sample=IDS),
expand("multimotifmaker_out/{sample}_motifs.csv", sample=IDS),
#rule flye:
# input:
# nanopore = "nanopore_fastq/{sample}.fastq.gz",
# pacbio = "pacbio_fa/{sample}.fa"
# output:
# directory("flye_out/{sample}")
# conda:
# "envs/flye.yaml"
# log:
# "logs/flye/{sample}.log"
# params:
# genome_size = config['flye']['genome_size']
# threads: 16
# shell:
# """
# flye -o {output} --pacbio-raw {input.pacbio} --nano-raw {input.nanopore} --genome-size {params.genome_size} --threads {threads}
# """
rule unicycler:
input:
nanopore = "nanopore_fastq/{sample}.fastq.gz",
pacbio = "pacbio_fa/{sample}.fa",
fw = "illumina_fastq/{sample}_R1.fastq.gz",
rv = "illumina_fastq/{sample}_R2.fastq.gz",
startgenes = "startgenes.fasta"
output:
directory("unicycler_out/{sample}")
conda:
"envs/unicycler.yaml"
log:
"logs/unicycler/{sample}.log"
params:
general = config['unicycler']['general']
threads: 16
shell:
"""
unicycler --long {input.pacbio} --long {input.nanopore} -1 {input.fw} -2 {input.rv} -o {output} --threads {threads} --start_genes {input.startgenes} {params.general} 2>&1>{log}
"""
rule pbmm2:
input:
unicycler_dir = "unicycler_out/{sample}",
pacbio = "pacbio_bam/{sample}.bam"
output:
"pbmm2_out/{sample}_pbmm2.bam"
conda:
"envs/pbmm2.yaml"
log:
"logs/pbmm2/{sample}.log"
params:
general = config['pbmm2']['general']
threads: 16
shell:
"""
pbmm2 align {params.general} -j {threads} {input.unicycler_dir}/assembly.fasta {input.pacbio} {output} 2>&1>{log}
"""
rule ipdsummary:
input:
bam = "pbmm2_out/{sample}_pbmm2.bam",
unicycler_dir = "unicycler_out/{sample}"
output:
gff = "ipdsummary_out/{sample}_modifications.gff",
csv = "ipdsummary_out/{sample}_kinetics.csv"
log:
"logs/ipdsummary/{sample}.log"
params:
general = config['ipdsummary']['general'],
mods = config['ipdsummary']['mods']
threads: 16
shell:
"""
samtools faidx {input.unicycler_dir}/assembly.fasta
smrtlink/install/smrtlink-release_10.1.0.119588/bundles/smrttools/install/smrttools-release_10.1.0.119588/private/pacbio/python3pkgs/kineticstools-py3/binwrap/ipdSummary \
--reference {input.unicycler_dir}/assembly.fasta --identify {params.mods} --gff {output.gff} --csv {output.csv} -j {threads} {params.general} {input.bam} 2>&1>{log}
"""
rule multimotifmaker:
input:
unicycler_dir = "unicycler_out/{sample}",
gff = "ipdsummary_out/{sample}_modifications.gff",
output:
"multimotifmaker_out/{sample}_motifs.csv"
log:
"logs/multimotifmaker/{sample}.log"
threads: 16
shell:
"""
java -jar ./tools/MultiMotifMaker.jar find --fasta {input.unicycler_dir}/assembly.fasta --gff {input.gff} --output {output} --layer 1 --thread {threads} 2>&1>{log}
"""