-
Notifications
You must be signed in to change notification settings - Fork 0
/
s4.tallele_joint.smk
97 lines (74 loc) · 2.34 KB
/
s4.tallele_joint.smk
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import pandas as pd
METADATA = pd.read_table(config["METADATA"])
METADATA = METADATA[METADATA["Platform"] == "S2"]
METADATA = METADATA[METADATA["Tissue"] == "dorsolateral_prefrontal_cortex"]
METADATA = METADATA.reset_index(drop=True)
SampleGroups = METADATA.groupby("Condition").groups
print(METADATA, SampleGroups)
workdir: config['workdir']
def getSamplesInGroup(var_group):
return [METADATA.iloc[i]["SM"] for i in SampleGroups[var_group]]
rule all:
input:
expand("TAllele_new/{var_group}.merged.mi_summary.tab", var_group = list(SampleGroups))
rule combine_gvcfs:
params:
sge_opts = "-cwd -V -l h_data=16G,h_rt=2:00:00 \
-N combine_gvcfs.{var_group} \
-o log/combine_gvcfs.{var_group}.out \
-e log/combine_gvcfs.{var_group}.err",
TAlleleDir = config["TAlleleDir"]
input:
ref = config["REF"],
vcf = lambda wildcards: \
[f"TAllele_new/{sm}.vcf" for sm in getSamplesInGroup(wildcards.var_group)]
output:
fofn = "TAllele_new/{var_group}.gvcfs.list",
vcf = "TAllele_new/{var_group}.merged.gvcf.gz",
gvcf = "TAllele_new/{var_group}.merged.genotyped.gvcf.gz"
shell:
"""
echo -n > {output.fofn}
for vcf in {input.vcf}
do
new_vcf=$(echo $vcf | sed 's/.vcf/.gvcf/g')
bash {params.TAlleleDir}/convert_vcf.sh $vcf $new_vcf
echo $new_vcf >> {output.fofn}
done
cat {output.fofn}
gatk --java-options "-Xmx4g" CombineGVCFs \
--variant {output.fofn} \
-R {input.ref} \
-O {output.vcf}
gatk --java-options "-Xmx4g" GenotypeGVCFs \
-R {input.ref} \
-V {output.vcf} \
-O {output.gvcf}
tabix -p vcf -f {output.gvcf}
"""
rule tallele_joint:
params:
sge_opts = "-cwd -V -l h_data=14G,h_rt=1:00:00 \
-N tallele_joint.{var_group} \
-o log/tallele_joint.{var_group}.out \
-e log/tallele_joint.{var_group}.err",
sm = lambda wildcards: getSamplesInGroup(wildcards.var_group),
TAlleleDir = config["TAlleleDir"]
input:
vcf = "TAllele_new/{var_group}.merged.genotyped.gvcf.gz",
output:
fofn = "TAllele_new/{var_group}.mi.fofn",
mi = "TAllele_new/{var_group}.merged.mi_summary.tab"
shell:
"""
echo -n > {output.fofn}
for sm in {params.sm}
do
echo bam/$sm.annotated.sorted.bam TAllele_new/$sm.mi_summary.tab >> {output.fofn}
done
cat {output.fofn}
python {params.TAlleleDir}/gqv_merge_outputs.py \
-g {input.vcf} \
-f {output.fofn} \
-o {output.mi}
"""