forked from clinical-genomics-uppsala/Twist_DNA
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Twist_DNA.smk
89 lines (71 loc) · 4.28 KB
/
Twist_DNA.smk
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# vim: syntax=python tabstop=4 expandtab
# coding: utf-8
import pandas as pd
# Load the workflow configuration into the global `config` mapping.
configfile: "Twist_DNA.yaml"

# Sample sheet: a delimited table whose "sample" column becomes the index.
samples = pd.read_table(config["samples"], index_col="sample")

# Regular expressions that wildcard values must match.
wildcard_constraints:
    unit="[A-Za-z0-9-]+",
    sample="[^._]+",
    chr="chr[0-9XYM]+",

# Sample names, in the order they appear in the sample sheet.
sample_list = list(samples.index)
def get_input():
    """Collect every target file requested by ``rule all``.

    Takes no arguments; reads the module-level ``config`` mapping and
    ``sample_list``.

    Returns:
        A list in pipeline-stage order. Each element is either a list of
        per-sample paths (one per name in ``sample_list``) or a single path
        string for a run-level file.
    """
    # NOTE(review): indentation was missing in the reviewed copy of this file.
    # The nesting of the `if "units"` block and of the caller loop below was
    # reconstructed from the section markers -- confirm against the repository.
    targets = []

    # Demultiplexing: FASTQ targets are requested only when the config has no
    # "units" entry.
    if "units" not in config:
        targets += [
            ["fastq/DNA/" + sample + "_R1.fastq.gz" for sample in sample_list],
            ["fastq/DNA/" + sample + "_R2.fastq.gz" for sample in sample_list],
        ]

    # Alignment
    targets += [["Bam/DNA/" + sample + "-ready.bam.bai" for sample in sample_list]]

    # Callers: one per-sample list for each caller named in the config.
    targets += [["mutect2/" + sample + ".mutect2.gvcf.gz" for sample in sample_list]]
    targets.extend(
        [caller + "/" + sample + "." + caller + ".normalized.vcf.gz.tbi" for sample in sample_list]
        for caller in config["callers"]["list"]
    )
    targets += [
        ["vardict/" + sample + ".vardict.normalized.vcf.gz.tbi" for sample in sample_list],
        ["recall/" + sample + ".ensemble.vcf.gz" for sample in sample_list],
        ["recall/" + sample + ".ensemble.vcf.gz.tbi" for sample in sample_list],
        ["recall/" + sample + ".ensemble.vep.vcf.gz" for sample in sample_list],
        ["recall/" + sample + ".ensemble.vep.vcf.gz.tbi" for sample in sample_list],
        # Disabled:
        # ["Results/DNA/" + s + "/vcf/" + s + ".ensemble.vep.exon.soft_filter.vcf.gz" for s in sample_list],
        ["Results/DNA/" + sample + "/vcf/" + sample + ".ensemble.vep.exon.soft_filter.ffpe.vcf.gz" for sample in sample_list],
        ["Results/DNA/" + sample + "/vcf/" + sample + ".ensemble.vep.exon.soft_filter.multibp.vcf" for sample in sample_list],
    ]

    # Background
    targets += [
        # Disabled: "DATA/background_panel.tsv",
        "DATA/background_run.tsv",
    ]

    # CNV
    targets += [
        # Disabled:
        # ["CNV_calls/" + sample_id + "-ready.cnr" for sample_id in sample_list],
        # ["CNV_calls/" + sample_id + "-ready.cns" for sample_id in sample_list],
        "Results/DNA/CNV/Reported_cnvs.txt",
        "Results/DNA/CNV/cnv_plots.txt",
        ["CNV/cnvkit_calls/" + sample + "-LoH.cns" for sample in sample_list],
        # Disabled:
        # "CNV_calls/cnv_event.txt",
        # ["Results/DNA/" + s + "/CNV/" + s + "-ready.png" for s in sample_list],
        ["Results/DNA/CNV/" + sample + "_GATK_clean.calledCNVs.modeled.png" for sample in sample_list],
    ]

    # MSI
    targets += [
        # Disabled:
        # ["Results/DNA/" + s + "/MSI/" + s + ".msi" for s in sample_list],
        ["Results/DNA/" + sample + "/MSI/" + sample for sample in sample_list],
    ]

    # Fusion/SV
    targets += [
        # Disabled:
        # ["Results/DNA/" + s + "/Fusions/JuLI/" + s + ".filtered.annotated.txt" for s in sample_list],
        ["Results/DNA/" + sample + "/Fusions/geneFuse/fusions_" + sample + ".txt" for sample in sample_list],
    ]

    # TMB
    targets += [["Results/DNA/" + sample + "/TMB/" + sample + ".TMB.txt" for sample in sample_list]]

    # HRD
    targets += [["Results/DNA/" + sample + "/HRD/" + sample + ".HRD_score.txt" for sample in sample_list]]

    # QC: per-sample reports first, then the run-level summaries.
    targets += [
        ["Results/DNA/" + sample + "/QC/" + sample + ".Low_coverage_positions.txt" for sample in sample_list],
        ["Results/DNA/" + sample + "/QC/" + sample + ".All_coverage_positions.txt" for sample in sample_list],
        # Disabled:
        # ["qc/" + s + "/" + s + "_Stat_table.csv" for s in sample_list],
        ["qc/" + sample + "/" + sample + "_R1_fastqc.html" for sample in sample_list],
        ["qc/" + sample + "/" + sample + "_R1_fastqc.zip" for sample in sample_list],
        ["qc/" + sample + "/" + sample + "_R2_fastqc.html" for sample in sample_list],
        ["qc/" + sample + "/" + sample + "_R2_fastqc.zip" for sample in sample_list],
        ["qc/" + sample + "/" + sample + ".samtools-stats.txt" for sample in sample_list],
        ["qc/" + sample + "/" + sample + ".HsMetrics.txt" for sample in sample_list],
        ["qc/" + sample + "/" + sample + "_stats_mqc.csv" for sample in sample_list],
        "qc/batchQC_stats_mqc.json",
        "qc/batchQC_stats_unsorted.csv",
        "Results/DNA/MultiQC.html",
    ]

    return targets
# Default target rule: its inputs are the files returned by get_input(),
# which is evaluated once while the Snakefile is parsed.
rule all:
    input:
        get_input()


# Include the workflow Snakefile.
include: "src/Snakemake/workflow/Twist_DNA_workflow.smk"