generated from snakemake-workflows/dna-seq-gatk-variant-calling
-
Notifications
You must be signed in to change notification settings - Fork 0
/
config.yaml
54 lines (50 loc) · 1.78 KB
/
config.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
# Path to the sample sheet.
# NOTE(review): presumably a tab-separated table resolved relative to the
# workflow's working directory — confirm against the consuming rules.
samples: samples.tsv
# Path to the unit sheet.
# NOTE(review): presumably maps samples to their sequencing units/FASTQ
# files — confirm against the consuming rules.
units: units.tsv
# Reference data. All keys below belong to the `ref` mapping.
ref:
  # Identifier of the reference.
  # NOTE(review): presumably the genome build / annotation release name
  # expected by downstream annotation — confirm against the consuming rules.
  name: GRCh38.86
  # Path to the reference genome, ideally as it is provided by the GATK bundle.
  genome: data/ref/genome.chr21.fa
  # Path to any database of known variants, ideally as it is provided by the GATK bundle.
  known-variants: data/ref/dbsnp.vcf.gz
# Variant filtering strategy: either VQSR or the hard filters under `hard`.
filtering:
  # Set to true in order to apply machine learning based recalibration of
  # quality scores instead of hard filtering.
  vqsr: false
  # Filter expressions for hard filtering, one per variant type.
  hard:
    # hard filtering as outlined in GATK docs
    # (https://gatkforums.broadinstitute.org/gatk/discussion/2806/howto-apply-hard-filters-to-a-call-set)
    snvs:
      "QD < 2.0 || FS > 60.0 || MQ < 40.0 || MQRankSum < -12.5 || ReadPosRankSum < -8.0"
    indels:
      "QD < 2.0 || FS > 200.0 || ReadPosRankSum < -20.0"
# Read-processing options. All keys below belong to the `processing` mapping.
processing:
  # NOTE(review): params.picard.MarkDuplicates in this file also passes
  # REMOVE_DUPLICATES=true — confirm which of the two settings the workflow
  # actually honours.
  remove-duplicates: true
  # Uncomment and point to a bed file with, e.g., captured regions if necessary,
  # see https://gatkforums.broadinstitute.org/gatk/discussion/4133/when-should-i-use-l-to-pass-in-a-list-of-intervals.
  # restrict-regions: captured_regions.bed
  # If regions are restricted, uncomment this to enlarge them by the given value in order to include
  # flanking areas.
  # region-padding: 100
# Per-tool parameter strings, grouped by tool suite.
# NOTE(review): values are presumably appended verbatim to the respective
# command lines, with "" meaning no extra arguments — confirm against the rules.
params:
  gatk:
    HaplotypeCaller: ""
    BaseRecalibrator: ""
    GenotypeGVCFs: ""
    VariantRecalibrator: ""
  picard:
    MarkDuplicates: "REMOVE_DUPLICATES=true"
  trimmomatic:
    # Trimming steps for paired-end (pe) and single-end (se) reads are
    # configured separately; each has its own `trimmer` list.
    pe:
      trimmer:
        # See trimmomatic manual for adding additional options, e.g. for adapter trimming.
        - "LEADING:3"
        - "TRAILING:3"
        - "SLIDINGWINDOW:4:15"
        - "MINLEN:36"
    se:
      trimmer:
        # See trimmomatic manual for adding additional options, e.g. for adapter trimming.
        - "LEADING:3"
        - "TRAILING:3"
        - "SLIDINGWINDOW:4:15"
        - "MINLEN:36"