-
Notifications
You must be signed in to change notification settings - Fork 5
/
code.sh
70 lines (56 loc) · 3.31 KB
/
code.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#Downloading the ref.
wget ftp://ftp.ensembl.org/pub/release-96/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna.chromosome.21.fa.gz
#Building ref. index
bowtie2-build ~/BaseRecalibration_Benchmarking/Homo_sapiens.GRCh38.dna.chromosome.21.fa index_two_bowtie2/Homo_sapiens.fa
R1="$HOME/BaseRecalibration_Benchmarking/SRR8115017.fastq.gz”
RGID=$(cat $R1 | head -n1 | sed 's/:/_/g' |cut -d "." -f1)
PU=$RGID.$LB
LB="SRR8115017_same"
PL="Illumina"
#Alignment step:
bowtie2 -p 20 -q --no-unal -x index_two_bowtie2/Homo_sapiens.fa -U SRR8115017.fastq.gz --rg-id $RGID --rg SM:$SM --rg PL:$PL --rg LB:$LB --rg PU:$PU 2> align_stats.txt| samtools view -Sb -o bowtie2.bam
#Sorting:
samtools sort bowtie2.bam -o SRR8115017.sorted.bam
#Mark-duplicates:
picard_path=$CONDA_PREFIX/share/picard-2.19.0-0
java -Xmx2g -jar $picard_path/picard.jar MarkDuplicates INPUT=SRR8115017.sorted.bam OUTPUT=SRR8115017.dedup.bam METRICS_FILE=SRR8115017.metrics.txt
#Indexing
java -Xmx2g -jar $picard_path/picard.jar BuildBamIndex VALIDATION_STRINGENCY=LENIENT INPUT=SRR8115017.dedup.bam
java -Xmx2g -jar $picard_path/picard.jar CreateSequenceDictionary R=Homo_sapiens.GRCh38.dna.chromosome.21.fa O=Homo_sapiens.GRCh38.dna.chromosome.21.dict
samtools faidx Homo_sapiens.GRCh38.dna.chromosome.21.fa
#Downloading known variants:
wget ftp://ftp.ensembl.org/pub/release-96/variation/vcf/homo_sapiens/homo_sapiens-chr21.vcf.gz
gunzip homo_sapiens-chr21.vcf.gz
#Indexing:
gatk IndexFeatureFile -F Homo_sapiens_chr21.vcf
head -1291601 Homo_sapiens_chr21.vcf | tail -1 #Then remove this record
#Base recalibration:
gatk --java-options "-Xmx2G" BaseRecalibrator -R Homo_sapiens.GRCh38.dna.chromosome.21.fa -I SRR8115017.dedup.bam --known-sites Homo_sapiens_chr21.vcf -O SRR8115017.report
gatk --java-options "-Xmx2G" ApplyBQSR -R Homo_sapiens.GRCh38.dna.chromosome.21.fa -I SRR8115017.dedup.bam -bqsr SRR8115017.report -O SRR8115017.bqsr.bam --add-output-sam-program-record --emit-original-quals
#Without base rec.
gatk --java-options "-Xmx2G" HaplotypeCaller -R Homo_sapiens.GRCh38.dna.chromosome.21.fa -I SRR8115017.dedup.bam --emit-ref-confidence GVCF --pcr-indel-model NONE -O SRR8115017.gvcf
#With base rec.
gatk --java-options "-Xmx2G" HaplotypeCaller -R Homo_sapiens.GRCh38.dna.chromosome.21.fa -I SRR8115017.bqsr.bam --emit-ref-confidence GVCF --pcr-indel-model NONE -O SRR8115017.bqsr.gvcf
# VCF statistics for un-recaliberated samples
i- Index the VCF file using the tabix tool
conda install -c bioconda tabix
bgzip -c SRR8115017.gvcf > SRR8115017.gvcf.gz
tabix -p vcf SRR8115017.gvcf.gz
ii- Calculate some statistics about the gvcf file
conda install -c bioconda rtg-tools
rtg vcfstats SRR8115017.gvcf.gz > stats.txt
# VCF statistics for recaliberated samples
i- Index the VCF file using the tabix tool
conda install -c bioconda tabix
bgzip -c SRR8115017.bqsr.gvcf > SRR8115017.bqsr.gvcf.gz
tabix -p vcf SRR8115017.bqsr.gvcf.gz
ii- Calculate some statistics about the gvcf file
conda install -c bioconda rtg-tools
rtg vcfstats SRR8115017.bqsr.gvcf.gz > recal_stats.txtVCF statistics for un-recaliberated samples
i- Index the VCF file using the tabix tool
conda install -c bioconda tabix
bgzip -c SRR8115017.gvcf > SRR8115017.gvcf.gz
tabix -p vcf SRR8115017.gvcf.gz
ii- Calculate some statistics about the gvcf file
conda install -c bioconda rtg-tools
rtg vcfstats SRR8115017.gvcf.gz > recal_stats.txt