Skip to content

Commit

Permalink
both pipelines are nearly complete
Browse files Browse the repository at this point in the history
  • Loading branch information
ahalfpen727 committed Oct 21, 2020
1 parent 6dceaf7 commit 9f05af0
Show file tree
Hide file tree
Showing 18 changed files with 190 additions and 323 deletions.

This file was deleted.

31 changes: 0 additions & 31 deletions Genome-Reference-File-Retreival/CreateMaptsv.sh

This file was deleted.

This file was deleted.

4 changes: 2 additions & 2 deletions MiSeqScripts/1.MiSeq-index-and-bwa-mem.sh
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,12 @@ for pfx in 2019_09 2019_12; do
datadir=${datadir[$pfx]}
cd $miseqdir
export WORK_DIR=./$datadir
mkdir -p ./MiSeq_Results_out
mkdir -p ./MiSeq_Results_out/1.RAW_BAMS
touch ./sm.file.txt
touch ./sm.txt
export INPUT_FILE=./sm.file.txt
export INPUTFILE=./sm.txt
export RESULTS=MiSeq_Results_out
export RESULTS=/MiSeq_Results_out/1.RAW_BAMS
find -iname "*_R1_*.fastq.gz" > $INPUT_FILE
for i in $(cat $INPUT_FILE); do
basename -s "R1_001.fastq.gz" $i >> $INPUTFILE
Expand Down
9 changes: 6 additions & 3 deletions MiSeqScripts/2.MiSeq-remove_dups.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,16 @@ for pfx in 2019_09 2019_12; do
export WORKDIR=$WORK_DIR/$miseqdir
export INPUT_FILE=$WORKDIR/sm.txt
cd $WORKDIR
mkdir -p $datadir/2.TMP_DUP_BAMS
export INPUT=$datadir/1.RAW_BAMS
export OUTPUT=$datadir/2.TMP_DUP_BAMS
sm_arr=( $(cat $INPUT_FILE) )
n=${#sm_arr[@]}
for i in $(seq 1 $n); do
sm_arr=( $(cat $INPUT_FILE) ); \
sm=${sm_arr[(($i-1))]}; \
java -jar $HOME/toolbin/picard.jar MarkDuplicates I=$datadir/$sm.raw.bam \
O=$datadir/$sm.tmp.bam M=$datadir/$sm.dups_metrics.txt && \
samtools index $datadir/$sm.tmp.bam
java -jar $HOME/toolbin/picard.jar MarkDuplicates I=$INPUT/$sm.raw.bam \
O=$OUTPUT/$sm.tmp.bam M=$OUTPUT/$sm.dups_metrics.txt && \
samtools index $OUTPUT/$sm.tmp.bam
done
done
22 changes: 10 additions & 12 deletions MiSeqScripts/3.MiSeq-recalibrate_basepairs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,29 +12,27 @@ export REFFA=$IDXDIR/GCA_000001405.15_GRCh38_no_alt_analysis_set.fna
export VCF1000G=$REFDIR/GRCh38/1000G_phase1.snps.high_confidence.hg38.vcf.gz
export TBI1000G=$REFDIR/GRCh38/1000G_phase1.snps.high_confidence.hg38.vcf.gz.tbi

for file in $IDXDIR; do
ln -s $file
done

declare -A miseqdir=( ["2019_09"]="2019_09" ["2019_12"]="2019_12" )
declare -A datadir=( ["2019_09"]="MiSeq_Results_out" ["2019_12"]="MiSeq_Results_out" )

for pfx in 2019_09 2019_12; do
miseqdir=${miseqdir[$pfx]}
datadir=${datadir[$pfx]}
export WORK_DIR=$WORKDIR/$miseqdir
export INPUT_FILE=$WORK_DIR/sm.txt
cd $WORK_DIR
cd $miseqdir
mkdir -p $datadir/3.GRP_BAMs
export OUTDIR=$datadir/3.GRP_BAMs
export INPUT_FILE=sm.txt
export INPUTDIR=$datadir/2.TMP_DUP_BAMs
sm_arr=( $(cat $INPUT_FILE) )
n=${#sm_arr[@]}
for i in $(seq 1 $n); do
sm_arr=( $(cat $INPUT_FILE) );
sm=${sm_arr[(($i-1))]};
$GATK BaseRecalibrator -R $REFFA -I $datadir/$sm.tmp.bam --known-sites $VCF1000G \
-O $datadir/$sm.grp && \
$GATK ApplyBQSR -R $REFFA -I $datadir/$sm.tmp.bam \
--bqsr-recal-file $datadir/$sm.grp -O $datadir/$sm.bam && \
samtools index $datadir/$sm.bam
$GATK BaseRecalibrator -R $REFFA -I $INPUTDIR/$sm.tmp.bam --known-sites $VCF1000G \
-O $OUTDIR/$sm.grp && \
$GATK ApplyBQSR -R $REFFA -I $INPUTDIR/$sm.tmp.bam \
--bqsr-recal-file $OUTDIR/$sm.grp -O $OUTDIR/$sm.bam && \
samtools index $OUTDIR/$sm.bam
done
done

56 changes: 34 additions & 22 deletions MiSeqScripts/4.MiSeq-coverage.sh
Original file line number Diff line number Diff line change
@@ -1,35 +1,47 @@
#!bin/bash

###########################################################################
# MISEQ DATA
# PILOT DATA
# Export Env Variables
###########################################################################

export GATK=$HOME/toolbin/gatk-4.1.8.1/gatk
export REFDIR=/media/drew/easystore/ReferenceGenomes/
export BEDDIR=/media/drew/easystore/ReferenceGenomes/BEDs
export BEDDIR=$REFDIR/BEDs
export BEDFILE=$BEDDIR/3215481_Covered.bed
export IDXDIR=$REFDIR/GRCh38/GCA_000001405.15_GRCh38_no_alt_analysis_set
export REFFILE=$IDXDIR/GCA_000001405.15_GRCh38_no_alt_analysis_set.fna
export REFFAI=$IDXDIR/GCA_000001405.15_GRCh38_no_alt_analysis_set.fna.fai

for file in $IDXDIR; do
ln -s $file
done

#cd ~/Downloads/GoodCell-Resources/GuniosAnalysis/2019_09/LVB_fastq_Sept2019_concat_fastq
#find -iname "*_R1_0012.fastq.gz" | cut -d/ -f2 | cut -d_ -f1 >> ../sm.txt

export IDXDIR=$REFDIR/GCA_000001405.15_GRCh38_no_alt_analysis_set
export REFFA=$IDXDIR/GCA_000001405.15_GRCh38_no_alt_analysis_set.fna
export REFFAI=$IDXDIR/GCA_000001405.15_GRCh38_no_alt_analysis_set.fna.fai
export WORKDIR=/media/drew/easystore/GoodCell-Resources/AnalysisBaseDir/MiSeq_Data
###########################################################################
## COMPUTE COVERAGE OVER TARGETS ##
###########################################################################

sm_arr=( $(cat sm.txt) )
n=${#sm_arr[@]}
for sm in ${sm_arr[@]}; do
bedtools coverage -g $REFFAI -sorted -a $BEDFILE -b $sm.bam -mean | \
cut -f5 > $sm.cov
declare -A miseqdir=( ["2019_09"]="2019_09" ["2019_12"]="2019_12" )
declare -A datadir=( ["2019_09"]="MiSeq_Results_out/3.GRP_BAMs" ["2019_12"]="MiSeq_Results_out/3.GRP_BAMs" )
declare -A coverage=( ["2019_09"]="MiSeq_Results_out/Coverage_out" ["2019_12"]="MiSeq_Results_out/Coverage_out" )
declare -A v2s=( ["2019_09"]="MiSeq_Results_out/4.V2_BAMs" ["2019_12"]="MiSeq_Results_out/4.V2_BAMs" )

for pfx in 2019_09 2019_12; do
miseqdir=${miseqdir[$pfx]}
datadir=${datadir[$pfx]}
coverage=${coverage[$pfx]}
cd $WORKDIR/$miseqdir
export INPUT_FILE=sm.txt
export INPUTDIR=$datadir
export OUTPUT=$coverage
sm_arr=( $(cat $INPUT_FILE) )
n=${#sm_arr[@]}
echo $n
for i in $(seq 1 $n); do
sm_arr=( $(cat $INPUT_FILE) );
sm=${sm_arr[(($i-1))]};
echo $sm
pwd
bedtools coverage -g $REFFAI -sorted -a $BEDFILE -b $INPUTDIR/$sm.bam -mean | \
cut -f5 > $OUTPUT/$sm.cov
done
(echo -en "CHROM\tBEG\tEND\tNAME\t"; tr '\n' '\t' < sm.txt | sed 's/\t$/\n/'; \
paste $BEDFILE $(cat sm.txt | sed 's/$/.cov/' | tr '\n' '\t' | sed 's/\t$/\n/')) \
> $BEDDIR/3215481_Covered.GRCh38.tsv
done
(echo -en "CHROM\tBEG\tEND\tNAME\t"; tr '\n' '\t' < sm.txt | sed 's/\t$/\n/'; \
paste $BEDFILE $(cat sm.txt | sed 's/$/.cov/' | tr '\n' '\t' | sed 's/\t$/\n/')) \
> 3215481_Covered.GRCh38.tsv
#/bin/rm $(cat sm.txt | sed 's/$/.cov/' | tr '\n' '\t' | sed 's/\t$/\n/')
44 changes: 44 additions & 0 deletions MiSeqScripts/4.MiSeq-coverage.sh.~1~
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#!/bin/bash
#
# Compute mean read coverage over the target BED intervals for every MiSeq
# sample, then assemble the per-sample results into one tab-separated table.
#
# For each batch directory under $WORKDIR (2019_09, 2019_12) the script:
#   1. reads whitespace-separated sample IDs from sm.txt,
#   2. runs `bedtools coverage -mean` on <INPUTDIR>/<sample>.bam and keeps
#      column 5 of its output as <OUTPUT>/<sample>.cov,
#   3. pastes $BEDFILE and every .cov file side by side under a header row.
#
# Requires: bash >= 4 (associative arrays), bedtools, coreutils.

# Let a pipeline report failure when bedtools fails, not only when the
# trailing `cut` fails.
set -o pipefail

###########################################################################
# Export Env Variables
###########################################################################

export GATK=$HOME/toolbin/gatk-4.1.8.1/gatk
export REFDIR=/media/drew/easystore/ReferenceGenomes/
export BEDDIR=$REFDIR/BEDs
export BEDFILE=$BEDDIR/3215481_Covered.bed

export IDXDIR=$REFDIR/GCA_000001405.15_GRCh38_no_alt_analysis_set
export REFFA=$IDXDIR/GCA_000001405.15_GRCh38_no_alt_analysis_set.fna
export REFFAI=$IDXDIR/GCA_000001405.15_GRCh38_no_alt_analysis_set.fna.fai
export WORKDIR=/media/drew/easystore/GoodCell-Resources/AnalysisBaseDir/MiSeq_Data
###########################################################################
## COMPUTE COVERAGE OVER TARGETS ##
###########################################################################

# Per-batch lookup tables, keyed by batch prefix.
declare -A miseqdir=( ["2019_09"]="2019_09" ["2019_12"]="2019_12" )
declare -A datadir=( ["2019_09"]="MiSeq_Results_out/3.GRP_BAMs" ["2019_12"]="MiSeq_Results_out/3.GRP_BAMs" )
declare -A coverage=( ["2019_09"]="MiSeq_Results_out/Coverage_out" ["2019_12"]="MiSeq_Results_out/Coverage_out" )
# Not read anywhere in this script; kept unchanged.
declare -A v2s=( ["2019_09"]="MiSeq_Results_out/4.V2_BAMs" ["2019_12"]="MiSeq_Results_out/4.V2_BAMs" )

for pfx in 2019_09 2019_12; do
  # Use separate scalar names: assigning a scalar to the name of an
  # associative array silently writes element [0] of that array.
  batch_dir=${miseqdir[$pfx]}
  bam_dir=${datadir[$pfx]}
  cov_dir=${coverage[$pfx]}

  # All paths below are relative to the batch directory, so a failed cd
  # must not fall through and operate on whatever directory we are in.
  if ! cd "$WORKDIR/$batch_dir"; then
    printf 'error: cannot cd to %s; skipping batch %s\n' \
      "$WORKDIR/$batch_dir" "$pfx" >&2
    continue
  fi

  export INPUT_FILE=sm.txt
  export INPUTDIR=$bam_dir
  export OUTPUT=$cov_dir

  if [[ ! -r "$INPUT_FILE" ]]; then
    printf 'error: %s not readable in %s; skipping batch %s\n' \
      "$INPUT_FILE" "$PWD" "$pfx" >&2
    continue
  fi

  # The coverage directory is not created anywhere else in this script;
  # without it every redirect into $OUTPUT fails.
  if ! mkdir -p "$OUTPUT"; then
    printf 'error: cannot create %s; skipping batch %s\n' "$OUTPUT" "$pfx" >&2
    continue
  fi

  # Read the sample IDs once. `read -d ''` consumes the whole file and
  # splits it on whitespace (same tokens as the former $(cat ...), but
  # without glob expansion); it returns non-zero at EOF, hence `|| true`.
  sm_arr=()
  read -r -d '' -a sm_arr < "$INPUT_FILE" || true
  n=${#sm_arr[@]}
  if (( n == 0 )); then
    printf 'warning: no samples listed in %s/%s; skipping batch %s\n' \
      "$PWD" "$INPUT_FILE" "$pfx" >&2
    continue
  fi

  # One single-column .cov file per sample, in sm.txt order.
  cov_files=()
  for sm in "${sm_arr[@]}"; do
    cov_file=$OUTPUT/$sm.cov
    if ! bedtools coverage -g "$REFFAI" -sorted -a "$BEDFILE" \
        -b "$INPUTDIR/$sm.bam" -mean \
        | cut -f5 > "$cov_file"; then
      # Keep going so one bad sample does not block the rest; its column
      # in the table may be empty or truncated.
      printf 'warning: coverage failed for sample %s (batch %s)\n' \
        "$sm" "$pfx" >&2
    fi
    cov_files+=( "$cov_file" )
  done

  # Header row followed by the BED columns and one coverage column per
  # sample. The .cov files are read from $OUTPUT, where the loop above
  # wrote them (they were previously looked up in the current directory).
  # NOTE(review): the header assumes $BEDFILE has exactly four columns
  # (CHROM, BEG, END, NAME) -- confirm against the BED file.
  # NOTE(review): both batches write the same TSV path, so the later
  # batch overwrites the earlier one -- confirm this is intended.
  {
    printf 'CHROM\tBEG\tEND\tNAME'
    printf '\t%s' "${sm_arr[@]}"
    printf '\n'
    paste "$BEDFILE" "${cov_files[@]}"
  } > "$BEDDIR/3215481_Covered.GRCh38.tsv"
done
41 changes: 0 additions & 41 deletions MiSeqScripts/4.MiSeq-coverage.sh~

This file was deleted.

8 changes: 0 additions & 8 deletions MiSeqScripts/Lift-over-Targets.sh~

This file was deleted.

Loading

0 comments on commit 9f05af0

Please sign in to comment.