-
Notifications
You must be signed in to change notification settings - Fork 2
/
bactofidia.sh
executable file
·173 lines (141 loc) · 5.63 KB
/
bactofidia.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
#!/bin/bash -i
##to debug
#set -e
#set -v
#set -x
########################################
##Script to call snakefile for bacterial paired-end WGS Illumina data
## Run on a HPC with SLURM scheduler by requesting a node with sufficient memory
## screen -S [session_name]
## srun --time=24:00:00 --mem=32G --gres=tmpspace:4 --pty bash
##aschuerch 04-2020
########################################
##1. Checks
## Show the usage banner and stop when the script is called without arguments
## or with -h / --help.
## [[ ... || ... ]] is used instead of the obsolescent `[ ... -o ... ]` form:
## with -o the whole expression is parsed as one test and can be misread when
## "$1" happens to look like a test operator (for example "(" or "!").
if [[ $# -eq 0 || "$1" == "-h" || "$1" == "--help" ]]; then
  echo "
###########################################################################
############ Basic microbial WGS analysis pipeline ################
## ##
## for all samples in this folder. ##
## ##
## Paired end, compressed sequencing files (fastq.gz) ##
## must be present in the same folder from where the script is called. ##
## ##
## ##
## Example: ##
## ##
## ./bactofidia.sh ECO-RES-PR1-00001_R1.fastq.gz ECO-RES-PR1-00001_R2.fastq.gz
## ECO-RES-PR1-00002_R1.fastq.gz ECO-RES-PR1-00002_R2.fastq.gz ##
## ##
## or ##
## ##
## ./bactofidia.sh ALL ##
## ##
## Packages and versions are specified in envs/packages.yml. ##
## See bioconda.github.io for available packages. ##
## Command line parameters for individual tools can be adjusted in ##
## config/config.yaml ##
## ##
## Version March2020 ##
###########################################################################"
  exit
fi
## Prepare the log folder inside the current working directory and make sure
## the run log exists. `log` is the file later steps append to through tee.
log_dir="$(pwd)/log"
mkdir -p "$log_dir"
log="$log_dir/call_assembly.txt"
touch "$log"
sleep 1
## Check for *fastq.gz files
## Build the list of input files in `files`: every *fastq.gz in the current
## folder when the first argument is ALL, otherwise exactly the arguments given.
if [[ "$1" == "ALL" ]]; then
  echo "All fastq.gz files will be processed" 2>&1 | tee -a "$log"
  files=(./*fastq.gz)
else
  files=( "$@" )
fi
## Stop at the first listed file that does not exist. If the glob in the ALL
## branch matched nothing it stays as the literal pattern, which does not exist
## either and is therefore reported here as well.
for file in "${files[@]}"; do
  if [[ -e "$file" ]]; then
    echo 'Found files for ' "$file" 2>&1 | tee -a "$log"
  else
    echo 'Sequence files as '"$file"'_*R1*fastq.gz are missing in this folder.
Please execute this script from the location of the sequencing files or exclude the sample.
Exiting.' 2>&1 | tee -a "$log"
    exit 1
  fi
done
# Check that conda is available. When it is not, print manual installation
# instructions and stop; nothing is installed automatically.
if ! command -v conda > /dev/null; then
  cat <<'EOF'

conda missing.
Install Miniconda with:

 wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh
 chmod +x Miniconda3-latest-Linux-x86_64.sh
 ./Miniconda3-latest-Linux-x86_64.sh
and follow the prompts.
After installation, configure the channels with

 conda config --add channels defaults
 conda config --add channels bioconda
 conda config --add channels conda-forge
EOF
  exit 1
fi
# conda is present: one blank line to screen and log, one more to the screen
# only, then the confirmation to both.
printf '\n' | tee -a "$log"
printf '\n'
printf '%s\n' "conda found" | tee -a "$log"
# Tell the user (and the log) where the log files end up.
{
  printf '%s\n' "The logfiles will be generated here: "
  printf '%s\n' "$(pwd)/log"
  printf '\n'
} 2>&1 | tee -a "$log"
sleep 1
# Determine the read length: take the sequence line of every FASTQ record
# (line 2 of each group of 4) in the first input file, count how often each
# length occurs and keep the length from the top line of the count-sorted list.
length=$(
  zcat "${files[0]}" \
    | awk 'NR % 4 == 2 { print length($1) }' \
    | sort \
    | uniq -c \
    | sort -rn \
    | head -n 1 \
    | awk '{ print $NF }'
)
# Pick the config file that belongs to the read length (only 251 and 151 are
# implemented at the moment); for anything else ask the user for a path.
case "$length" in
  151)
    configfile=config/config.yaml
    ;;
  251)
    configfile=config/config_miseq.yaml
    ;;
  *)
    echo 'Sequence length is '"$length"', please provide a custom config file (e.g. config_custom.yaml): '
    read -r configfile
    ;;
esac
# Record the outcome on screen and in the log.
{
  echo
  echo "Read length was determined as: "
  echo "$length"
  echo "$configfile" "will be used as configfile"
  echo
} 2>&1 | tee -a "$log"
# Check if snakemake is found; otherwise install the pinned version with conda
# (this goes into whichever conda environment is currently active).
if command -v snakemake > /dev/null; then ##version?
  echo 2>&1 | tee -a "$log"
  echo "snakemake found" 2>&1 | tee -a "$log"
else
  echo 2>&1 | tee -a "$log"
  echo "snakemake will be installed" 2>&1 | tee -a "$log"
  # Without snakemake the pipeline cannot run, so a failed installation is
  # fatal instead of being silently ignored.
  if ! conda install -y snakemake=5.14.0; then
    echo "Installation of snakemake failed. Exiting." 2>&1 | tee -a "$log" >&2
    exit 1
  fi
fi
sleep 1
# concatenate forward and reverse and put into data/ folder:
#######################################
# Merge all R1 files and all R2 files of every sample into
# data/<sample>_R1.fastq.gz and data/<sample>_R2.fastq.gz.
# The sample name is the part of a file name before its first underscore.
# Arguments: the input fastq.gz file names (R1 and R2 files, any order)
# Outputs:   prints each sample name once to stdout; writes into ./data
#######################################
concatenate_reads() {
  local file sample
  local seen=$'\n'   # newline-delimited list of samples already handled
  mkdir -p data
  for file in "$@"; do
    sample="${file%%_*}"
    # R1 and R2 of one sample give the same name; concatenate only once.
    case "$seen" in
      *$'\n'"$sample"$'\n'*) continue ;;
    esac
    seen+="$sample"$'\n'
    echo "$sample"
    # The underscore anchors the glob to this sample, so that sample "A" does
    # not also pick up the files of a sample called "A2".
    cat "$sample"_*R1*.fastq.gz > data/"$sample"_R1.fastq.gz
    cat "$sample"_*R2*.fastq.gz > data/"$sample"_R2.fastq.gz
  done
}
concatenate_reads "${files[@]}"
# run the snakemake pipeline
# The command is built once as an array so that the line shown to the user and
# the command that is executed cannot drift apart.
snakemake_cmd=(
  snakemake --snakefile Snakefile.assembly --use-conda --cores all
  --printshellcmds --latency-wait 60 --keep-going
  --config configfile="$configfile"
)
echo "${snakemake_cmd[*]}"
"${snakemake_cmd[@]}" 2>&1 | tee -a "$log"
# $? after a pipeline is the status of its last stage (tee), which hides a
# failing snakemake run. PIPESTATUS[0] is snakemake's own exit status and has
# to be saved right away, before another command overwrites the array.
snakemake_status=${PIPESTATUS[0]}
#for the CI
if [[ "$snakemake_status" -eq 0 ]]; then
  echo "Successfully finished job"
  exit 0
else
  echo "Could not finish job" >&2
  exit 1
fi