-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
92e46a2
commit d9bfcc6
Showing
1 changed file
with
162 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,162 @@ | ||
#!/usr/bin/env bash | ||
# -*- coding: utf-8 -*- | ||
# Created on Fri June 23 14:31:04 2023 | ||
# @author: AlbertRockG | ||
|
||
# Function to display usage information | ||
usage() { | ||
echo "Usage: $0 -c <guppy_config> -i <input_dir> -o <output_dir> -k <barcode_kits> -m <medaka_model> -b <batchsize>" | ||
echo "Options:" | ||
echo " -c, --guppy-config Guppy config file" | ||
echo " -i, --input-dir Input directory" | ||
echo " -o, --output-dir Output directory: different from the input directory" | ||
echo " -k, --barcode-kits Barcode kits" | ||
echo " -m, --medaka-model Medaka model: only high accuracy models supported" | ||
echo " -b, --batchsize GPU memory control: controls memory use (default: 100)." | ||
exit 1 | ||
} | ||
|
||
# Function to concatenate fastq.gz files in a subfolder | ||
concatenate_fastq() { | ||
local subfolder=$1 | ||
|
||
echo "Processing subfolder: $subfolder" | ||
|
||
# Create the "joined" folder in the parent directory | ||
parent_dir=$(dirname "$subfolder") | ||
joined_folder="$parent_dir/joined" | ||
mkdir -p "$joined_folder" | ||
|
||
# Create output file name based on subfolder name | ||
output_file="$joined_folder/$(basename "$subfolder").fastq.gz" | ||
|
||
# Concatenate fastq.gz files into the output file | ||
gunzip -c "$subfolder"/*.fastq.gz | gzip -c > "$output_file" | ||
|
||
echo "Concatenated fastq.gz files into: $output_file" | ||
echo "" | ||
} | ||
|
||
# Function to run flye on a joined_fastq file | ||
run_fly() { | ||
local input_file=$1 | ||
|
||
echo "Processing file: $input_file" | ||
|
||
# Create the "assembled" folder in the parent directory | ||
parent_dir="${input_file%%/*}" | ||
assembly_folder="$parent_dir/assembled" | ||
mkdir -p "$assembly_folder" | ||
|
||
# Create output file name based on subfolder name | ||
output="$assembly_folder/$(basename "$input_file")" | ||
|
||
# Run flye on the input file | ||
flye -t "$(nproc)" --out-dir "$output" --nano-hq "$input_file" | ||
|
||
echo "Flye analysis completed for: $input_file" | ||
echo "" | ||
} | ||
|
||
# Function to run flye on a joined_fastq file | ||
run_medaka() { | ||
local input_file=$1 | ||
|
||
echo "Processing file: $input_file" | ||
|
||
# Create the "polished" folder in the parent directory | ||
main_dir="${input_file%%/*}" | ||
assembly_dir="$(basename "$input_file")" | ||
polished_folder="$main_dir/polished" | ||
mkdir -p "$polished_folder" | ||
|
||
# Create output dir name based on the input file name | ||
output="$polished_folder/$(basename "$input_file")" | ||
|
||
# Run medaka on the input file | ||
medaka_consensus -i "$input_file" \ | ||
-d "$main_dir"/assembled/"$assembly_dir"/assembly.fasta \ | ||
-o "$output" -m "$medaka_model" -t "$(nproc)" -b "$batchsize" | ||
|
||
echo "Medaka analysis completed for: $input_file" | ||
echo "" | ||
} | ||
|
||
|
||
# Parse command-line options | ||
while getopts ":c:i:o:k:m:b:" opt; do | ||
case "${opt}" in | ||
c) | ||
guppy_config=${OPTARG} | ||
;; | ||
i) | ||
input_dir=${OPTARG} | ||
;; | ||
o) | ||
output_dir=${OPTARG} | ||
;; | ||
k) | ||
barcode_kits=${OPTARG} | ||
;; | ||
m) | ||
medaka_model=${OPTARG} | ||
;; | ||
b) | ||
batchsize=${OPTARG} | ||
;; | ||
*) | ||
usage | ||
;; | ||
esac | ||
done | ||
|
||
# Check if all required options are provided | ||
if [[ -z $guppy_config || -z $input_dir || -z $output_dir || -z $barcode_kits || -z $medaka_model ]]; then | ||
echo "Error: Missing required options" | ||
usage | ||
fi | ||
|
||
# Step 1: Run guppy_basecaller | ||
guppy_basecaller --disable_pings -x auto \ | ||
-c "$guppy_config" \ | ||
-i "$input_dir" --recursive \ | ||
-s "$output_dir/basecalled" | ||
|
||
# Step 2: Run guppy_barcoder | ||
guppy_barcoder -t "$(nproc)" --disable_pings -x auto \ | ||
--barcode_kits "$barcode_kits" --enable_trim_barcodes \ | ||
-i "$output_dir/basecalled" --recursive \ | ||
-s "$output_dir/trimmed" --compress_fastq | ||
|
||
# Step 3: Join fastq.gz files | ||
|
||
# Loop through subfolders in the input folder | ||
for subfolder in "$output_dir"/trimmed/*; do | ||
if [[ -d $subfolder ]]; then | ||
concatenate_fastq "$subfolder" | ||
fi | ||
done | ||
|
||
# Step 4: Run flye | ||
for input_file in "$output_dir"/joined/*.fastq.gz; do | ||
if [[ -f $input_file ]]; then | ||
run_flye "$input_file" | ||
fi | ||
done | ||
|
||
# Step 6: Run medaka_consensus | ||
conda_env=medaka | ||
|
||
if conda info --envs | grep -wq "$conda_env"; then | ||
# Activate conda environment | ||
conda activate medaka | ||
|
||
for input_file in "$output_dir"/joined/*.fastq.gz; do | ||
if [[ -f $input_file ]]; then | ||
run_medaka "$input_file" | ||
fi | ||
done | ||
|
||
# Deactivate conda environment | ||
conda deactivate | ||
fi |