Skip to content

Commit

Permalink
Add the script of the pipeline
Browse files Browse the repository at this point in the history
  • Loading branch information
AlbertRockG committed Jul 2, 2023
1 parent 92e46a2 commit d9bfcc6
Showing 1 changed file with 162 additions and 0 deletions.
162 changes: 162 additions & 0 deletions Fast5Pipeline
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
#!/usr/bin/env bash
# -*- coding: utf-8 -*-
# Created on Fri June 23 14:31:04 2023
# @author: AlbertRockG

# Function to display usage information
usage() {
echo "Usage: $0 -c <guppy_config> -i <input_dir> -o <output_dir> -k <barcode_kits> -m <medaka_model> -b <batchsize>"
echo "Options:"
echo " -c, --guppy-config Guppy config file"
echo " -i, --input-dir Input directory"
echo " -o, --output-dir Output directory: different from the input directory"
echo " -k, --barcode-kits Barcode kits"
echo " -m, --medaka-model Medaka model: only high accuracy models supported"
echo " -b, --batchsize GPU memory control: controls memory use (default: 100)."
exit 1
}

# Function to concatenate fastq.gz files in a subfolder
concatenate_fastq() {
local subfolder=$1

echo "Processing subfolder: $subfolder"

# Create the "joined" folder in the parent directory
parent_dir=$(dirname "$subfolder")
joined_folder="$parent_dir/joined"
mkdir -p "$joined_folder"

# Create output file name based on subfolder name
output_file="$joined_folder/$(basename "$subfolder").fastq.gz"

# Concatenate fastq.gz files into the output file
gunzip -c "$subfolder"/*.fastq.gz | gzip -c > "$output_file"

echo "Concatenated fastq.gz files into: $output_file"
echo ""
}

# Function to run flye on a joined_fastq file
run_fly() {
local input_file=$1

echo "Processing file: $input_file"

# Create the "assembled" folder in the parent directory
parent_dir="${input_file%%/*}"
assembly_folder="$parent_dir/assembled"
mkdir -p "$assembly_folder"

# Create output file name based on subfolder name
output="$assembly_folder/$(basename "$input_file")"

# Run flye on the input file
flye -t "$(nproc)" --out-dir "$output" --nano-hq "$input_file"

echo "Flye analysis completed for: $input_file"
echo ""
}

# Function to run flye on a joined_fastq file
run_medaka() {
local input_file=$1

echo "Processing file: $input_file"

# Create the "polished" folder in the parent directory
main_dir="${input_file%%/*}"
assembly_dir="$(basename "$input_file")"
polished_folder="$main_dir/polished"
mkdir -p "$polished_folder"

# Create output dir name based on the input file name
output="$polished_folder/$(basename "$input_file")"

# Run medaka on the input file
medaka_consensus -i "$input_file" \
-d "$main_dir"/assembled/"$assembly_dir"/assembly.fasta \
-o "$output" -m "$medaka_model" -t "$(nproc)" -b "$batchsize"

echo "Medaka analysis completed for: $input_file"
echo ""
}


# Parse command-line options
while getopts ":c:i:o:k:m:b:" opt; do
case "${opt}" in
c)
guppy_config=${OPTARG}
;;
i)
input_dir=${OPTARG}
;;
o)
output_dir=${OPTARG}
;;
k)
barcode_kits=${OPTARG}
;;
m)
medaka_model=${OPTARG}
;;
b)
batchsize=${OPTARG}
;;
*)
usage
;;
esac
done

# Check if all required options are provided
if [[ -z $guppy_config || -z $input_dir || -z $output_dir || -z $barcode_kits || -z $medaka_model ]]; then
echo "Error: Missing required options"
usage
fi

# Step 1: Run guppy_basecaller
guppy_basecaller --disable_pings -x auto \
-c "$guppy_config" \
-i "$input_dir" --recursive \
-s "$output_dir/basecalled"

# Step 2: Run guppy_barcoder
guppy_barcoder -t "$(nproc)" --disable_pings -x auto \
--barcode_kits "$barcode_kits" --enable_trim_barcodes \
-i "$output_dir/basecalled" --recursive \
-s "$output_dir/trimmed" --compress_fastq

# Step 3: Join fastq.gz files

# Loop through subfolders in the input folder
for subfolder in "$output_dir"/trimmed/*; do
if [[ -d $subfolder ]]; then
concatenate_fastq "$subfolder"
fi
done

# Step 4: Run flye
for input_file in "$output_dir"/joined/*.fastq.gz; do
if [[ -f $input_file ]]; then
run_flye "$input_file"
fi
done

# Step 6: Run medaka_consensus
conda_env=medaka

if conda info --envs | grep -wq "$conda_env"; then
# Activate conda environment
conda activate medaka

for input_file in "$output_dir"/joined/*.fastq.gz; do
if [[ -f $input_file ]]; then
run_medaka "$input_file"
fi
done

# Deactivate conda environment
conda deactivate
fi

0 comments on commit d9bfcc6

Please sign in to comment.