From a4cb0362b2558e3e81888bb866bd9bb76e439997 Mon Sep 17 00:00:00 2001
From: bioaddict <rafgangbadja@gmail.com>
Date: Tue, 14 Nov 2023 10:41:03 +0100
Subject: [PATCH] Remove Fast5Pipeline

---
 .gitignore    |   6 +-
 Fast5Pipeline | 164 --------------------------------------------------
 2 files changed, 4 insertions(+), 166 deletions(-)
 delete mode 100755 Fast5Pipeline

diff --git a/.gitignore b/.gitignore
index 6c695b3..c78ad85 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,8 +5,10 @@ bin-release/
 [Bb]in/
 
 # Other files and folders
-.settings/
-./data/OUT_DIR/*
+/data/*
+/.gitpod.yml
+/.gitpod.Dockerfile
+/nextflow.config
 
 # files with extension
 *.swf
diff --git a/Fast5Pipeline b/Fast5Pipeline
deleted file mode 100755
index aee8038..0000000
--- a/Fast5Pipeline
+++ /dev/null
@@ -1,164 +0,0 @@
-#!/usr/bin/env bash
-# -*- coding: utf-8 -*-
-# Created on Fri June 23 14:31:04 2023    
-# @author: AlbertRockG
-
-export LC_ALL="C"
-set -euop pipefail
-### FUNCTIONS --------------------------------------------------------------------------------------------------------------------------
-# Function to display usage information
-usage() {
-    printf '
-Usage: %s [OPTIONS]
-
-    Basecalls, trims, joins, assembles and polish bacteria isolates genome sequencing
-    data. Take fast5 files as inputs.
-
-    OPTIONS
-        -c  Guppy config file
-        -i  Input directory
-        -o  Output directory: different from the input directory
-        -k  Barcode kits
-        -m  Medaka model: only high accuracy models supported
-        -b  GPU memory control: controls memory use (default: 100)
-        -v  Enable verbosity
-    ' "$(basename "$0")" &&
-        exit 1
-}
-
-err_msg() {
-    echo "${0##*/}: $*" >&2; 
-}
-
-emit() {
-    (( !VERBOSE )) || err_msg "$*"; 
-}
-
-err_exit() {
-    err_msg "$*";
-    exit 1; 
-}
-
-# Function to concatenate fastq.gz files in a subfolder
-concatenate_fastq() {
-    local subfolder="$1"
-
-    emit "Processing subfolder: $subfolder"
-
-    # Create the "joined" folder in the parent directory
-    parent_dir="${subfolder%/*/*}"
-    joined_folder="$parent_dir/joined"
-    mkdir -p "$joined_folder"
-
-    # Create output file name based on subfolder name
-    output_file="$joined_folder/$(basename "$subfolder").fastq.gz"
-
-    # Concatenate fastq.gz files into the output file
-    gunzip -c "$subfolder"/*.fastq.gz | gzip -c > "$output_file"
-
-    emit "Concatenated fastq.gz files into: $output_file"
-    emit ""
-}
-
-# Function to run flye on a joined_fastq file
-run_flye() {
-    local input_file="$1"
-
-    emit "Processing file: $input_file"
-
-    # Create the "assembled" folder in the parent directory
-    parent_dir="${input_file%/*/*}"
-    assembly_folder="$parent_dir/assembled"
-    mkdir -p "$assembly_folder"
-
-    # Create output file name based on subfolder name
-    output="$assembly_folder/$(basename "$input_file")"
-
-    # Run flye on the input file
-    flye -t "$(nproc)" --out-dir "$output" --nano-hq "$input_file"
-
-    emit "Flye analysis completed for: $input_file"
-    emit ""
-}
-
-# Function to run flye on a joined_fastq file
-run_medaka() {
-    local input_file="$1"
-
-    emit "Processing file: $input_file"
-
-    # Create the "polished" folder in the parent directory
-    main_dir="${input_file%/*/*}"
-    assembly_dir="$(basename "$input_file")"
-    polished_folder="$main_dir/polished"
-    mkdir -p "$polished_folder"
-
-    # Create output dir name based on the input file name
-    output="$polished_folder/$(basename "$input_file")"
-
-    # Run medaka on the input file
-    medaka_consensus -i "$input_file" \
-                    -d "$main_dir"/assembled/"$assembly_dir"/assembly.fasta \
-                    -o "$output" -m "$medaka_model" -t "$(nproc)" -b "$BATCHSIZE"
-
-    emit "Medaka analysis completed for: $input_file"
-    emit ""
-}
-
-### VARIABLE DECLARATION ---------------------------------------------------------------------------------------------------------------
-
-VERBOSE=0
-BATCHSIZE=100
-
-### ARGS PARSING -----------------------------------------------------------------------------------------------------------------------
-# Parse command-line options
-
-while getopts "c:i:o:k:m:bv" opt; do
-    case "${opt}" in
-        c) guppy_config=${OPTARG};;
-        i) input_dir=${OPTARG};;
-        o) output_dir=${OPTARG};;
-        k) barcode_kits=${OPTARG};;
-        m) medaka_model=${OPTARG};;
-        b) BATCHSIZE=${OPTARG};;
-        v) VERBOSE=1;;
-        *) usage;;
-    esac
-done
-
-# Check if all required options are provided
-
-if [[ -z "${guppy_config:-}" || -z "${input_dir:-}" || -z "${output_dir:-}" || -z "${barcode_kits:-}" || -z "${medaka_model:-}" ]]; then
-    usage
-fi
-
-### DATA PROCESSING --------------------------------------------------------------------------------------------------------------------
-
-# Step 1: Run guppy_basecaller
-guppy_basecaller --disable_pings -x auto \
-    -c "$guppy_config" \
-    -i "$input_dir" --recursive \
-    -s "$output_dir/basecalled"
-
-# Step 2: Run guppy_barcoder
-guppy_barcoder -t "$(nproc)" --disable_pings -x auto \
-    --barcode_kits "$barcode_kits" --enable_trim_barcodes \
-    -i "$output_dir/basecalled" --recursive \
-    -s "$output_dir/trimmed" --compress_fastq
-
-# Step 3: Join fastq.gz files
-
-# Loop through subfolders in the input folder
-for subfolder in "$output_dir"/trimmed/*; do
-    [[ ! -d "$subfolder" ]] || concatenate_fastq "$subfolder" || emit "no such directory: $subfolder"
-done
-
-# Step 4: Run flye
-for input_file in "$output_dir"/joined/*.fastq.gz; do
-    [[ ! -f $input_file ]] || run_flye "$input_file" || emit "no such file: $input_file"
-done
-
-# Step 6: Run medaka_consensus
-for input_file in "$output_dir"/joined/*.fastq.gz; do
-    [[ ! -f $input_file ]] || run_medaka "$input_file" || emit "no such file: $input_file"
-done