From 126c0b6f5c6f2bfc062382c2623030c3c2ce1224 Mon Sep 17 00:00:00 2001
From: Patrick Sorn <patrick.sorn@tron-mainz.de>
Date: Wed, 14 Jun 2023 10:11:05 +0200
Subject: [PATCH 1/3] Added ONT modules and scripts - not yet functional

---
 bin/optimus_no_prime.py           |  65 ++++++++++++
 bin/optimus_no_prime.sh           |  23 ++++
 modules/09_ont_qc.nf              |  43 ++++++++
 modules/10_ont_preprocess.nf      | 132 +++++++++++++++++++++++
 modules/11_ont_primer_removal.nf  |  61 +++++++++++
 modules/12_ont_variant_calling.nf |  66 ++++++++++++
 modules/13_ont_annotate.nf        | 169 ++++++++++++++++++++++++++++++
 7 files changed, 559 insertions(+)
 create mode 100755 bin/optimus_no_prime.py
 create mode 100755 bin/optimus_no_prime.sh
 create mode 100755 modules/09_ont_qc.nf
 create mode 100755 modules/10_ont_preprocess.nf
 create mode 100755 modules/11_ont_primer_removal.nf
 create mode 100755 modules/12_ont_variant_calling.nf
 create mode 100755 modules/13_ont_annotate.nf

diff --git a/bin/optimus_no_prime.py b/bin/optimus_no_prime.py
new file mode 100755
index 0000000..71259ae
--- /dev/null
+++ b/bin/optimus_no_prime.py
@@ -0,0 +1,65 @@
+import os
+import sys
+import re
+
+def list_primersets(path):
+    ignore = ("5p_cov",".txt",".bedGraph")
+    filenames = os.listdir(path)
+    filtered_filenames = [f for f in filenames if not any(j in f for j in ignore) and ".bed" in f]
+    return filtered_filenames
+
+def get_libsize(path):
+    with open(path, 'r') as file:
+        sum = 0
+        for line in file:
+            columns = line.split()
+            if len(columns) >= 5:
+                sum += float(columns[4])
+    return sum
+
+def get_score(path, T, G):
+    A = 0
+    S = 0
+    with open(path, 'r') as file:
+        for line in file:
+            columns = line.split()
+            if len(columns) >= 7:
+                S += float(columns[2]) - float(columns[1])
+                A += float(columns[6])
+    S = S / G
+    P = 1 - (A/T)
+    score = A / (T * S * P)
+    return score
+
+
+# Run
+G = 29903 # SARS CoV2 Genomelength
+
+# Path where to find primerset coverages generated by bedtools (optimus_no_prime.sh)
+path = sys.argv[1]
+
+sampleId = sys.argv[2]
+
+total_cov_file = [f for f in os.listdir(path) if "5p_cov" in f and "Graph" not in f]
+primersets = list_primersets(path)
+
+max = 0.0
+likely_primerset = primersets[0]
+for set in primersets:
+    libsize = get_libsize(path + total_cov_file[0])
+    score = get_score(path + set, libsize, G)
+    if score >= max:
+        likely_primerset = set
+        max = score
+
+# Remove sampleId from primer-set name
+likely_primerset = re.sub(sampleId + "_","",likely_primerset,1)
+
+# If score is too low, it could either mean the used primer-set is not in the library or primers have already been trimmed.
+# In this case return NA and primer clipping will be skipped.
+if max < 5:
+    likely_primerset = "NA"
+
+sys.stdout.write(likely_primerset)
+sys.stdout.flush()
+sys.exit(0)
diff --git a/bin/optimus_no_prime.sh b/bin/optimus_no_prime.sh
new file mode 100755
index 0000000..5380724
--- /dev/null
+++ b/bin/optimus_no_prime.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+
+primerDir=${1}
+sampleId=${2}
+bam=${3}
+
+# Get coverage of read-ends across genome
+	bedtools genomecov -5 -strand + -ibam ${bam} -dz \
+		| awk '{OFS="\t"}{x=$2+1}{print $1,$2,x,$1":"$2"-"x":-",$3,"+"}' > temp.bed
+	bedtools genomecov -5 -strand - -ibam ${bam} -dz \
+		| awk '{OFS="\t"}{x=$2+1}{print $1,$2,x,$1":"$2"-"x":-",$3,"-"}' >> temp.bed
+	sort -k1,1 -k2,2n temp.bed > ${sampleId}_5p_cov.bed
+	cut -f1,2,3,5 temp.bed > ${sampleId}_5p_cov.bedGraph
+	rm temp.bed
+
+# Overlap read-end coverage with primer BED files
+	primerList=($(find ${primerDir} -name "*.bed" -printf "%f\n"))
+	for pset in ${primerList[@]}
+	do
+		bedtools map -a ${primerDir}/${pset} -b ${sampleId}_5p_cov.bed -s -c 5 -o sum -null 0 \
+			| awk -F'\t' '{OFS="\t"}{print $1,$2,$3,$4,$5,$6,$(NF)}' \
+			> ${sampleId}_${pset}
+	done
diff --git a/modules/09_ont_qc.nf b/modules/09_ont_qc.nf
new file mode 100755
index 0000000..192623b
--- /dev/null
+++ b/modules/09_ont_qc.nf
@@ -0,0 +1,43 @@
+process FASTQC {
+	cpus 2
+	memory "3 GB"
+	publishDir "${params.outDir}/${sampleId}/qc", mode: "copy"
+	tag "${sampleId}"
+
+	conda (params.enable_conda ? "bioconda::fastqc=0.11.9" : null)
+	
+	input:
+		tuple val(sampleId), path(fq)
+	
+	output:
+		path("*fastqc*")
+
+	"""
+	fastqc \
+		--threads 2 \
+		${fq}
+	"""
+}
+
+process NANOPLOT {
+	cpus 2
+	memory "3 GB"
+	publishDir "${params.outDir}/${sampleId}/qc", mode: "copy"
+	tag "${sampleId}"
+
+	conda (params.enable_conda ? "bioconda::nanoplot=1.40.2" : null)
+	
+	input:
+		tuple val(sampleId), path(fq)
+
+	output:
+		path("*${sampleId}_*")
+	
+	"""
+	NanoPlot \
+		--fastq ${fq} \
+		-p ${sampleId}_ \
+		-t 1 \
+		--tsv_stats
+	"""
+}
diff --git a/modules/10_ont_preprocess.nf b/modules/10_ont_preprocess.nf
new file mode 100755
index 0000000..b4bb818
--- /dev/null
+++ b/modules/10_ont_preprocess.nf
@@ -0,0 +1,132 @@
+process NANOFILT {
+	cpus 3
+	memory params.memory
+	tag "${sampleId}"
+
+	conda (params.enable_conda ? "bioconda::nanofilt=2.8.0" : null)
+	
+	input:
+		tuple val(sampleId), path(inputFq)
+
+	output:
+		tuple val(sampleId), path("${sampleId}_qctrim.fq.gz"), emit: fq
+	
+	"""
+	gunzip -c ${inputFq} | NanoFilt \
+	--quality 10 \
+	--length 300 \
+	--logfile ${sampleId}_qctrim.log \
+	| gzip > ${sampleId}_qctrim.fq.gz
+	"""
+}
+
+process PORECHOP {
+	cpus params.cpus
+	memory params.memory
+	tag "${sampleId}"
+
+	conda (params.enable_conda ? "bioconda::porechop=0.2.4" : null)
+
+	input:
+		tuple val(sampleId), path(inputFq)
+
+	output:
+		tuple val(sampleId), path("${sampleId}_noAdapter.fq.gz"), emit: fq
+		path("${sampleId}_porechop.log")
+	
+	"""
+	porechop \
+		--input ${inputFq} \
+		--output ${sampleId}_noAdapter.fq.gz \
+		--threads ${params.cpus} \
+		--format "fastq" \
+		--verbosity 1 > ${sampleId}_porechop.log
+	"""
+}
+
+process CHOPPER {
+	cpus params.cpus+1
+	memory params.memory
+	publishDir "${params.outDir}/${sampleId}", mode: "copy", pattern: "*.log"
+	tag "${sampleId}"
+
+	conda (params.enable_conda ? "bioconda::chopper=0.5.0" : null)
+
+	input:
+		tuple val(sampleId), path(inputFq)
+
+	output:
+		tuple val(sampleId), path("${sampleId}_qctrim.fq.gz"), emit: fq
+		path("${sampleId}_chopper.log")
+	
+	"""
+	cat ${inputFq} \
+	| chopper \
+		--quality 10 \
+		--minlength 300 \
+		--threads ${params.cpus} \
+		2> ${sampleId}_chopper.log \
+	| gzip > ${sampleId}_qctrim.fq.gz
+	"""
+}
+
+process PORECHOP_ABI {
+	cpus params.cpus
+	memory params.memory
+	publishDir "${params.outDir}/${sampleId}", mode: "copy", pattern: "*.log"
+	tag "${sampleId}"
+
+	conda (params.enable_conda ? "bioconda::porechop_abi=0.5.0" : null)
+
+	input:
+		tuple val(sampleId), path(inputFq)
+
+	output:
+		tuple val(sampleId), path("${sampleId}_noAdapter.fq.gz"), emit: fq
+		path("${sampleId}_porechop_abi.log")
+	
+	"""
+	porechop_abi \
+		--ab_initio \
+		--input ${inputFq} \
+		--output ${sampleId}_noAdapter.fq.gz \
+		--threads ${params.cpus} \
+		--format "fastq.gz" \
+		--verbosity 1 > "${sampleId}_porechop_abi.log"
+	"""
+}
+
+process MINIMAP2 {
+	cpus params.cpus
+	memory params.memory
+	publishDir "${params.outDir}/${sampleId}", mode: "copy"
+	tag "${sampleId}"
+
+	conda (params.enable_conda ? "bioconda::minimap2=2.24 bioconda::samtools=1.16.1" : null)
+
+	input:
+		tuple val(sampleId), path(fq)
+
+	output:
+		tuple val(sampleId), path("${sampleId}.bam"), path("${sampleId}.bam.bai"), emit: bam
+
+	"""
+	cores=\$((${params.cpus} - 2))
+
+	minimap2 \
+		-x map-ont \
+		-a ${params.sarscov2_reference} \
+		-t \${cores} \
+		${fq} \
+	| samtools view \
+		-b \
+		- \
+	| samtools sort \
+		-o ${sampleId}.bam \
+		--threads 1 \
+		-T ${sampleId} \
+		-
+
+	samtools index ${sampleId}.bam
+	"""
+}
\ No newline at end of file
diff --git a/modules/11_ont_primer_removal.nf b/modules/11_ont_primer_removal.nf
new file mode 100755
index 0000000..448525e
--- /dev/null
+++ b/modules/11_ont_primer_removal.nf
@@ -0,0 +1,61 @@
+process OPTIMUS_NO_PRIME {
+	cpus 2
+	memory params.memory
+	publishDir "${params.outDir}/${sampleId}/optimus_no_prime", mode: "copy", pattern: "*bed*"
+	tag "${sampleId}"
+
+	conda (params.enable_conda ? "bioconda::bedtools=2.30.0 bioconda::samtools=1.16.1" : null)
+
+	input:
+		tuple val(sampleId), path(bam), path(bai)
+
+	output:
+		tuple val(sampleId), path(bam), path(bai), env(primerset), emit: prediction
+		path("${sampleId}_*")
+	
+	"""
+	${moduleDir}/optimus_no_prime.sh ${params.primerDir} ${sampleId} ${bam}
+	primerset=\$(python ${moduleDir}/optimus_no_prime.py ./ ${sampleId})
+	"""
+}
+
+process PRIMER_SOFTCLIP {
+	cpus 2
+	memory params.memory
+	publishDir "${params.outDir}/${sampleId}", mode: "copy"
+	tag "${sampleId}"
+
+	conda (params.enable_conda ? "bioconda::samtools=1.16.1" : null)
+
+	input:
+		tuple val(sampleId), path(bam), path(bai), val(bed)
+
+	output:
+		tuple val(sampleId), path("*_noprime.bam"), path("*_noprime.bam.bai"), emit: bam
+		path("${sampleId}_*"), optional: true
+
+	script:
+	if( bed == 'NA' ){
+		"""
+		mv ${bam} ${sampleId}_noprime.bam
+		mv ${bai} ${sampleId}_noprime.bam.bai
+		"""
+	} else {
+		"""
+		samtools ampliconclip \
+			-b ${params.primerDir}/${bed} \
+			-f ${sampleId}_ampliconclip_stats.txt \
+			--both-ends \
+			--strand \
+			--tolerance 5 \
+			${bam} \
+		| samtools sort \
+			-o ${sampleId}_noprime.bam \
+			-T ${sampleId} \
+			-
+
+		samtools index ${sampleId}_noprime.bam
+		"""
+	}
+}
+
diff --git a/modules/12_ont_variant_calling.nf b/modules/12_ont_variant_calling.nf
new file mode 100755
index 0000000..aabbffa
--- /dev/null
+++ b/modules/12_ont_variant_calling.nf
@@ -0,0 +1,66 @@
+process NANOCALLER {
+	cpus params.cpus
+	memory params.memory
+	publishDir "${params.outDir}/${sampleId}/variant_calls/nanocaller", mode: "copy", pattern: "*vcf*"
+	tag "${sampleId}"
+
+	conda (params.enable_conda ? "bioconda::nanocaller=3.0.1" : null)
+
+	input:
+		tuple val(sampleId), path(bam), path(bai)
+
+	output:
+		tuple val(sampleId), path(bam), path(bai), path("${sampleId}.vcf.gz"), val("nanocaller"), emit: vcf
+		path("${sampleId}*")
+	
+	"""
+	NanoCaller \
+	--bam ${bam} \
+	--ref ${params.reference} \
+	--preset ont \
+	--maxcov 1000 \
+	--mincov 10 \
+	--cpu ${params.cpus} \
+	--min_allele_freq 0.05 \
+	--ins_threshold 0.05 \
+	--del_threshold 0.05 \
+	--sample ${sampleId} \
+	--prefix ${sampleId}
+	"""
+}
+
+process CLAIR3 {
+	cpus params.cpus
+	memory params.memory
+	publishDir "${params.outDir}/${sampleId}/variant_calls", mode: "copy", pattern: "clair3"
+	tag "${sampleId}"
+
+	conda (params.enable_conda ? "bioconda::clair3=1.0.0" : null)
+
+	input:
+		tuple val(sampleId), path(bam), path(bai)
+
+	output:
+		tuple val(sampleId), path(bam), path(bai), path("clair3/merge_output.vcf.gz"), val("clair3"), emit: vcf
+		path("clair3")
+	
+	"""
+	run_clair3.sh \
+	--bam_fn=${bam} \
+	--ref_fn=${params.reference} \
+	--threads=${params.cpus} \
+	--platform="ont" \
+	--model_path=${params.clair3model} \
+	--include_all_ctgs \
+	--min_coverage=10 \
+	--snp_min_af=0.05 \
+	--indel_min_af=0.05 \
+	--output=clair3 \
+	--chunk_size=5000
+	
+	# Quick fix for RefCalls from Clair3 which mess up Vafator run_clair3
+	# Remove RefCalls from VCF
+	mv clair3/merge_output.vcf.gz clair3/temp.vcf.gz
+	zcat clair3/temp.vcf.gz | grep -v RefCall | gzip > clair3/merge_output.vcf.gz
+	"""
+}
diff --git a/modules/13_ont_annotate.nf b/modules/13_ont_annotate.nf
new file mode 100755
index 0000000..63b32b1
--- /dev/null
+++ b/modules/13_ont_annotate.nf
@@ -0,0 +1,169 @@
+process VAFATOR {
+    cpus 1
+    memory "3 GB"
+	tag "${sampleId}"
+	publishDir "${params.outDir}/${sampleId}/variant_calls/${caller}", mode: "copy"	
+
+	conda (params.enable_conda ? "bioconda::vafator=1.2.5" : null)
+
+	input:
+	tuple val(sampleId), path(bam), path(bai), path(vcf), val(caller)
+	
+	output:
+	tuple val(sampleId), path("${sampleId}_${caller}_vaf.vcf"), val(caller), emit: annotated_vcf
+
+	script:
+	mq_param = params.vafator_min_mapping_quality != false ? "--mapping-quality " + params.vafator_min_mapping_quality : ""
+	bq_param = params.vafator_min_base_quality != false ? "--base-call-quality " + params.vafator_min_base_quality : ""
+	"""
+	vafator \
+	--input-vcf ${vcf} \
+	--output-vcf ${sampleId}_${caller}_vaf.vcf \
+	--bam vafator ${bam} ${mq_param} ${bq_param}
+	"""
+}
+
+process SNPEFF {
+    cpus 1
+    memory params.memory
+	publishDir "${params.outDir}/${sampleId}/variant_calls/${caller}", mode: "copy"
+	tag "${sampleId}"
+
+	conda (params.enable_conda ? "bioconda::snpeff=5.0 bioconda::samtools=1.16.1" : null)
+
+	input:
+	tuple val(sampleId), path(vcf), val(caller)
+	val(snpeff_data)
+	val(snpeff_config)
+	val(snpeff_organism)
+
+	output:
+	tuple val(sampleId), path("${sampleId}_${caller}_snpeff.vcf.gz"), emit: annotated_vcf
+	path("${sampleId}_${caller}_snpeff.vcf.gz.tbi")
+
+	"""
+	# for some reason the snpEff.config file needs to be in the folder where snpeff runs...
+	cp ${snpeff_config} .
+
+	snpEff eff -Xmx${params.memory} -dataDir ${snpeff_data} \
+	-noStats -no-downstream -no-upstream -no-intergenic -no-intron -onlyProtein -hgvs1LetterAa -noShiftHgvs \
+	${snpeff_organism} ${vcf} | bgzip -c > ${sampleId}_${caller}_snpeff.vcf.gz
+
+	tabix -p vcf ${sampleId}_${caller}_snpeff.vcf.gz
+	"""
+}
+
+
+process VARIANT_VAF_ANNOTATION {
+	cpus 1
+	memory "3 GB"
+	publishDir "${params.outDir}", mode: "copy"
+	tag "${sampleId}"
+
+	conda (params.enable_conda ? "conda-forge::gsl=2.7 bioconda::bcftools=1.14" : null)
+
+	input:
+		tuple val(sampleId), path(vcf)
+
+	output:
+		tuple val(sampleId), path("${sampleId}.lowfreq_subclonal.vcf"), emit: vaf_annotated
+
+	"""
+	tabix -p vcf ${vcf}
+
+	# annotates low frequency and subclonal variants
+	bcftools view -Ob ${vcf} | \
+	bcftools filter \
+	--exclude 'INFO/vafator_af < ${params.low_frequency_variant_threshold}' \
+	--soft-filter LOW_FREQUENCY - | \
+	bcftools filter \
+	--exclude 'INFO/vafator_af >= ${params.low_frequency_variant_threshold} && INFO/vafator_af < ${params.subclonal_variant_threshold}' \
+	--soft-filter SUBCLONAL \
+	--output-type v - | \
+	bcftools filter \
+	--exclude 'INFO/vafator_af >= ${params.subclonal_variant_threshold} && INFO/vafator_af < ${params.lq_clonal_variant_threshold}' \
+	--soft-filter LOW_QUALITY_CLONAL \
+	--output-type v - > ${sampleId}.lowfreq_subclonal.vcf
+	"""
+}
+
+/**
+Add SARS-CoV-2 specific annotations: conservation, Pfam domains and problematic sites.
+Also, according to problematic sites described in DeMaio et al. (2020) we filter out any variants at the beginning and
+end of the genome.
+*/
+process VARIANT_SARSCOV2_ANNOTATION {
+	cpus 1
+	memory "3 GB"
+	publishDir "${params.output}", mode: "copy"
+	tag "${sampleId}"
+
+	conda (params.enable_conda ? "conda-forge::gsl=2.7 bioconda::bcftools=1.14" : null)
+
+	input:
+	tuple val(sampleId), path(vcf)
+
+	output:
+	tuple val(sampleId), path("${sampleId}.annotated_sarscov2.vcf"), emit: annotated_vcfs
+
+	"""
+	bcftools annotate \
+	--annotations ${params.conservation_sarscov2} \
+	--header-lines ${params.conservation_sarscov2_header} \
+	-c CHROM,FROM,TO,CONS_HMM_SARS_COV_2 \
+	--output-type z ${vcf} | \
+	bcftools annotate \
+	--annotations ${params.conservation_sarbecovirus} \
+	--header-lines ${params.conservation_sarbecovirus_header} \
+	-c CHROM,FROM,TO,CONS_HMM_SARBECOVIRUS \
+	--output-type z - | \
+	bcftools annotate \
+	--annotations ${params.conservation_vertebrate} \
+	--header-lines ${params.conservation_vertebrate_header} \
+	-c CHROM,FROM,TO,CONS_HMM_VERTEBRATE_COV \
+	--output-type z - | \
+	bcftools annotate \
+	--annotations ${params.pfam_names} \
+	--header-lines ${params.pfam_names_header} \
+	-c CHROM,FROM,TO,PFAM_NAME \
+	--output-type z - | \
+	bcftools annotate \
+	--annotations ${params.pfam_descriptions} \
+	--header-lines ${params.pfam_descriptions_header} \
+	-c CHROM,FROM,TO,PFAM_DESCRIPTION - | \
+	bcftools filter \
+	--exclude 'POS <= 55 | POS >= 29804' \
+	--output-type z - > annotated_sarscov2.vcf.gz
+
+	tabix -p vcf annotated_sarscov2.vcf.gz
+
+	bcftools annotate \
+	--annotations ${params.problematic_sites} \
+	--columns INFO/problematic:=FILTER annotated_sarscov2.vcf.gz > ${sampleId}.annotated_sarscov2.vcf
+	"""
+}
+
+process PANGOLIN {
+    cpus 1
+    memory "3 GB"
+	publishDir "${params.outDir}", mode: "copy"
+	tag "${sampleId}"
+
+	conda (params.enable_conda ? "bioconda::pangolin=4.1.2" : null)
+
+	input:
+	tuple val(sampleId), path(fasta)
+
+	output:
+	tuple val(name), path("*pangolin.csv")
+
+    """
+    mkdir tmp
+    #--decompress-model
+    pangolin \
+    ${fasta} \
+    --outfile ${sampleId}.pangolin.csv \
+    --tempdir ./tmp \
+    --threads 1
+    """
+}
\ No newline at end of file

From a90879238e61d02d6a4871d91ca53c6bb1873343 Mon Sep 17 00:00:00 2001
From: Patrick Sorn <patrick.sorn@tron-mainz.de>
Date: Wed, 14 Jun 2023 10:46:02 +0200
Subject: [PATCH 2/3] Updated main.nf with ONT pipeline calls

---
 main.nf                    |  54 ++++++++----
 modules/13_ont_annotate.nf | 169 -------------------------------------
 2 files changed, 39 insertions(+), 184 deletions(-)
 delete mode 100755 modules/13_ont_annotate.nf

diff --git a/main.nf b/main.nf
index 93beb95..34fd95f 100755
--- a/main.nf
+++ b/main.nf
@@ -13,6 +13,11 @@ include { VARIANT_ANNOTATION; VARIANT_SARSCOV2_ANNOTATION;
             VARIANT_VAF_ANNOTATION; VAFATOR } from './modules/06_variant_annotation'
 include { PANGOLIN_LINEAGE; VCF2FASTA } from './modules/07_lineage_annotation'
 include { BGZIP } from './modules/08_compress_vcf'
+include { FASTQC; NANOPLOT } from './modules/09_ont_qc'
+include { NANOFILT; PORECHOP; CHOPPER; PORECHOP_ABI; MINIMAP2 } from './modules/10_ont_preprocess'
+include { OPTIMUS_NO_PRIME; PRIMER_SOFTCLIP } from './modules/11_ont_primer_removal'
+include { NANOCALLER; CLAIR3 } from './modules/12_ont_variant_calling'
+
 
 
 params.help= false
@@ -60,6 +65,8 @@ params.input_fastqs_list = false
 params.input_fastas_list = false
 params.input_vcfs_list = false
 params.input_bams_list = false
+params.input_ont = false
+
 
 if (params.help) {
     log.info params.help_message
@@ -239,42 +246,55 @@ workflow {
             bam_files = ALIGNMENT_PAIRED_END.out
         }
         else {
-            READ_TRIMMING_SINGLE_END(input_fastqs)
-            ALIGNMENT_SINGLE_END(READ_TRIMMING_SINGLE_END.out[0], reference)
-            bam_files = ALIGNMENT_SINGLE_END.out
-        }
-        BAM_PREPROCESSING(bam_files, reference)
-        preprocessed_bams = BAM_PREPROCESSING.out.preprocessed_bams
-
-        if (primers) {
-            PRIMER_TRIMMING_IVAR(preprocessed_bams, primers)
-            preprocessed_bams = PRIMER_TRIMMING_IVAR.out.trimmed_bam
+	    if (params.input_ont) {
+	        FASTQC(input_fastqs)
+		PORECHOP(input_fastqs)
+		CHOPPER(PORECHOP.out.fq)
+		NANOPLOT(CHOPPER.out.fq)
+		input_bams = MINIMAP2(CHOPPER.out.fq).bam
+	    }
+	    else {
+                READ_TRIMMING_SINGLE_END(input_fastqs)
+                ALIGNMENT_SINGLE_END(READ_TRIMMING_SINGLE_END.out[0], reference)
+                bam_files = ALIGNMENT_SINGLE_END.out
         }
-        COVERAGE_ANALYSIS(preprocessed_bams)
+	if (params.input_ont) {
+	    preprocessed_bams = PRIMER_SOFTCLIP(OPTIMUS_NO_PRIME(input_bams).prediction)
+	}
+	else {
+            BAM_PREPROCESSING(bam_files, reference)
+            preprocessed_bams = BAM_PREPROCESSING.out.preprocessed_bams
+
+            if (primers) {
+                PRIMER_TRIMMING_IVAR(preprocessed_bams, primers)
+                preprocessed_bams = PRIMER_TRIMMING_IVAR.out.trimmed_bam
+            }
+            COVERAGE_ANALYSIS(preprocessed_bams)
 
         // variant calling
         vcfs_to_normalize = null
-        if (!params.skip_bcftools) {
+        if (!params.skip_bcftools && !params.input_ont) {
             VARIANT_CALLING_BCFTOOLS(preprocessed_bams, reference)
             vcfs_to_normalize = vcfs_to_normalize == null?
                 VARIANT_CALLING_BCFTOOLS.out : vcfs_to_normalize.concat(VARIANT_CALLING_BCFTOOLS.out)
         }
-        if (!params.skip_lofreq) {
+        if (!params.skip_lofreq && !params.input_ont) {
             VARIANT_CALLING_LOFREQ(preprocessed_bams, reference)
             vcfs_to_normalize = vcfs_to_normalize == null?
                 VARIANT_CALLING_LOFREQ.out : vcfs_to_normalize.concat(VARIANT_CALLING_LOFREQ.out)
         }
-        if (!params.skip_gatk) {
+        if (!params.skip_gatk && !params.input_ont) {
             VARIANT_CALLING_GATK(preprocessed_bams, reference)
             vcfs_to_normalize = vcfs_to_normalize == null?
                 VARIANT_CALLING_GATK.out : vcfs_to_normalize.concat(VARIANT_CALLING_GATK.out)
         }
-        if (!params.skip_ivar && gff) {
+        if (!params.skip_ivar && gff && !params.input_ont) {
             VARIANT_CALLING_IVAR(preprocessed_bams, reference, gff)
             IVAR2VCF(VARIANT_CALLING_IVAR.out, reference)
             vcfs_to_normalize = vcfs_to_normalize == null?
                 IVAR2VCF.out : vcfs_to_normalize.concat(IVAR2VCF.out)
         }
+	
     }
     else if (input_fastas) {
         if (!params.skip_pangolin) {
@@ -313,6 +333,10 @@ workflow {
     }
 
     if (preprocessed_bams) {
+        if (params.input_ont) {
+	    CLAIR3(preprocessed_bams.bam)
+	    NANOCALLER(preprocessed_bams.bam)
+	}
         // we can only add technical annotations when we have the reads
         VAFATOR(normalized_vcfs.combine(preprocessed_bams, by: 0))
         VARIANT_VAF_ANNOTATION(VAFATOR.out.annotated_vcf)
diff --git a/modules/13_ont_annotate.nf b/modules/13_ont_annotate.nf
deleted file mode 100755
index 63b32b1..0000000
--- a/modules/13_ont_annotate.nf
+++ /dev/null
@@ -1,169 +0,0 @@
-process VAFATOR {
-    cpus 1
-    memory "3 GB"
-	tag "${sampleId}"
-	publishDir "${params.outDir}/${sampleId}/variant_calls/${caller}", mode: "copy"	
-
-	conda (params.enable_conda ? "bioconda::vafator=1.2.5" : null)
-
-	input:
-	tuple val(sampleId), path(bam), path(bai), path(vcf), val(caller)
-	
-	output:
-	tuple val(sampleId), path("${sampleId}_${caller}_vaf.vcf"), val(caller), emit: annotated_vcf
-
-	script:
-	mq_param = params.vafator_min_mapping_quality != false ? "--mapping-quality " + params.vafator_min_mapping_quality : ""
-	bq_param = params.vafator_min_base_quality != false ? "--base-call-quality " + params.vafator_min_base_quality : ""
-	"""
-	vafator \
-	--input-vcf ${vcf} \
-	--output-vcf ${sampleId}_${caller}_vaf.vcf \
-	--bam vafator ${bam} ${mq_param} ${bq_param}
-	"""
-}
-
-process SNPEFF {
-    cpus 1
-    memory params.memory
-	publishDir "${params.outDir}/${sampleId}/variant_calls/${caller}", mode: "copy"
-	tag "${sampleId}"
-
-	conda (params.enable_conda ? "bioconda::snpeff=5.0 bioconda::samtools=1.16.1" : null)
-
-	input:
-	tuple val(sampleId), path(vcf), val(caller)
-	val(snpeff_data)
-	val(snpeff_config)
-	val(snpeff_organism)
-
-	output:
-	tuple val(sampleId), path("${sampleId}_${caller}_snpeff.vcf.gz"), emit: annotated_vcf
-	path("${sampleId}_${caller}_snpeff.vcf.gz.tbi")
-
-	"""
-	# for some reason the snpEff.config file needs to be in the folder where snpeff runs...
-	cp ${snpeff_config} .
-
-	snpEff eff -Xmx${params.memory} -dataDir ${snpeff_data} \
-	-noStats -no-downstream -no-upstream -no-intergenic -no-intron -onlyProtein -hgvs1LetterAa -noShiftHgvs \
-	${snpeff_organism} ${vcf} | bgzip -c > ${sampleId}_${caller}_snpeff.vcf.gz
-
-	tabix -p vcf ${sampleId}_${caller}_snpeff.vcf.gz
-	"""
-}
-
-
-process VARIANT_VAF_ANNOTATION {
-	cpus 1
-	memory "3 GB"
-	publishDir "${params.outDir}", mode: "copy"
-	tag "${sampleId}"
-
-	conda (params.enable_conda ? "conda-forge::gsl=2.7 bioconda::bcftools=1.14" : null)
-
-	input:
-		tuple val(sampleId), path(vcf)
-
-	output:
-		tuple val(sampleId), path("${sampleId}.lowfreq_subclonal.vcf"), emit: vaf_annotated
-
-	"""
-	tabix -p vcf ${vcf}
-
-	# annotates low frequency and subclonal variants
-	bcftools view -Ob ${vcf} | \
-	bcftools filter \
-	--exclude 'INFO/vafator_af < ${params.low_frequency_variant_threshold}' \
-	--soft-filter LOW_FREQUENCY - | \
-	bcftools filter \
-	--exclude 'INFO/vafator_af >= ${params.low_frequency_variant_threshold} && INFO/vafator_af < ${params.subclonal_variant_threshold}' \
-	--soft-filter SUBCLONAL \
-	--output-type v - | \
-	bcftools filter \
-	--exclude 'INFO/vafator_af >= ${params.subclonal_variant_threshold} && INFO/vafator_af < ${params.lq_clonal_variant_threshold}' \
-	--soft-filter LOW_QUALITY_CLONAL \
-	--output-type v - > ${sampleId}.lowfreq_subclonal.vcf
-	"""
-}
-
-/**
-Add SARS-CoV-2 specific annotations: conservation, Pfam domains and problematic sites.
-Also, according to problematic sites described in DeMaio et al. (2020) we filter out any variants at the beginning and
-end of the genome.
-*/
-process VARIANT_SARSCOV2_ANNOTATION {
-	cpus 1
-	memory "3 GB"
-	publishDir "${params.output}", mode: "copy"
-	tag "${sampleId}"
-
-	conda (params.enable_conda ? "conda-forge::gsl=2.7 bioconda::bcftools=1.14" : null)
-
-	input:
-	tuple val(sampleId), path(vcf)
-
-	output:
-	tuple val(sampleId), path("${sampleId}.annotated_sarscov2.vcf"), emit: annotated_vcfs
-
-	"""
-	bcftools annotate \
-	--annotations ${params.conservation_sarscov2} \
-	--header-lines ${params.conservation_sarscov2_header} \
-	-c CHROM,FROM,TO,CONS_HMM_SARS_COV_2 \
-	--output-type z ${vcf} | \
-	bcftools annotate \
-	--annotations ${params.conservation_sarbecovirus} \
-	--header-lines ${params.conservation_sarbecovirus_header} \
-	-c CHROM,FROM,TO,CONS_HMM_SARBECOVIRUS \
-	--output-type z - | \
-	bcftools annotate \
-	--annotations ${params.conservation_vertebrate} \
-	--header-lines ${params.conservation_vertebrate_header} \
-	-c CHROM,FROM,TO,CONS_HMM_VERTEBRATE_COV \
-	--output-type z - | \
-	bcftools annotate \
-	--annotations ${params.pfam_names} \
-	--header-lines ${params.pfam_names_header} \
-	-c CHROM,FROM,TO,PFAM_NAME \
-	--output-type z - | \
-	bcftools annotate \
-	--annotations ${params.pfam_descriptions} \
-	--header-lines ${params.pfam_descriptions_header} \
-	-c CHROM,FROM,TO,PFAM_DESCRIPTION - | \
-	bcftools filter \
-	--exclude 'POS <= 55 | POS >= 29804' \
-	--output-type z - > annotated_sarscov2.vcf.gz
-
-	tabix -p vcf annotated_sarscov2.vcf.gz
-
-	bcftools annotate \
-	--annotations ${params.problematic_sites} \
-	--columns INFO/problematic:=FILTER annotated_sarscov2.vcf.gz > ${sampleId}.annotated_sarscov2.vcf
-	"""
-}
-
-process PANGOLIN {
-    cpus 1
-    memory "3 GB"
-	publishDir "${params.outDir}", mode: "copy"
-	tag "${sampleId}"
-
-	conda (params.enable_conda ? "bioconda::pangolin=4.1.2" : null)
-
-	input:
-	tuple val(sampleId), path(fasta)
-
-	output:
-	tuple val(name), path("*pangolin.csv")
-
-    """
-    mkdir tmp
-    #--decompress-model
-    pangolin \
-    ${fasta} \
-    --outfile ${sampleId}.pangolin.csv \
-    --tempdir ./tmp \
-    --threads 1
-    """
-}
\ No newline at end of file

From 177bae94c1aaed97ab4d685f78334a14be378ebd Mon Sep 17 00:00:00 2001
From: Pablo Riesgo-Ferreiro <priesgoferreiro@gmail.com>
Date: Thu, 25 Jan 2024 12:40:25 +0000
Subject: [PATCH 3/3] fix indentation

---
 main.nf | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/main.nf b/main.nf
index 34fd95f..1c1b3a3 100755
--- a/main.nf
+++ b/main.nf
@@ -248,28 +248,28 @@ workflow {
         else {
 	    if (params.input_ont) {
 	        FASTQC(input_fastqs)
-		PORECHOP(input_fastqs)
-		CHOPPER(PORECHOP.out.fq)
-		NANOPLOT(CHOPPER.out.fq)
-		input_bams = MINIMAP2(CHOPPER.out.fq).bam
+		    PORECHOP(input_fastqs)
+		    CHOPPER(PORECHOP.out.fq)
+		    NANOPLOT(CHOPPER.out.fq)
+		    input_bams = MINIMAP2(CHOPPER.out.fq).bam
 	    }
 	    else {
-                READ_TRIMMING_SINGLE_END(input_fastqs)
-                ALIGNMENT_SINGLE_END(READ_TRIMMING_SINGLE_END.out[0], reference)
-                bam_files = ALIGNMENT_SINGLE_END.out
+            READ_TRIMMING_SINGLE_END(input_fastqs)
+            ALIGNMENT_SINGLE_END(READ_TRIMMING_SINGLE_END.out[0], reference)
+            bam_files = ALIGNMENT_SINGLE_END.out
         }
 	if (params.input_ont) {
 	    preprocessed_bams = PRIMER_SOFTCLIP(OPTIMUS_NO_PRIME(input_bams).prediction)
 	}
 	else {
-            BAM_PREPROCESSING(bam_files, reference)
-            preprocessed_bams = BAM_PREPROCESSING.out.preprocessed_bams
-
-            if (primers) {
-                PRIMER_TRIMMING_IVAR(preprocessed_bams, primers)
-                preprocessed_bams = PRIMER_TRIMMING_IVAR.out.trimmed_bam
-            }
-            COVERAGE_ANALYSIS(preprocessed_bams)
+        BAM_PREPROCESSING(bam_files, reference)
+        preprocessed_bams = BAM_PREPROCESSING.out.preprocessed_bams
+
+        if (primers) {
+            PRIMER_TRIMMING_IVAR(preprocessed_bams, primers)
+            preprocessed_bams = PRIMER_TRIMMING_IVAR.out.trimmed_bam
+        }
+        COVERAGE_ANALYSIS(preprocessed_bams)
 
         // variant calling
         vcfs_to_normalize = null