From 7d3650e39496d9a9f73e4f39b42a9c9acab95401 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Mon, 9 Sep 2024 16:08:20 +0200 Subject: [PATCH 01/21] Replace local spades modules with official nf-core version (missing fix CPU trick) --- conf/base.config | 174 +++-- conf/modules.config | 644 ++++-------------- modules.json | 5 + modules/local/spades.nf | 51 -- modules/local/spadeshybrid.nf | 49 -- modules/nf-core/spades/environment.yml | 5 + modules/nf-core/spades/main.nf | 98 +++ modules/nf-core/spades/meta.yml | 99 +++ modules/nf-core/spades/tests/main.nf.test | 228 +++++++ .../nf-core/spades/tests/main.nf.test.snap | 403 +++++++++++ modules/nf-core/spades/tests/nextflow.config | 5 + modules/nf-core/spades/tests/tags.yml | 2 + workflows/mag.nf | 18 +- 13 files changed, 1056 insertions(+), 725 deletions(-) delete mode 100644 modules/local/spades.nf delete mode 100644 modules/local/spadeshybrid.nf create mode 100644 modules/nf-core/spades/environment.yml create mode 100644 modules/nf-core/spades/main.nf create mode 100644 modules/nf-core/spades/meta.yml create mode 100644 modules/nf-core/spades/tests/main.nf.test create mode 100644 modules/nf-core/spades/tests/main.nf.test.snap create mode 100644 modules/nf-core/spades/tests/nextflow.config create mode 100644 modules/nf-core/spades/tests/tags.yml diff --git a/conf/base.config b/conf/base.config index 2928a99b..7c168c12 100644 --- a/conf/base.config +++ b/conf/base.config @@ -10,9 +10,9 @@ process { - cpus = { check_max( 1 * task.attempt, 'cpus' ) } - memory = { check_max( 7.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } + cpus = { check_max(1 * task.attempt, 'cpus') } + memory = { check_max(7.GB * task.attempt, 'memory') } + time = { check_max(4.h * task.attempt, 'time') } errorStrategy = { task.exitStatus in ((130..145) + 104) ? 
'retry' : 'finish' } maxRetries = 3 @@ -24,150 +24,148 @@ process { // If possible, it would be nice to keep the same label naming convention when // adding in your local modules too. // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors - withLabel:process_single { - cpus = { check_max( 1 , 'cpus' ) } - memory = { check_max( 6.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } + withLabel: process_single { + cpus = { check_max(1, 'cpus') } + memory = { check_max(6.GB * task.attempt, 'memory') } + time = { check_max(4.h * task.attempt, 'time') } } - withLabel:process_low { - cpus = { check_max( 2 * task.attempt, 'cpus' ) } - memory = { check_max( 12.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } + withLabel: process_low { + cpus = { check_max(2 * task.attempt, 'cpus') } + memory = { check_max(12.GB * task.attempt, 'memory') } + time = { check_max(4.h * task.attempt, 'time') } } - withLabel:process_medium { - cpus = { check_max( 6 * task.attempt, 'cpus' ) } - memory = { check_max( 36.GB * task.attempt, 'memory' ) } - time = { check_max( 8.h * task.attempt, 'time' ) } + withLabel: process_medium { + cpus = { check_max(6 * task.attempt, 'cpus') } + memory = { check_max(36.GB * task.attempt, 'memory') } + time = { check_max(8.h * task.attempt, 'time') } } - withLabel:process_high { - cpus = { check_max( 12 * task.attempt, 'cpus' ) } - memory = { check_max( 72.GB * task.attempt, 'memory' ) } - time = { check_max( 16.h * task.attempt, 'time' ) } + withLabel: process_high { + cpus = { check_max(12 * task.attempt, 'cpus') } + memory = { check_max(72.GB * task.attempt, 'memory') } + time = { check_max(16.h * task.attempt, 'time') } } - withLabel:process_long { - time = { check_max( 20.h * task.attempt, 'time' ) } + withLabel: process_long { + time = { check_max(20.h * task.attempt, 'time') } } - withLabel:process_high_memory { - memory = { check_max( 200.GB * task.attempt, 
'memory' ) } + withLabel: process_high_memory { + memory = { check_max(200.GB * task.attempt, 'memory') } } - withLabel:error_ignore { + withLabel: error_ignore { errorStrategy = 'ignore' } - withLabel:error_retry { + withLabel: error_retry { errorStrategy = 'retry' maxRetries = 2 } withName: BOWTIE2_HOST_REMOVAL_BUILD { - cpus = { check_max (10 * task.attempt, 'cpus' ) } - memory = { check_max (20.GB * task.attempt, 'memory' ) } - time = { check_max (4.h * task.attempt, 'time' ) } + cpus = { check_max(10 * task.attempt, 'cpus') } + memory = { check_max(20.GB * task.attempt, 'memory') } + time = { check_max(4.h * task.attempt, 'time') } } withName: BOWTIE2_HOST_REMOVAL_ALIGN { - cpus = { check_max (10 * task.attempt, 'cpus' ) } - memory = { check_max (10.GB * task.attempt, 'memory' ) } - time = { check_max (6.h * task.attempt, 'time' ) } + cpus = { check_max(10 * task.attempt, 'cpus') } + memory = { check_max(10.GB * task.attempt, 'memory') } + time = { check_max(6.h * task.attempt, 'time') } } withName: BOWTIE2_PHIX_REMOVAL_ALIGN { - cpus = { check_max (4 * task.attempt, 'cpus' ) } - memory = { check_max (8.GB * task.attempt, 'memory' ) } - time = { check_max (6.h * task.attempt, 'time' ) } + cpus = { check_max(4 * task.attempt, 'cpus') } + memory = { check_max(8.GB * task.attempt, 'memory') } + time = { check_max(6.h * task.attempt, 'time') } } withName: PORECHOP_PORECHOP { - cpus = { check_max (4 * task.attempt, 'cpus' ) } - memory = { check_max (30.GB * task.attempt, 'memory' ) } - time = { check_max (4.h * task.attempt, 'time' ) } + cpus = { check_max(4 * task.attempt, 'cpus') } + memory = { check_max(30.GB * task.attempt, 'memory') } + time = { check_max(4.h * task.attempt, 'time') } } withName: NANOLYSE { - cpus = { check_max (2 * task.attempt, 'cpus' ) } - memory = { check_max (10.GB * task.attempt, 'memory' ) } - time = { check_max (3.h * task.attempt, 'time' ) } + cpus = { check_max(2 * task.attempt, 'cpus') } + memory = { check_max(10.GB * task.attempt, 
'memory') } + time = { check_max(3.h * task.attempt, 'time') } } //filtlong: exponential increase of memory and time with attempts withName: FILTLONG { - cpus = { check_max (8 * task.attempt , 'cpus' ) } - memory = { check_max (64.GB * (2**(task.attempt-1)), 'memory' ) } - time = { check_max (24.h * (2**(task.attempt-1)), 'time' ) } + cpus = { check_max(8 * task.attempt, 'cpus') } + memory = { check_max(64.GB * (2 ** (task.attempt - 1)), 'memory') } + time = { check_max(24.h * (2 ** (task.attempt - 1)), 'time') } } withName: CENTRIFUGE_CENTRIFUGE { - cpus = { check_max (8 * task.attempt, 'cpus' ) } - memory = { check_max (40.GB * task.attempt, 'memory' ) } - time = { check_max (12.h * task.attempt, 'time' ) } + cpus = { check_max(8 * task.attempt, 'cpus') } + memory = { check_max(40.GB * task.attempt, 'memory') } + time = { check_max(12.h * task.attempt, 'time') } } withName: KRAKEN2 { - cpus = { check_max (8 * task.attempt, 'cpus' ) } - memory = { check_max (40.GB * task.attempt, 'memory' ) } - time = { check_max (12.h * task.attempt, 'time' ) } + cpus = { check_max(8 * task.attempt, 'cpus') } + memory = { check_max(40.GB * task.attempt, 'memory') } + time = { check_max(12.h * task.attempt, 'time') } } withName: KRONA_KTIMPORTTAXONOMY { - cpus = { check_max (8 * task.attempt, 'cpus' ) } - memory = { check_max (20.GB * task.attempt, 'memory' ) } - time = { check_max (12.h * task.attempt, 'time' ) } + cpus = { check_max(8 * task.attempt, 'cpus') } + memory = { check_max(20.GB * task.attempt, 'memory') } + time = { check_max(12.h * task.attempt, 'time') } } withName: CAT_DB_GENERATE { - memory = { check_max (200.GB * task.attempt, 'memory' ) } - time = { check_max (16.h * task.attempt, 'time' ) } + memory = { check_max(200.GB * task.attempt, 'memory') } + time = { check_max(16.h * task.attempt, 'time') } } withName: CAT { - cpus = { check_max (8 * task.attempt, 'cpus' ) } - memory = { check_max (40.GB * task.attempt, 'memory' ) } - time = { check_max (12.h * 
task.attempt, 'time' ) } + cpus = { check_max(8 * task.attempt, 'cpus') } + memory = { check_max(40.GB * task.attempt, 'memory') } + time = { check_max(12.h * task.attempt, 'time') } } withName: GTDBTK_CLASSIFYWF { - cpus = { check_max (10 * task.attempt, 'cpus' ) } - memory = { check_max (128.GB * task.attempt, 'memory' ) } - time = { check_max (12.h * task.attempt, 'time' ) } + cpus = { check_max(10 * task.attempt, 'cpus') } + memory = { check_max(128.GB * task.attempt, 'memory') } + time = { check_max(12.h * task.attempt, 'time') } } //MEGAHIT returns exit code 250 when running out of memory withName: MEGAHIT { - cpus = { check_megahit_cpus (8, task.attempt ) } - memory = { check_max (40.GB * task.attempt, 'memory' ) } - time = { check_max (16.h * task.attempt, 'time' ) } - errorStrategy = { task.exitStatus in [143,137,104,134,139,250] ? 'retry' : 'finish' } + cpus = { check_megahit_cpus(8, task.attempt) } + memory = { check_max(40.GB * task.attempt, 'memory') } + time = { check_max(16.h * task.attempt, 'time') } + errorStrategy = { task.exitStatus in [143, 137, 104, 134, 139, 250] ? 'retry' : 'finish' } } //SPAdes returns error(1) if it runs out of memory (and for other reasons as well...)! //exponential increase of memory and time with attempts, keep number of threads to enable reproducibility - withName: SPADES { - cpus = { check_spades_cpus (10, task.attempt) } - memory = { check_max (64.GB * (2**(task.attempt-1)), 'memory' ) } - time = { check_max (24.h * (2**(task.attempt-1)), 'time' ) } - errorStrategy = { task.exitStatus in [143,137,21,1] ? 'retry' : 'finish' } + withName: METASPADES { + cpus = { check_spades_cpus(10, task.attempt) } + memory = { check_max(64.GB * (2 ** (task.attempt - 1)), 'memory') } + time = { check_max(24.h * (2 ** (task.attempt - 1)), 'time') } + errorStrategy = { task.exitStatus in [143, 137, 21, 1] ? 
'retry' : 'finish' } maxRetries = 5 } - withName: SPADESHYBRID { - cpus = { check_spadeshybrid_cpus (10, task.attempt) } - memory = { check_max (64.GB * (2**(task.attempt-1)), 'memory' ) } - time = { check_max (24.h * (2**(task.attempt-1)), 'time' ) } - errorStrategy = { task.exitStatus in [143,137,21,1] ? 'retry' : 'finish' } + withName: METASPADESHYBRID { + cpus = { check_spadeshybrid_cpus(10, task.attempt) } + memory = { check_max(64.GB * (2 ** (task.attempt - 1)), 'memory') } + time = { check_max(24.h * (2 ** (task.attempt - 1)), 'time') } + errorStrategy = { task.exitStatus in [143, 137, 21, 1] ? 'retry' : 'finish' } maxRetries = 5 } //returns exit code 247 when running out of memory withName: BOWTIE2_ASSEMBLY_ALIGN { - cpus = { check_max (2 * task.attempt, 'cpus' ) } - memory = { check_max (8.GB * task.attempt, 'memory' ) } - time = { check_max (8.h * task.attempt, 'time' ) } - errorStrategy = { task.exitStatus in [143,137,104,134,139,247] ? 'retry' : 'finish' } + cpus = { check_max(2 * task.attempt, 'cpus') } + memory = { check_max(8.GB * task.attempt, 'memory') } + time = { check_max(8.h * task.attempt, 'time') } + errorStrategy = { task.exitStatus in [143, 137, 104, 134, 139, 247] ? 
'retry' : 'finish' } } withName: METABAT2_METABAT2 { - cpus = { check_max (8 * task.attempt, 'cpus' ) } - memory = { check_max (20.GB * task.attempt, 'memory' ) } - time = { check_max (8.h * task.attempt, 'time' ) } + cpus = { check_max(8 * task.attempt, 'cpus') } + memory = { check_max(20.GB * task.attempt, 'memory') } + time = { check_max(8.h * task.attempt, 'time') } } withName: MAG_DEPTHS { - memory = { check_max (16.GB * task.attempt, 'memory' ) } + memory = { check_max(16.GB * task.attempt, 'memory') } } withName: BUSCO { - cpus = { check_max (8 * task.attempt, 'cpus' ) } - memory = { check_max (20.GB * task.attempt, 'memory' ) } + cpus = { check_max(8 * task.attempt, 'cpus') } + memory = { check_max(20.GB * task.attempt, 'memory') } } withName: MAXBIN2 { - // often fails when insufficient information, so we allow it to gracefully fail without failing the pipeline - errorStrategy = { task.exitStatus in [ 1, 255 ] ? 'ignore' : 'retry' } + errorStrategy = { task.exitStatus in [1, 255] ? 'ignore' : 'retry' } } withName: DASTOOL_DASTOOL { - // if SCGs not found, bins cannot be assigned and DAS_tool will die with exit status 1 - errorStrategy = { task.exitStatus in [143,137,104,134,139] ? 'retry' : task.exitStatus == 1 ? 'ignore' : 'finish' } + errorStrategy = { task.exitStatus in [143, 137, 104, 134, 139] ? 'retry' : task.exitStatus == 1 ? 'ignore' : 'finish' } } } diff --git a/conf/modules.config b/conf/modules.config index 81df5bc8..09a8d5f3 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -13,674 +13,302 @@ process { //default: do not publish into the results folder - publishDir = [ - path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, - enabled: false - ] + publishDir = [path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, enabled: false] withName: FASTQC_RAW { - ext.args = '--quiet' - publishDir = [ - path: { "${params.outdir}/QC_shortreads/fastqc" }, - mode: params.publish_dir_mode, - pattern: "*.html" - ] + ext.args = '--quiet' + publishDir = [path: { "${params.outdir}/QC_shortreads/fastqc" }, mode: params.publish_dir_mode, pattern: "*.html"] ext.prefix = { "${meta.id}_run${meta.run}_raw" } - tag = { "${meta.id}_run${meta.run}_raw" } + tag = { "${meta.id}_run${meta.run}_raw" } } withName: FASTP { - ext.args = [ - "-q ${params.fastp_qualified_quality}", - "--cut_front", - "--cut_tail", - "--cut_mean_quality ${params.fastp_cut_mean_quality}", - "--length_required ${params.reads_minlength}" - ].join(' ').trim() - publishDir = [ - [ - path: { "${params.outdir}/QC_shortreads/fastp/${meta.id}" }, - mode: params.publish_dir_mode, - pattern: "*.{html,json}" - ], - [ - path: { "${params.outdir}/QC_shortreads/fastp/${meta.id}" }, - mode: params.publish_dir_mode, - pattern: "*.fastq.gz", - enabled: params.save_clipped_reads - ] - ] + ext.args = ["-q ${params.fastp_qualified_quality}", "--cut_front", "--cut_tail", "--cut_mean_quality ${params.fastp_cut_mean_quality}", "--length_required ${params.reads_minlength}"].join(' ').trim() + publishDir = [[path: { "${params.outdir}/QC_shortreads/fastp/${meta.id}" }, mode: params.publish_dir_mode, pattern: "*.{html,json}"], [path: { "${params.outdir}/QC_shortreads/fastp/${meta.id}" }, mode: params.publish_dir_mode, pattern: "*.fastq.gz", enabled: params.save_clipped_reads]] ext.prefix = { "${meta.id}_run${meta.run}_fastp" } - tag = { "${meta.id}_run${meta.run}" } + tag = { "${meta.id}_run${meta.run}" } } withName: ADAPTERREMOVAL_PE { - ext.args = [ - "--minlength ${params.reads_minlength}", - 
"--adapter1 ${params.adapterremoval_adapter1} --adapter2 ${params.adapterremoval_adapter2}", - "--minquality ${params.adapterremoval_minquality} --trimns", - params.adapterremoval_trim_quality_stretch ? "--trim_qualities" : "--trimwindows 4" - ].join(' ').trim() - publishDir = [ - [ - path: { "${params.outdir}/QC_shortreads/adapterremoval/${meta.id}" }, - mode: params.publish_dir_mode, - pattern: "*.{settings}" - ], - [ - path: { "${params.outdir}/QC_shortreads/adapterremoval/${meta.id}" }, - mode: params.publish_dir_mode, - pattern: "*.{truncated,discarded}.gz", - enabled: params.save_clipped_reads - ] - ] + ext.args = ["--minlength ${params.reads_minlength}", "--adapter1 ${params.adapterremoval_adapter1} --adapter2 ${params.adapterremoval_adapter2}", "--minquality ${params.adapterremoval_minquality} --trimns", params.adapterremoval_trim_quality_stretch ? "--trim_qualities" : "--trimwindows 4"].join(' ').trim() + publishDir = [[path: { "${params.outdir}/QC_shortreads/adapterremoval/${meta.id}" }, mode: params.publish_dir_mode, pattern: "*.{settings}"], [path: { "${params.outdir}/QC_shortreads/adapterremoval/${meta.id}" }, mode: params.publish_dir_mode, pattern: "*.{truncated,discarded}.gz", enabled: params.save_clipped_reads]] ext.prefix = { "${meta.id}_run${meta.run}_ar2" } - tag = { "${meta.id}_run${meta.run}" } + tag = { "${meta.id}_run${meta.run}" } } withName: ADAPTERREMOVAL_SE { - ext.args = [ - "--minlength ${params.reads_minlength}", - "--adapter1 ${params.adapterremoval_adapter1}", - "--minquality ${params.adapterremoval_minquality} --trimns", - params.adapterremoval_trim_quality_stretch ? 
"--trim_qualities" : "--trimwindows 4" - ].join(' ').trim() - publishDir = [ - path: { "${params.outdir}/QC_shortreads/adapterremoval/${meta.id}" }, - mode: params.publish_dir_mode, - pattern: "*.{settings}" - ] + ext.args = ["--minlength ${params.reads_minlength}", "--adapter1 ${params.adapterremoval_adapter1}", "--minquality ${params.adapterremoval_minquality} --trimns", params.adapterremoval_trim_quality_stretch ? "--trim_qualities" : "--trimwindows 4"].join(' ').trim() + publishDir = [path: { "${params.outdir}/QC_shortreads/adapterremoval/${meta.id}" }, mode: params.publish_dir_mode, pattern: "*.{settings}"] ext.prefix = { "${meta.id}_run${meta.run}_ar2" } - tag = { "${meta.id}_run${meta.run}" } + tag = { "${meta.id}_run${meta.run}" } } withName: BOWTIE2_PHIX_REMOVAL_ALIGN { ext.prefix = { "${meta.id}_run${meta.run}_phix_removed" } - publishDir = [ - [ - path: { "${params.outdir}/QC_shortreads/remove_phix" }, - mode: params.publish_dir_mode, - pattern: "*.log" - ], - [ - path: { "${params.outdir}/QC_shortreads/remove_phix" }, - mode: params.publish_dir_mode, - pattern: "*.unmapped*.fastq.gz", - enabled: params.save_phixremoved_reads - ] - ] - tag = { "${meta.id}_run${meta.run}" } + publishDir = [[path: { "${params.outdir}/QC_shortreads/remove_phix" }, mode: params.publish_dir_mode, pattern: "*.log"], [path: { "${params.outdir}/QC_shortreads/remove_phix" }, mode: params.publish_dir_mode, pattern: "*.unmapped*.fastq.gz", enabled: params.save_phixremoved_reads]] + tag = { "${meta.id}_run${meta.run}" } } withName: BOWTIE2_HOST_REMOVAL_ALIGN { - ext.args = params.host_removal_verysensitive ? "--very-sensitive" : "--sensitive" - ext.args2 = params.host_removal_save_ids ? "--host_removal_save_ids" : '' + ext.args = params.host_removal_verysensitive ? "--very-sensitive" : "--sensitive" + ext.args2 = params.host_removal_save_ids ? 
"--host_removal_save_ids" : '' ext.prefix = { "${meta.id}_run${meta.run}_host_removed" } - publishDir = [ - [ - path: { "${params.outdir}/QC_shortreads/remove_host" }, - mode: params.publish_dir_mode, - pattern: "*{.log,read_ids.txt}" - ], - [ - path: { "${params.outdir}/QC_shortreads/remove_host" }, - mode: params.publish_dir_mode, - pattern: "*.unmapped*.fastq.gz", - enabled: params.save_hostremoved_reads - ] - ] - tag = { "${meta.id}_run${meta.run}" } + publishDir = [[path: { "${params.outdir}/QC_shortreads/remove_host" }, mode: params.publish_dir_mode, pattern: "*{.log,read_ids.txt}"], [path: { "${params.outdir}/QC_shortreads/remove_host" }, mode: params.publish_dir_mode, pattern: "*.unmapped*.fastq.gz", enabled: params.save_hostremoved_reads]] + tag = { "${meta.id}_run${meta.run}" } } withName: FASTQC_TRIMMED { - ext.args = '--quiet' + ext.args = '--quiet' ext.prefix = { "${meta.id}_run${meta.run}_trimmed" } - publishDir = [ - path: { "${params.outdir}/QC_shortreads/fastqc" }, - mode: params.publish_dir_mode, - pattern: "*.html" - ] - tag = { "${meta.id}_run${meta.run}" } + publishDir = [path: { "${params.outdir}/QC_shortreads/fastqc" }, mode: params.publish_dir_mode, pattern: "*.html"] + tag = { "${meta.id}_run${meta.run}" } } withName: BBMAP_BBNORM { - ext.args = [ - params.bbnorm_target ? "target=${params.bbnorm_target}" : '', - params.bbnorm_min ? "min=${params.bbnorm_min}" : '', - ].join(' ').trim() - publishDir = [ - [ - path : { "${params.outdir}/bbmap/bbnorm/logs" }, - enabled: params.save_bbnorm_reads, - mode : params.publish_dir_mode, - pattern: "*.log" - ], - [ - path : { "${params.outdir}/bbmap/bbnorm/"}, - mode : 'copy', - enabled: params.save_bbnorm_reads, - mode : params.publish_dir_mode, - pattern: "*.fastq.gz" - ] - ] + ext.args = [params.bbnorm_target ? "target=${params.bbnorm_target}" : '', params.bbnorm_min ? 
"min=${params.bbnorm_min}" : ''].join(' ').trim() + publishDir = [[path: { "${params.outdir}/bbmap/bbnorm/logs" }, enabled: params.save_bbnorm_reads, mode: params.publish_dir_mode, pattern: "*.log"], [path: { "${params.outdir}/bbmap/bbnorm/" }, mode: 'copy', enabled: params.save_bbnorm_reads, mode: params.publish_dir_mode, pattern: "*.fastq.gz"]] } withName: PORECHOP_PORECHOP { - publishDir = [ - path: { "${params.outdir}/QC_longreads/porechop" }, - mode: params.publish_dir_mode, - pattern: "*_trimmed.fastq", - enabled: params.save_porechop_reads - ] + publishDir = [path: { "${params.outdir}/QC_longreads/porechop" }, mode: params.publish_dir_mode, pattern: "*_trimmed.fastq", enabled: params.save_porechop_reads] ext.prefix = { "${meta.id}_run${meta.run}_trimmed" } } withName: FILTLONG { - publishDir = [ - path: { "${params.outdir}/QC_longreads/Filtlong" }, - mode: params.publish_dir_mode, - pattern: "*_lr_filtlong.fastq.gz", - enabled: params.save_filtlong_reads - ] + publishDir = [path: { "${params.outdir}/QC_longreads/Filtlong" }, mode: params.publish_dir_mode, pattern: "*_lr_filtlong.fastq.gz", enabled: params.save_filtlong_reads] ext.prefix = { "${meta.id}_run${meta.run}_lengthfiltered" } } withName: NANOLYSE { - publishDir = [ - [ - path: { "${params.outdir}/QC_longreads/NanoLyse" }, - mode: params.publish_dir_mode, - pattern: "*.log" - ], - [ - path: { "${params.outdir}/QC_longreads/NanoLyse" }, - mode: params.publish_dir_mode, - pattern: "*_nanolyse.fastq.gz", - enabled: params.save_lambdaremoved_reads - ] - ] + publishDir = [[path: { "${params.outdir}/QC_longreads/NanoLyse" }, mode: params.publish_dir_mode, pattern: "*.log"], [path: { "${params.outdir}/QC_longreads/NanoLyse" }, mode: params.publish_dir_mode, pattern: "*_nanolyse.fastq.gz", enabled: params.save_lambdaremoved_reads]] ext.prefix = { "${meta.id}_run${meta.run}_lambdafiltered" } } withName: NANOPLOT_RAW { ext.prefix = 'raw' - ext.args = { [ - "-p raw_", - "--title ${meta.id}_raw", - "-c 
darkblue", - ].join(' ').trim() } - publishDir = [ - path: { "${params.outdir}/QC_longreads/NanoPlot/${meta.id}" }, - mode: params.publish_dir_mode, - pattern: "*.{png,html,txt}" - ] + ext.args = { ["-p raw_", "--title ${meta.id}_raw", "-c darkblue"].join(' ').trim() } + publishDir = [path: { "${params.outdir}/QC_longreads/NanoPlot/${meta.id}" }, mode: params.publish_dir_mode, pattern: "*.{png,html,txt}"] } withName: NANOPLOT_FILTERED { - ext.args = { [ - "-p filtered_", - "--title ${meta.id}_filtered", - "-c darkblue", - ].join(' ').trim() } - publishDir = [ - path: { "${params.outdir}/QC_longreads/NanoPlot/${meta.id}" }, - mode: params.publish_dir_mode, - pattern: "*.{png,html,txt}" - ] + ext.args = { ["-p filtered_", "--title ${meta.id}_filtered", "-c darkblue"].join(' ').trim() } + publishDir = [path: { "${params.outdir}/QC_longreads/NanoPlot/${meta.id}" }, mode: params.publish_dir_mode, pattern: "*.{png,html,txt}"] } withName: CENTRIFUGE_CENTRIFUGE { - publishDir = [ - path: { "${params.outdir}/Taxonomy/centrifuge/${meta.id}" }, - mode: params.publish_dir_mode, - pattern: "*.txt" - ] + publishDir = [path: { "${params.outdir}/Taxonomy/centrifuge/${meta.id}" }, mode: params.publish_dir_mode, pattern: "*.txt"] } withName: CENTRIFUGE_KREPORT { ext.prefix = { "${meta.id}_kreport" } - publishDir = [ - path: { "${params.outdir}/Taxonomy/centrifuge/${meta.id}" }, - mode: params.publish_dir_mode, - pattern: "*.txt" - ] + publishDir = [path: { "${params.outdir}/Taxonomy/centrifuge/${meta.id}" }, mode: params.publish_dir_mode, pattern: "*.txt"] } withName: KRAKEN2 { - ext.args = '--quiet' - publishDir = [ - path: { "${params.outdir}/Taxonomy/kraken2/${meta.id}" }, - mode: params.publish_dir_mode, - pattern: "*.txt" - ] + ext.args = '--quiet' + publishDir = [path: { "${params.outdir}/Taxonomy/kraken2/${meta.id}" }, mode: params.publish_dir_mode, pattern: "*.txt"] } withName: KREPORT2KRONA_CENTRIFUGE { - publishDir = [ - path: { 
"${params.outdir}/Taxonomy/${meta.classifier}/${meta.id}" }, - mode: params.publish_dir_mode, - pattern: "*.txt", - enabled: false - ] + publishDir = [path: { "${params.outdir}/Taxonomy/${meta.classifier}/${meta.id}" }, mode: params.publish_dir_mode, pattern: "*.txt", enabled: false] } withName: KRONA_KTIMPORTTAXONOMY { - publishDir = [ - path: { "${params.outdir}/Taxonomy/${meta.classifier}/${meta.id}" }, - mode: params.publish_dir_mode, - pattern: "*.html" - ] + publishDir = [path: { "${params.outdir}/Taxonomy/${meta.classifier}/${meta.id}" }, mode: params.publish_dir_mode, pattern: "*.html"] } //pattern: "*.{fa.gz,log}" //'pattern' didnt work, probably because the output is in a folder, solved with 'saveAs' withName: MEGAHIT { - ext.args = params.megahit_options ?: '' - publishDir = [ - path: { "${params.outdir}/Assembly" }, - mode: params.publish_dir_mode, - saveAs: { - filename -> filename.equals('versions.yml') ? null : - filename.indexOf('.contigs.fa.gz') > 0 ? filename : - filename.indexOf('.log') > 0 ? filename : null } - ] - } - - withName: SPADES { - ext.args = params.spades_options ?: '' - publishDir = [ - path: { "${params.outdir}/Assembly/SPAdes" }, - mode: params.publish_dir_mode, - pattern: "*.{fasta.gz,gfa.gz,log}" - ] - } - - withName: SPADESHYBRID { - ext.args = params.spades_options ?: '' - publishDir = [ - path: { "${params.outdir}/Assembly/SPAdesHybrid" }, - mode: params.publish_dir_mode, - pattern: "*.{fasta.gz,gfa.gz,log}" - ] + ext.args = params.megahit_options ?: '' + publishDir = [path: { "${params.outdir}/Assembly" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename.indexOf('.contigs.fa.gz') > 0 ? filename : filename.indexOf('.log') > 0 ? 
filename : null }] + } + + withName: METASPADES { + ext.args = params.spades_options ?: '' + ext.prefix = { "SPAdes-${meta.id}" } + publishDir = [path: { "${params.outdir}/Assembly/SPAdes" }, mode: params.publish_dir_mode, pattern: "*.{fasta.gz,gfa.gz,fa.gz,log}"] + } + + withName: METASPADESHYBRID { + ext.args = params.spades_options ?: '' + ext.prefix = { "SPAdesHybrid-${meta.id}" } + publishDir = [path: { "${params.outdir}/Assembly/SPAdesHybrid" }, mode: params.publish_dir_mode, pattern: "*.{fasta.gz,gfa.gz,fa.gz,log}"] } withName: QUAST { - publishDir = [ - path: { "${params.outdir}/Assembly/${meta.assembler}/QC/${meta.id}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] + publishDir = [path: { "${params.outdir}/Assembly/${meta.assembler}/QC/${meta.id}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }] } withName: GENOMAD_ENDTOEND { - ext.args = [ - "--cleanup", - "--min-score ${params.genomad_min_score}", - "--splits ${params.genomad_splits}", - ].join(' ').trim() - publishDir = [ - path: { "${params.outdir}/VirusIdentification/geNomad/${meta.id}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] + ext.args = ["--cleanup", "--min-score ${params.genomad_min_score}", "--splits ${params.genomad_splits}"].join(' ').trim() + publishDir = [path: { "${params.outdir}/VirusIdentification/geNomad/${meta.id}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }] } withName: BOWTIE2_ASSEMBLY_ALIGN { - ext.args = params.bowtie2_mode ? params.bowtie2_mode : params.ancient_dna ? '--very-sensitive -N 1' : '' + ext.args = params.bowtie2_mode ? params.bowtie2_mode : params.ancient_dna ? 
'--very-sensitive -N 1' : '' ext.prefix = { "${meta.id}.assembly" } - publishDir = [ - [ - path: { "${params.outdir}/Assembly/${assembly_meta.assembler}/QC/${assembly_meta.id}" }, - mode: params.publish_dir_mode, - pattern: "*.log" - ], - [ - path: { "${params.outdir}/Assembly/${assembly_meta.assembler}/QC/${assembly_meta.id}" }, - mode: params.publish_dir_mode, - pattern: "*.{bam,bai}", - enabled: params.save_assembly_mapped_reads - ], - ] + publishDir = [[path: { "${params.outdir}/Assembly/${assembly_meta.assembler}/QC/${assembly_meta.id}" }, mode: params.publish_dir_mode, pattern: "*.log"], [path: { "${params.outdir}/Assembly/${assembly_meta.assembler}/QC/${assembly_meta.id}" }, mode: params.publish_dir_mode, pattern: "*.{bam,bai}", enabled: params.save_assembly_mapped_reads]] } withName: 'MAG_DEPTHS_PLOT|MAG_DEPTHS_SUMMARY' { - publishDir = [ - path: { "${params.outdir}/GenomeBinning/depths/bins" }, - mode: params.publish_dir_mode, - pattern: "*.{png,tsv}" - ] + publishDir = [path: { "${params.outdir}/GenomeBinning/depths/bins" }, mode: params.publish_dir_mode, pattern: "*.{png,tsv}"] } - withName: 'BIN_SUMMARY' { - publishDir = [ - path: { "${params.outdir}/GenomeBinning" }, - mode: params.publish_dir_mode, - pattern: "*.{png,tsv}" - ] + withName: BIN_SUMMARY { + publishDir = [path: { "${params.outdir}/GenomeBinning" }, mode: params.publish_dir_mode, pattern: "*.{png,tsv}"] } withName: BUSCO_DB_PREPARATION { - publishDir = [ - path: { "${params.outdir}/GenomeBinning/QC/BUSCO" }, - mode: params.publish_dir_mode, - pattern: "*.tar.gz" - ] + publishDir = [path: { "${params.outdir}/GenomeBinning/QC/BUSCO" }, mode: params.publish_dir_mode, pattern: "*.tar.gz"] } - withName: 'BUSCO' { - ext.args = [ - params.busco_db ? '--offline' : '' - ].join(' ').trim() - publishDir = [ - path: { "${params.outdir}/GenomeBinning/QC/BUSCO" }, - mode: params.publish_dir_mode, - pattern: "*.{log,err,faa.gz,fna.gz,gff,txt}" - ] + withName: BUSCO { + ext.args = [params.busco_db ? 
'--offline' : ''].join(' ').trim() + publishDir = [path: { "${params.outdir}/GenomeBinning/QC/BUSCO" }, mode: params.publish_dir_mode, pattern: "*.{log,err,faa.gz,fna.gz,gff,txt}"] } withName: BUSCO_SAVE_DOWNLOAD { - publishDir = [ - path: { "${params.outdir}/GenomeBinning/QC/BUSCO" }, - mode: params.publish_dir_mode, - overwrite: false, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] + publishDir = [path: { "${params.outdir}/GenomeBinning/QC/BUSCO" }, mode: params.publish_dir_mode, overwrite: false, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }] } withName: 'BUSCO_SUMMARY|QUAST_BINS|QUAST_BINS_SUMMARY' { - publishDir = [ - path: { "${params.outdir}/GenomeBinning/QC" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] + publishDir = [path: { "${params.outdir}/GenomeBinning/QC" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }] } withName: ARIA2_UNTAR { - publishDir = [ - path: { "${params.outdir}/GenomeBinning/QC/CheckM/checkm_downloads" }, - mode: params.publish_dir_mode, - overwrite: false, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: params.save_checkm_data - ] + publishDir = [path: { "${params.outdir}/GenomeBinning/QC/CheckM/checkm_downloads" }, mode: params.publish_dir_mode, overwrite: false, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, enabled: params.save_checkm_data] } - withName: 'CHECKM_LINEAGEWF' { - tag = { "${meta.assembler}-${meta.binner}-${meta.domain}-${meta.refinement}-${meta.id}" } + withName: CHECKM_LINEAGEWF { + tag = { "${meta.assembler}-${meta.binner}-${meta.domain}-${meta.refinement}-${meta.id}" } ext.prefix = { "${meta.assembler}-${meta.binner}-${meta.domain}-${meta.refinement}-${meta.id}_wf" } - publishDir = [ - path: { "${params.outdir}/GenomeBinning/QC/CheckM" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] + publishDir = [path: { "${params.outdir}/GenomeBinning/QC/CheckM" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }] } - withName: 'CHECKM_QA' { + withName: CHECKM_QA { ext.prefix = { "${meta.assembler}-${meta.binner}-${meta.domain}-${meta.refinement}-${meta.id}_qa" } - ext.args = "-o 2 --tab_table" - publishDir = [ - path: { "${params.outdir}/GenomeBinning/QC/CheckM" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] + ext.args = "-o 2 --tab_table" + publishDir = [path: { "${params.outdir}/GenomeBinning/QC/CheckM" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }] } - withName: 'COMBINE_CHECKM_TSV' { + withName: COMBINE_CHECKM_TSV { ext.prefix = { "checkm_summary" } - publishDir = [ - path: { "${params.outdir}/GenomeBinning/QC" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] + publishDir = [path: { "${params.outdir}/GenomeBinning/QC" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }] } - withName: 'GUNC_DOWNLOADDB' { - publishDir = [ - path: { "${params.outdir}/GenomeBinning/QC/GUNC" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, - enabled: params.gunc_save_db - ] + withName: GUNC_DOWNLOADDB { + publishDir = [path: { "${params.outdir}/GenomeBinning/QC/GUNC" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, enabled: params.gunc_save_db] } // Make sure to keep directory in sync with gunc_qc.nf - withName: 'GUNC_RUN' { - publishDir = [ - path: { "${params.outdir}/GenomeBinning/QC/GUNC/raw/${meta.assembler}-${meta.binner}-${meta.domain}-${meta.refinement}-${meta.id}/${fasta.baseName}/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] + withName: GUNC_RUN { + publishDir = [path: { "${params.outdir}/GenomeBinning/QC/GUNC/raw/${meta.assembler}-${meta.binner}-${meta.domain}-${meta.refinement}-${meta.id}/${fasta.baseName}/" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }] } // Make sure to keep directory in sync with gunc_qc.nf - withName: 'GUNC_MERGECHECKM' { - publishDir = [ - path: { "${params.outdir}/GenomeBinning/QC/GUNC/checkmmerged/${meta.assembler}-${meta.binner}-${meta.domain}-${meta.refinement}-${meta.id}/${checkm_file.baseName}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] + withName: GUNC_MERGECHECKM { + publishDir = [path: { "${params.outdir}/GenomeBinning/QC/GUNC/checkmmerged/${meta.assembler}-${meta.binner}-${meta.domain}-${meta.refinement}-${meta.id}/${checkm_file.baseName}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }] } withName: CAT_DB_GENERATE { - publishDir = [ - path: { "${params.outdir}/Taxonomy/CAT" }, - mode: params.publish_dir_mode, - pattern: "*.tar.gz" - ] + publishDir = [path: { "${params.outdir}/Taxonomy/CAT" }, mode: params.publish_dir_mode, pattern: "*.tar.gz"] } withName: CAT { - publishDir = [ - path: { "${params.outdir}/Taxonomy/CAT/${meta.assembler}/${meta.binner}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] + publishDir = [path: { "${params.outdir}/Taxonomy/CAT/${meta.assembler}/${meta.binner}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }] } withName: CAT_SUMMARY { ext.prefix = "cat_summary" - publishDir = [ - path: { "${params.outdir}/Taxonomy/CAT/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] + publishDir = [path: { "${params.outdir}/Taxonomy/CAT/" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }] } withName: GTDBTK_CLASSIFYWF { ext.args = "--extension fa" ext.prefix = { "${meta.assembler}-${meta.binner}-${meta.domain}-${meta.refinement}-${meta.id}" } - publishDir = [ - path: { "${params.outdir}/Taxonomy/GTDB-Tk/${meta.assembler}/${meta.binner}/${meta.id}" }, - mode: params.publish_dir_mode, - pattern: "*.{log,tasv,tree.gz,fasta,fasta.gz}" - ] + publishDir = [path: { "${params.outdir}/Taxonomy/GTDB-Tk/${meta.assembler}/${meta.binner}/${meta.id}" }, mode: params.publish_dir_mode, pattern: "*.{log,tsv,tree.gz,fasta,fasta.gz}"] } withName: GTDBTK_SUMMARY { ext.args = "--extension fa" - publishDir = [ - path: { "${params.outdir}/Taxonomy/GTDB-Tk" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] + publishDir = [path: { "${params.outdir}/Taxonomy/GTDB-Tk" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }] } withName: PROKKA { ext.args = "--metagenome" - publishDir = [ - path: { "${params.outdir}/Annotation/Prokka/${meta.assembler}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] + publishDir = [path: { "${params.outdir}/Annotation/Prokka/${meta.assembler}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }] } withName: PRODIGAL { ext.args = "-p meta" - publishDir = [ - path: { "${params.outdir}/Annotation/Prodigal/${meta.assembler}/${meta.id}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] + publishDir = [path: { "${params.outdir}/Annotation/Prodigal/${meta.assembler}/${meta.id}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }] } withName: FREEBAYES { ext.prefix = { "${meta.assembler}-${meta.id}" } ext.args = "-p ${params.freebayes_ploidy} -q ${params.freebayes_min_basequality} -F ${params.freebayes_minallelefreq}" - publishDir = [ - path: { "${params.outdir}/Ancient_DNA/variant_calling/freebayes" }, - mode: params.publish_dir_mode, - pattern: "*.vcf.gz" - ] + publishDir = [path: { "${params.outdir}/Ancient_DNA/variant_calling/freebayes" }, mode: params.publish_dir_mode, pattern: "*.vcf.gz"] } withName: BCFTOOLS_VIEW { ext.prefix = { "${meta.assembler}-${meta.id}.filtered" } ext.args = "-v snps,mnps -i 'QUAL>=${params.bcftools_view_high_variant_quality} || (QUAL>=${params.bcftools_view_medium_variant_quality} && FORMAT/AO>=${params.bcftools_view_minimal_allelesupport})'" - publishDir = [ - path: { "${params.outdir}/Ancient_DNA/variant_calling/filtered" }, - mode: params.publish_dir_mode, - pattern: "*.vcf.gz" - ] + publishDir = [path: { "${params.outdir}/Ancient_DNA/variant_calling/filtered" }, mode: params.publish_dir_mode, pattern: "*.vcf.gz"] } withName: BCFTOOLS_CONSENSUS { ext.prefix = { "${meta.assembler}-${meta.id}" } - publishDir = [ - path: {"${params.outdir}/Ancient_DNA/variant_calling/consensus" }, - mode: params.publish_dir_mode, - pattern: "*.fa" - ] + publishDir = [path: { "${params.outdir}/Ancient_DNA/variant_calling/consensus" }, mode: params.publish_dir_mode, pattern: "*.fa"] } withName: BCFTOOLS_INDEX { ext.prefix = { "${meta.assembler}-${meta.id}" } ext.args = "-t" - publishDir = [ - path: {"${params.outdir}/Ancient_DNA/variant_calling/index" }, - mode: params.publish_dir_mode, - enabled: false - ] + publishDir = [path: { "${params.outdir}/Ancient_DNA/variant_calling/index" }, mode: params.publish_dir_mode, enabled: false] } withName: PYDAMAGE_ANALYZE { ext.prefix = { "${meta.assembler}-${meta.id}" } - publishDir = [ - path: {"${params.outdir}/Ancient_DNA/pydamage/analyze/${meta.assembler}-${meta.id}/" }, - mode: params.publish_dir_mode - ] + 
publishDir = [path: { "${params.outdir}/Ancient_DNA/pydamage/analyze/${meta.assembler}-${meta.id}/" }, mode: params.publish_dir_mode] } withName: PYDAMAGE_FILTER { ext.prefix = { "${meta.assembler}-${meta.id}" } ext.args = "-t ${params.pydamage_accuracy}" - publishDir = [ - path: {"${params.outdir}/Ancient_DNA/pydamage/filter/${meta.assembler}-${meta.id}/" }, - mode: params.publish_dir_mode - ] + publishDir = [path: { "${params.outdir}/Ancient_DNA/pydamage/filter/${meta.assembler}-${meta.id}/" }, mode: params.publish_dir_mode] } withName: SAMTOOLS_FAIDX { ext.prefix = { "${meta.assembler}-${meta.id}" } - publishDir = [ - path: {"${params.outdir}/Ancient_DNA/samtools/faidx" }, - mode: params.publish_dir_mode, - enabled: false - ] + publishDir = [path: { "${params.outdir}/Ancient_DNA/samtools/faidx" }, mode: params.publish_dir_mode, enabled: false] } withName: METABAT2_JGISUMMARIZEBAMCONTIGDEPTHS { - publishDir = [ - path: { "${params.outdir}/GenomeBinning/depths/contigs" }, - mode: params.publish_dir_mode, - pattern: '*-depth.txt.gz' - ] + publishDir = [path: { "${params.outdir}/GenomeBinning/depths/contigs" }, mode: params.publish_dir_mode, pattern: '*-depth.txt.gz'] ext.prefix = { "${meta.assembler}-${meta.id}-depth" } } withName: METABAT2_METABAT2 { - publishDir = [ - [ - path: { "${params.outdir}/GenomeBinning/MetaBAT2/bins/" }, - mode: params.publish_dir_mode, - pattern: '*[!lowDepth|tooShort|unbinned].fa.gz' - ], - [ - path: { "${params.outdir}/GenomeBinning/MetaBAT2/discarded" }, - mode: params.publish_dir_mode, - pattern: '*tooShort.fa.gz' - ], - [ - path: { "${params.outdir}/GenomeBinning/MetaBAT2/discarded" }, - mode: params.publish_dir_mode, - pattern: '*lowDepth.fa.gz' - ] - ] + publishDir = [[path: { "${params.outdir}/GenomeBinning/MetaBAT2/bins/" }, mode: params.publish_dir_mode, pattern: '*[!lowDepth|tooShort|unbinned].fa.gz'], [path: { "${params.outdir}/GenomeBinning/MetaBAT2/discarded" }, mode: params.publish_dir_mode, pattern: '*tooShort.fa.gz'], 
[path: { "${params.outdir}/GenomeBinning/MetaBAT2/discarded" }, mode: params.publish_dir_mode, pattern: '*lowDepth.fa.gz']] ext.prefix = { "${meta.assembler}-MetaBAT2-${meta.id}" } - ext.args = [ - params.min_contig_size < 1500 ? "-m 1500" : "-m ${params.min_contig_size}", - "--unbinned", - "--seed ${params.metabat_rng_seed}" - ].join(' ').trim() + ext.args = [params.min_contig_size < 1500 ? "-m 1500" : "-m ${params.min_contig_size}", "--unbinned", "--seed ${params.metabat_rng_seed}"].join(' ').trim() } withName: MAXBIN2 { - publishDir = [ - [ - path: { "${params.outdir}/GenomeBinning/MaxBin2/discarded" }, - mode: params.publish_dir_mode, - pattern: '*.tooshort.gz' - ], - ] + publishDir = [[path: { "${params.outdir}/GenomeBinning/MaxBin2/discarded" }, mode: params.publish_dir_mode, pattern: '*.tooshort.gz']] ext.prefix = { "${meta.assembler}-MaxBin2-${meta.id}" } - // if no gene found, will crash so allow ignore so rest of pipeline - // completes but without MaxBin2 results } withName: ADJUST_MAXBIN2_EXT { - publishDir = [ - [ - path: { "${params.outdir}/GenomeBinning/MaxBin2/bins/" }, - mode: params.publish_dir_mode, - pattern: '*.fa.gz' - ], - ] - } - - withName: "CONCOCT_.*" { - publishDir = [ - [ - path: { "${params.outdir}/GenomeBinning/CONCOCT/stats/" }, - mode: params.publish_dir_mode, - pattern: "*.{txt,csv,tsv}" - ], - [ - path: { "${params.outdir}/GenomeBinning/CONCOCT/bins" }, - mode: params.publish_dir_mode, - saveAs: { filename -> new File(filename).getName() }, - pattern: "*/*.fa.gz" - ] - ] + publishDir = [[path: { "${params.outdir}/GenomeBinning/MaxBin2/bins/" }, mode: params.publish_dir_mode, pattern: '*.fa.gz']] + } + + withName: 'CONCOCT_.*' { + publishDir = [[path: { "${params.outdir}/GenomeBinning/CONCOCT/stats/" }, mode: params.publish_dir_mode, pattern: "*.{txt,csv,tsv}"], [path: { "${params.outdir}/GenomeBinning/CONCOCT/bins" }, mode: params.publish_dir_mode, saveAs: { filename -> new File(filename).getName() }, pattern: "*/*.fa.gz"]] 
ext.prefix = { "${meta.assembler}-CONCOCT-${meta.id}" } } withName: SPLIT_FASTA { - publishDir = [ - [ - path: { "${params.outdir}/GenomeBinning/${meta.binner}/unbinned" }, - mode: params.publish_dir_mode, - pattern: '*.*[0-9].fa.gz' - ], - [ - path: { "${params.outdir}/GenomeBinning/${meta.binner}/unbinned/discarded" }, - mode: params.publish_dir_mode, - pattern: '*.pooled.fa.gz' - ], - [ - path: { "${params.outdir}/GenomeBinning/${meta.binner}/unbinned/discarded" }, - mode: params.publish_dir_mode, - pattern: '*.remaining.fa.gz' - ] - ] + publishDir = [[path: { "${params.outdir}/GenomeBinning/${meta.binner}/unbinned" }, mode: params.publish_dir_mode, pattern: '*.*[0-9].fa.gz'], [path: { "${params.outdir}/GenomeBinning/${meta.binner}/unbinned/discarded" }, mode: params.publish_dir_mode, pattern: '*.pooled.fa.gz'], [path: { "${params.outdir}/GenomeBinning/${meta.binner}/unbinned/discarded" }, mode: params.publish_dir_mode, pattern: '*.remaining.fa.gz']] } withName: DASTOOL_FASTATOCONTIG2BIN_METABAT2 { @@ -700,84 +328,44 @@ process { } withName: DASTOOL_DASTOOL { - publishDir = [ - [ - path: { "${params.outdir}/GenomeBinning/DASTool" }, - mode: params.publish_dir_mode, - pattern: '*.{tsv,log,eval,seqlength}' - ], - ] + publishDir = [[path: { "${params.outdir}/GenomeBinning/DASTool" }, mode: params.publish_dir_mode, pattern: '*.{tsv,log,eval,seqlength}']] ext.prefix = { "${meta.assembler}-DASTool-${meta.id}" } - ext.args = "--write_bins --write_unbinned --write_bin_evals --score_threshold ${params.refine_bins_dastool_threshold}" + ext.args = "--write_bins --write_unbinned --write_bin_evals --score_threshold ${params.refine_bins_dastool_threshold}" } withName: RENAME_POSTDASTOOL { - publishDir = [ - [ - path: { "${params.outdir}/GenomeBinning/DASTool/unbinned" }, - mode: params.publish_dir_mode, - pattern: '*-DASToolUnbinned-*.fa' - ], - [ - path: { "${params.outdir}/GenomeBinning/DASTool/bins" }, - mode: params.publish_dir_mode, - // pattern needs to be updated in 
case of new binning methods - pattern: '*-{MetaBAT2,MaxBin2,CONCOCT}Refined-*.fa' - ] - ] + publishDir = [[path: { "${params.outdir}/GenomeBinning/DASTool/unbinned" }, mode: params.publish_dir_mode, pattern: '*-DASToolUnbinned-*.fa'], [path: { "${params.outdir}/GenomeBinning/DASTool/bins" }, mode: params.publish_dir_mode, pattern: '*-{MetaBAT2,MaxBin2,CONCOCT}Refined-*.fa']] } withName: TIARA_TIARA { - publishDir = [ - path: { "${params.outdir}/Taxonomy/Tiara/" }, - mode: params.publish_dir_mode, - pattern: "*.txt" - ] - ext.args = { "--min_len ${params.tiara_min_length} --probabilities" } + publishDir = [path: { "${params.outdir}/Taxonomy/Tiara/" }, mode: params.publish_dir_mode, pattern: "*.txt"] + ext.args = { "--min_len ${params.tiara_min_length} --probabilities" } ext.prefix = { "${meta.assembler}-${meta.id}.tiara" } } withName: TIARA_CLASSIFY { - ext.args = { "--join_prokaryotes --assembler ${meta.assembler}" } + ext.args = { "--join_prokaryotes --assembler ${meta.assembler}" } ext.prefix = { "${meta.assembler}-${meta.binner}-${meta.bin}-${meta.id}" } } withName: TIARA_SUMMARY { - publishDir = [ - path: { "${params.outdir}/GenomeBinning/Tiara" }, - mode: params.publish_dir_mode, - pattern: "tiara_summary.tsv" - ] + publishDir = [path: { "${params.outdir}/GenomeBinning/Tiara" }, mode: params.publish_dir_mode, pattern: "tiara_summary.tsv"] ext.prefix = "tiara_summary" } withName: MMSEQS_DATABASES { ext.prefix = { "${params.metaeuk_mmseqs_db.replaceAll("/", "-")}" } - publishDir = [ - path: { "${params.outdir}/Annotation/mmseqs_db/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: params.save_mmseqs_db - ] + publishDir = [path: { "${params.outdir}/Annotation/mmseqs_db/" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, enabled: params.save_mmseqs_db] } withName: METAEUK_EASYPREDICT { ext.args = "" ext.prefix = { "${meta.id}" } - publishDir = [ - path: { "${params.outdir}/Annotation/MetaEuk/${meta.assembler}/${meta.id}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] + publishDir = [path: { "${params.outdir}/Annotation/MetaEuk/${meta.assembler}/${meta.id}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }] } - withName: 'MULTIQC' { - ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } - publishDir = [ - path: { "${params.outdir}/multiqc" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] + withName: MULTIQC { + ext.args = { params.multiqc_title ? "--title \"${params.multiqc_title}\"" : '' } + publishDir = [path: { "${params.outdir}/multiqc" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }] } - } diff --git a/modules.json b/modules.json index 0cab4e4e..d2009658 100644 --- a/modules.json +++ b/modules.json @@ -237,6 +237,11 @@ "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] }, + "spades": { + "branch": "master", + "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", + "installed_by": ["modules"] + }, "tiara/tiara": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", diff --git a/modules/local/spades.nf b/modules/local/spades.nf deleted file mode 100644 index 9ef7ec77..00000000 --- a/modules/local/spades.nf +++ /dev/null @@ -1,51 +0,0 @@ -process SPADES { - tag "$meta.id" - - conda "bioconda::spades=3.15.3" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/spades:3.15.3--h95f258a_0' : - 'biocontainers/spades:3.15.3--h95f258a_0' }" - - input: - tuple val(meta), path(reads) - - output: - tuple val(meta), path("SPAdes-${meta.id}_scaffolds.fasta"), emit: assembly - path "SPAdes-${meta.id}.log" , emit: log - path "SPAdes-${meta.id}_contigs.fasta.gz" , emit: contigs_gz - path "SPAdes-${meta.id}_scaffolds.fasta.gz" , emit: assembly_gz - path "SPAdes-${meta.id}_graph.gfa.gz" , emit: graph - path "versions.yml" , emit: versions - - script: - def args = task.ext.args ?: '' - maxmem = task.memory.toGiga() - // The -s option is not supported for metaspades. Each time this is called with `meta.single_end` it's because - // read depth was normalized with BBNorm, which actually outputs pairs, but in an interleaved file. - def readstr = meta.single_end ? "--12 ${reads}" : "-1 ${reads[0]} -2 ${reads[1]}" - - if ( params.spades_fix_cpus == -1 || task.cpus == params.spades_fix_cpus ) - """ - metaspades.py \ - $args \ - --threads "${task.cpus}" \ - --memory $maxmem \ - ${readstr} \ - -o spades - mv spades/assembly_graph_with_scaffolds.gfa SPAdes-${meta.id}_graph.gfa - mv spades/scaffolds.fasta SPAdes-${meta.id}_scaffolds.fasta - mv spades/contigs.fasta SPAdes-${meta.id}_contigs.fasta - mv spades/spades.log SPAdes-${meta.id}.log - gzip "SPAdes-${meta.id}_contigs.fasta" - gzip "SPAdes-${meta.id}_graph.gfa" - gzip -c "SPAdes-${meta.id}_scaffolds.fasta" > "SPAdes-${meta.id}_scaffolds.fasta.gz" - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - python: \$(python --version 2>&1 | sed 's/Python //g') - metaspades: \$(metaspades.py --version | sed "s/SPAdes genome assembler v//; s/ \\[.*//") - END_VERSIONS - """ - else - error "ERROR: '--spades_fix_cpus' was specified, but not succesfully applied. Likely this is caused by changed process properties in a custom config file." 
-} diff --git a/modules/local/spadeshybrid.nf b/modules/local/spadeshybrid.nf deleted file mode 100644 index 13578a69..00000000 --- a/modules/local/spadeshybrid.nf +++ /dev/null @@ -1,49 +0,0 @@ -process SPADESHYBRID { - tag "$meta.id" - - conda "bioconda::spades=3.15.3" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/spades:3.15.3--h95f258a_0' : - 'biocontainers/spades:3.15.3--h95f258a_0' }" - - input: - tuple val(meta), path(long_reads), path(short_reads) - - output: - tuple val(meta), path("SPAdesHybrid-${meta.id}_scaffolds.fasta"), emit: assembly - path "SPAdesHybrid-${meta.id}.log" , emit: log - path "SPAdesHybrid-${meta.id}_contigs.fasta.gz" , emit: contigs_gz - path "SPAdesHybrid-${meta.id}_scaffolds.fasta.gz" , emit: assembly_gz - path "SPAdesHybrid-${meta.id}_graph.gfa.gz" , emit: graph - path "versions.yml" , emit: versions - - script: - def args = task.ext.args ?: '' - maxmem = task.memory.toGiga() - if ( params.spadeshybrid_fix_cpus == -1 || task.cpus == params.spadeshybrid_fix_cpus ) - """ - metaspades.py \ - $args \ - --threads "${task.cpus}" \ - --memory $maxmem \ - --pe1-1 ${short_reads[0]} \ - --pe1-2 ${short_reads[1]} \ - --nanopore ${long_reads} \ - -o spades - mv spades/assembly_graph_with_scaffolds.gfa SPAdesHybrid-${meta.id}_graph.gfa - mv spades/scaffolds.fasta SPAdesHybrid-${meta.id}_scaffolds.fasta - mv spades/contigs.fasta SPAdesHybrid-${meta.id}_contigs.fasta - mv spades/spades.log SPAdesHybrid-${meta.id}.log - gzip "SPAdesHybrid-${meta.id}_contigs.fasta" - gzip "SPAdesHybrid-${meta.id}_graph.gfa" - gzip -c "SPAdesHybrid-${meta.id}_scaffolds.fasta" > "SPAdesHybrid-${meta.id}_scaffolds.fasta.gz" - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - python: \$(python --version 2>&1 | sed 's/Python //g') - metaspades: \$(metaspades.py --version | sed "s/SPAdes genome assembler v//; s/ \\[.*//") - END_VERSIONS - """ - else - error 
"ERROR: '--spadeshybrid_fix_cpus' was specified, but not succesfully applied. Likely this is caused by changed process properties in a custom config file." -} diff --git a/modules/nf-core/spades/environment.yml b/modules/nf-core/spades/environment.yml new file mode 100644 index 00000000..8cc5321f --- /dev/null +++ b/modules/nf-core/spades/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::spades=4.0.0 diff --git a/modules/nf-core/spades/main.nf b/modules/nf-core/spades/main.nf new file mode 100644 index 00000000..81b36809 --- /dev/null +++ b/modules/nf-core/spades/main.nf @@ -0,0 +1,98 @@ +process SPADES { + tag "$meta.id" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/spades:4.0.0--h5fb382e_1' : + 'biocontainers/spades:4.0.0--h5fb382e_1' }" + + input: + tuple val(meta), path(illumina), path(pacbio), path(nanopore) + path yml + path hmm + + output: + tuple val(meta), path('*.scaffolds.fa.gz') , optional:true, emit: scaffolds + tuple val(meta), path('*.contigs.fa.gz') , optional:true, emit: contigs + tuple val(meta), path('*.transcripts.fa.gz') , optional:true, emit: transcripts + tuple val(meta), path('*.gene_clusters.fa.gz'), optional:true, emit: gene_clusters + tuple val(meta), path('*.assembly.gfa.gz') , optional:true, emit: gfa + tuple val(meta), path('warnings.log') , optional:true, emit: warnings + tuple val(meta), path('*.spades.log') , emit: log + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def maxmem = task.memory.toGiga() + def illumina_reads = illumina ? ( meta.single_end ? "-s $illumina" : "-1 ${illumina[0]} -2 ${illumina[1]}" ) : "" + def pacbio_reads = pacbio ? 
"--pacbio $pacbio" : "" + def nanopore_reads = nanopore ? "--nanopore $nanopore" : "" + def custom_hmms = hmm ? "--custom-hmms $hmm" : "" + def reads = yml ? "--dataset $yml" : "$illumina_reads $pacbio_reads $nanopore_reads" + """ + spades.py \\ + $args \\ + --threads $task.cpus \\ + --memory $maxmem \\ + $custom_hmms \\ + $reads \\ + -o ./ + mv spades.log ${prefix}.spades.log + + if [ -f scaffolds.fasta ]; then + mv scaffolds.fasta ${prefix}.scaffolds.fa + gzip -n ${prefix}.scaffolds.fa + fi + if [ -f contigs.fasta ]; then + mv contigs.fasta ${prefix}.contigs.fa + gzip -n ${prefix}.contigs.fa + fi + if [ -f transcripts.fasta ]; then + mv transcripts.fasta ${prefix}.transcripts.fa + gzip -n ${prefix}.transcripts.fa + fi + if [ -f assembly_graph_with_scaffolds.gfa ]; then + mv assembly_graph_with_scaffolds.gfa ${prefix}.assembly.gfa + gzip -n ${prefix}.assembly.gfa + fi + + if [ -f gene_clusters.fasta ]; then + mv gene_clusters.fasta ${prefix}.gene_clusters.fa + gzip -n ${prefix}.gene_clusters.fa + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + spades: \$(spades.py --version 2>&1 | sed -n 's/^.*SPAdes genome assembler v//p') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def maxmem = task.memory.toGiga() + def illumina_reads = illumina ? ( meta.single_end ? "-s $illumina" : "-1 ${illumina[0]} -2 ${illumina[1]}" ) : "" + def pacbio_reads = pacbio ? "--pacbio $pacbio" : "" + def nanopore_reads = nanopore ? "--nanopore $nanopore" : "" + def custom_hmms = hmm ? "--custom-hmms $hmm" : "" + def reads = yml ? 
"--dataset $yml" : "$illumina_reads $pacbio_reads $nanopore_reads" + """ + echo "" | gzip > ${prefix}.scaffolds.fa.gz + echo "" | gzip > ${prefix}.contigs.fa.gz + echo "" | gzip > ${prefix}.transcripts.fa.gz + echo "" | gzip > ${prefix}.gene_clusters.fa.gz + echo "" | gzip > ${prefix}.assembly.gfa.gz + touch ${prefix}.spades.log + touch warnings.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + spades: \$(spades.py --version 2>&1 | sed -n 's/^.*SPAdes genome assembler v//p') + END_VERSIONS + """ +} diff --git a/modules/nf-core/spades/meta.yml b/modules/nf-core/spades/meta.yml new file mode 100644 index 00000000..986871be --- /dev/null +++ b/modules/nf-core/spades/meta.yml @@ -0,0 +1,99 @@ +name: spades +description: Assembles a small genome (bacterial, fungal, viral) +keywords: + - genome + - assembly + - genome assembler + - small genome + - de novo assembler +tools: + - spades: + description: SPAdes (St. Petersburg genome assembler) is intended for both standard isolates and single-cell MDA bacteria assemblies. + homepage: http://cab.spbu.ru/files/release3.15.0/manual.html + documentation: http://cab.spbu.ru/files/release3.15.0/manual.html + tool_dev_url: https://github.com/ablab/spades + doi: 10.1089/cmb.2012.0021 + licence: ["GPL v2"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - illumina: + type: file + description: | + List of input FastQ (Illumina or PacBio CCS reads) files + of size 1 and 2 for single-end and paired-end data, + respectively. This input data type is required. + - pacbio: + type: file + description: | + List of input PacBio CLR FastQ files of size 1. + - nanopore: + type: file + description: | + List of input FastQ files of size 1, originating from Oxford Nanopore technology. + - yml: + type: file + description: | + Path to yml file containing read information. 
+ The raw FASTQ files listed in this YAML file MUST be supplied to the respective illumina/pacbio/nanopore input channel(s) _in addition_ to this YML. + File entries in this yml must contain only the file name and no paths. + pattern: "*.{yml,yaml}" + - hmm: + type: file + description: File or directory with amino acid HMMs for Spades HMM-guided mode. +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - scaffolds: + type: file + description: | + Fasta file containing scaffolds + pattern: "*.fa.gz" + - contigs: + type: file + description: | + Fasta file containing contigs + pattern: "*.fa.gz" + - transcripts: + type: file + description: | + Fasta file containing transcripts + pattern: "*.fa.gz" + - gene_clusters: + type: file + description: | + Fasta file containing gene_clusters + pattern: "*.fa.gz" + - gfa: + type: file + description: | + gfa file containing assembly + pattern: "*.gfa.gz" + - log: + type: file + description: | + Spades log file + pattern: "*.spades.log" + - log: + type: file + description: | + Spades warning log file + pattern: "*.warning.log" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@JoseEspinosa" + - "@drpatelh" + - "@d4straub" +maintainers: + - "@JoseEspinosa" + - "@drpatelh" + - "@d4straub" diff --git a/modules/nf-core/spades/tests/main.nf.test b/modules/nf-core/spades/tests/main.nf.test new file mode 100644 index 00000000..3a93f486 --- /dev/null +++ b/modules/nf-core/spades/tests/main.nf.test @@ -0,0 +1,228 @@ +nextflow_process { + + name "Test Process SPADES" + script "../main.nf" + process "SPADES" + config "./nextflow.config" + tag "modules" + tag "modules_nfcore" + tag "spades" + + test("sarscov2 - se ") { + + when { + process { + """ + input[0] = [ [ id:'test', single_end:true ], + [ file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_2.fastq.gz", 
checkIfExists: true) ], + [], + [] + ] + input[1] = [] + input[2] = [] + """ + } + } + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.scaffolds, + process.out.contigs, + process.out.transcripts, + process.out.gene_clusters, + process.out.gfa, + process.out.versions + ).match() }, + { assert path(process.out.log[0][1]).readLines().any { it.contains("SPAdes pipeline finished") } } + ) + } + } + + test("sarscov2 - pe ") { + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_1.fastq.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_2.fastq.gz", checkIfExists: true) ], + [], + [] + ] + input [1] = [] + input [2] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.scaffolds, + process.out.contigs, + process.out.transcripts, + process.out.gene_clusters, + process.out.gfa, + process.out.versions + ).match() }, + { assert path(process.out.log[0][1]).readLines().any { it.contains("SPAdes pipeline finished") } }, + { assert file(process.out.warnings[0][1]).find{ file(it).name == "warnings.log"} } + ) + } + + } + // isnt perfect, because CCS reads should rather be used with -s instead of --pacbio + test("sarscov2 - pe - pacbio ") { + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_1.fastq.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_2.fastq.gz", checkIfExists: true) ], + [], + [ file(params.modules_testdata_base_path + "genomics/sarscov2/nanopore/fastq/test.fastq.gz", checkIfExists: true) ] + ] + input [1] = [] + input [2] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert 
snapshot(process.out.scaffolds, + process.out.contigs, + process.out.transcripts, + process.out.gene_clusters, + process.out.gfa, + process.out.versions + ).match() }, + { assert path(process.out.log[0][1]).readLines().any { it.contains("SPAdes pipeline finished") } }, + { assert file(process.out.warnings[0][1]).find{ file(it).name == "warnings.log"} } + ) + } + } + + test("sarscov2 - pe - nanopore ") { + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_1.fastq.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_2.fastq.gz", checkIfExists: true) ], + [], + [ file(params.modules_testdata_base_path + "genomics/sarscov2/nanopore/fastq/test.fastq.gz", checkIfExists: true) ] + ] + input [1] = [] + input [2] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.scaffolds, + process.out.contigs, + process.out.transcripts, + process.out.gene_clusters, + process.out.gfa, + process.out.versions + ).match() }, + { assert path(process.out.log[0][1]).readLines().any { it.contains("SPAdes pipeline finished") } }, + { assert file(process.out.warnings[0][1]).find{ file(it).name == "warnings.log"} } + ) + } + } + + test("sarscov2 - pe - nanopore - yml ") { + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_1.fastq.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_2.fastq.gz", checkIfExists: true) ], + [], + [ file(params.modules_testdata_base_path + "genomics/sarscov2/nanopore/fastq/test.fastq.gz", checkIfExists: true) ] + ] + input [1] = file(params.modules_testdata_base_path + "delete_me/spades/spades_input_yml.yml", checkIfExists: true) + input [2] = [] + """ + } + } + + then { 
+ assertAll( + { assert process.success }, + { assert snapshot(process.out.scaffolds, + process.out.contigs, + process.out.transcripts, + process.out.gene_clusters, + process.out.gfa, + process.out.versions + ).match() }, + { assert path(process.out.log[0][1]).readLines().any { it.contains("SPAdes pipeline finished") } }, + { assert file(process.out.warnings[0][1]).find{ file(it).name == "warnings.log"} } + ) + } + } + + test("sarscov2 - pe - hmm ") { + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + [ file("https://github.com/nf-core/test-datasets/raw/viralrecon/illumina/sispa/SRR11140744_R1.fastq.gz", checkIfExists: true), + file("https://github.com/nf-core/test-datasets/raw/viralrecon/illumina/sispa/SRR11140744_R2.fastq.gz", checkIfExists: true) ], + [], + [] + ] + input [1] = [] + input [2] = [file(params.modules_testdata_base_path + "/genomics/sarscov2/genome/proteome.hmm.gz", checkIfExists: true)] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.scaffolds, + process.out.contigs, + process.out.transcripts, + process.out.gene_clusters, + process.out.gfa, + process.out.versions + ).match() }, + { assert path(process.out.log[0][1]).readLines().any { it.contains("SPAdes pipeline finished") } } + ) + } + } + + test("sarscov2 - pe - stub ") { + options "-stub" + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_1.fastq.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_2.fastq.gz", checkIfExists: true) ], + [], + [] + ] + input [1] = [] + input [2] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + +} diff --git a/modules/nf-core/spades/tests/main.nf.test.snap b/modules/nf-core/spades/tests/main.nf.test.snap new file mode 
100644 index 00000000..3f8c7718 --- /dev/null +++ b/modules/nf-core/spades/tests/main.nf.test.snap @@ -0,0 +1,403 @@ +{ + "sarscov2 - pe - nanopore ": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.scaffolds.fa.gz:md5,7ddaf03740df422a93fcaffbcd7e9679" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.contigs.fa.gz:md5,7ddaf03740df422a93fcaffbcd7e9679" + ] + ], + [ + + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.assembly.gfa.gz:md5,19418df83534fc93543dec4ec9b2ae72" + ] + ], + [ + "versions.yml:md5,990abcdf543421412170e5cf413ec56d" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-07T07:13:08.663068339" + }, + "sarscov2 - pe - hmm ": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.scaffolds.fa.gz:md5,ce077d5f3380690f8d9a5fe188f82128" + ] + ], + [ + + ], + [ + + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.assembly.gfa.gz:md5,07136eab8e231f095dc5dd62f1b62a91" + ] + ], + [ + "versions.yml:md5,990abcdf543421412170e5cf413ec56d" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-07T08:04:19.650636803" + }, + "sarscov2 - pe - pacbio ": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.scaffolds.fa.gz:md5,7ddaf03740df422a93fcaffbcd7e9679" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.contigs.fa.gz:md5,7ddaf03740df422a93fcaffbcd7e9679" + ] + ], + [ + + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.assembly.gfa.gz:md5,19418df83534fc93543dec4ec9b2ae72" + ] + ], + [ + "versions.yml:md5,990abcdf543421412170e5cf413ec56d" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-07T07:12:49.305512756" + }, + "sarscov2 - pe ": { + "content": [ + [ + + ], + [ + [ + { + "id": "test", + "single_end": false + }, + 
"test.contigs.fa.gz:md5,70e4a5485dd59566b212a199c31c343b" + ] + ], + [ + + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.assembly.gfa.gz:md5,b773132d52be5090cdbdf5a643027093" + ] + ], + [ + "versions.yml:md5,990abcdf543421412170e5cf413ec56d" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-07T07:12:36.161628498" + }, + "sarscov2 - pe - nanopore - yml ": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.scaffolds.fa.gz:md5,7ddaf03740df422a93fcaffbcd7e9679" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.contigs.fa.gz:md5,7ddaf03740df422a93fcaffbcd7e9679" + ] + ], + [ + + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.assembly.gfa.gz:md5,19418df83534fc93543dec4ec9b2ae72" + ] + ], + [ + "versions.yml:md5,990abcdf543421412170e5cf413ec56d" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-07T07:13:21.868805946" + }, + "sarscov2 - se ": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.scaffolds.fa.gz:md5,65ba6a517c152dbe219bf4b5b92bdad7" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.contigs.fa.gz:md5,65ba6a517c152dbe219bf4b5b92bdad7" + ] + ], + [ + + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.assembly.gfa.gz:md5,e4836fdf7104d79e314e3e50986b4bb2" + ] + ], + [ + "versions.yml:md5,990abcdf543421412170e5cf413ec56d" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-07T07:12:16.562778962" + }, + "sarscov2 - pe - stub ": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.scaffolds.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.contigs.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "2": [ + [ + { + "id": "test", + 
"single_end": false + }, + "test.transcripts.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.gene_clusters.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "4": [ + [ + { + "id": "test", + "single_end": false + }, + "test.assembly.gfa.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": false + }, + "warnings.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "6": [ + [ + { + "id": "test", + "single_end": false + }, + "test.spades.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "7": [ + "versions.yml:md5,990abcdf543421412170e5cf413ec56d" + ], + "contigs": [ + [ + { + "id": "test", + "single_end": false + }, + "test.contigs.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "gene_clusters": [ + [ + { + "id": "test", + "single_end": false + }, + "test.gene_clusters.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "gfa": [ + [ + { + "id": "test", + "single_end": false + }, + "test.assembly.gfa.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "log": [ + [ + { + "id": "test", + "single_end": false + }, + "test.spades.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "scaffolds": [ + [ + { + "id": "test", + "single_end": false + }, + "test.scaffolds.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "transcripts": [ + [ + { + "id": "test", + "single_end": false + }, + "test.transcripts.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,990abcdf543421412170e5cf413ec56d" + ], + "warnings": [ + [ + { + "id": "test", + "single_end": false + }, + "warnings.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-07T07:20:07.195881734" + } +} \ No newline at end of file diff --git a/modules/nf-core/spades/tests/nextflow.config b/modules/nf-core/spades/tests/nextflow.config new file mode 100644 index 
00000000..adec1bde --- /dev/null +++ b/modules/nf-core/spades/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: SPADES { + ext.args = '--rnaviral' + } +} diff --git a/modules/nf-core/spades/tests/tags.yml b/modules/nf-core/spades/tests/tags.yml new file mode 100644 index 00000000..035861ff --- /dev/null +++ b/modules/nf-core/spades/tests/tags.yml @@ -0,0 +1,2 @@ +spades: + - "modules/nf-core/spades/**" diff --git a/workflows/mag.nf b/workflows/mag.nf index f71d4218..ea5abc14 100644 --- a/workflows/mag.nf +++ b/workflows/mag.nf @@ -47,6 +47,8 @@ include { KRONA_KRONADB } from '../modul include { KRONA_KTIMPORTTAXONOMY } from '../modules/nf-core/krona/ktimporttaxonomy/main' include { KRAKENTOOLS_KREPORT2KRONA as KREPORT2KRONA_CENTRIFUGE } from '../modules/nf-core/krakentools/kreport2krona/main' include { CAT_FASTQ } from '../modules/nf-core/cat/fastq/main' +include { SPADES as METASPADES } from '../modules/nf-core/spades/main' +include { SPADES as METASPADESHYBRID } from '../modules/nf-core/spades/main' include { GUNZIP as GUNZIP_ASSEMBLIES } from '../modules/nf-core/gunzip' include { PRODIGAL } from '../modules/nf-core/prodigal/main' include { PROKKA } from '../modules/nf-core/prokka/main' @@ -67,8 +69,6 @@ include { POOL_SINGLE_READS as POOL_SHORT_SINGLE_READS } from '../modules include { POOL_PAIRED_READS } from '../modules/local/pool_paired_reads' include { POOL_SINGLE_READS as POOL_LONG_READS } from '../modules/local/pool_single_reads' include { MEGAHIT } from '../modules/local/megahit' -include { SPADES } from '../modules/local/spades' -include { SPADESHYBRID } from '../modules/local/spadeshybrid' include { QUAST } from '../modules/local/quast' include { QUAST_BINS } from '../modules/local/quast_bins' include { QUAST_BINS_SUMMARY } from '../modules/local/quast_bins_summary' @@ -589,14 +589,14 @@ workflow MAG { } if (!params.single_end && !params.skip_spades){ - SPADES ( ch_short_reads_spades ) - ch_spades_assemblies = SPADES.out.assembly + METASPADES 
( ch_short_reads_spades.map{ meta, reads -> [meta, reads, [], []]}, [], [] ) + ch_spades_assemblies = METASPADES.out.scaffolds .map { meta, assembly -> def meta_new = meta + [assembler: 'SPAdes'] [ meta_new, assembly ] } ch_assemblies = ch_assemblies.mix(ch_spades_assemblies) - ch_versions = ch_versions.mix(SPADES.out.versions.first()) + ch_versions = ch_versions.mix(METASPADES.out.versions.first()) } if (!params.single_end && !params.skip_spadeshybrid){ @@ -606,16 +606,16 @@ workflow MAG { ch_reads_spadeshybrid = ch_long_reads_spades .map { meta, reads -> [ meta.id, meta, reads ] } .combine(ch_short_reads_spades_tmp, by: 0) - .map { id, meta_long, long_reads, meta_short, short_reads -> [ meta_short, long_reads, short_reads ] } + .map { id, meta_long, long_reads, meta_short, short_reads -> [ meta_short, short_reads, [], long_reads ] } - SPADESHYBRID ( ch_reads_spadeshybrid ) - ch_spadeshybrid_assemblies = SPADESHYBRID.out.assembly + METASPADESHYBRID ( ch_reads_spadeshybrid, [], [] ) + ch_spadeshybrid_assemblies = METASPADESHYBRID.out.scaffolds .map { meta, assembly -> def meta_new = meta + [assembler: "SPAdesHybrid"] [ meta_new, assembly ] } ch_assemblies = ch_assemblies.mix(ch_spadeshybrid_assemblies) - ch_versions = ch_versions.mix(SPADESHYBRID.out.versions.first()) + ch_versions = ch_versions.mix(METASPADESHYBRID.out.versions.first()) } } else { ch_assemblies_split = ch_input_assemblies From 391ebd4a60ff52c23fc865592c3a3e531a775d3e Mon Sep 17 00:00:00 2001 From: "James A. 
Fellows Yates" Date: Mon, 9 Sep 2024 16:12:22 +0200 Subject: [PATCH 02/21] Update CHANGELOG.md --- CHANGELOG.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 18ddc29e..20583c12 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,10 +9,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Changed` +- [#666](https://github.com/nf-core/mag/pull/666) - Update SPAdes to version 4.0.0 (requested by @elsherbini, fix by @jfy133) + ### `Fixed` ### `Dependencies` +| Tool | Previous version | New version | +| ------ | ---------------- | ----------- | +| SPAdes | 3.15.3 | 4.0.0 | + ### `Deprecated` ## 3.0.3 [2024-08-27] From e6301c83f3aad9d75926beb86cd0d1a65583ffef Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Tue, 10 Sep 2024 10:42:50 +0200 Subject: [PATCH 03/21] Fix output docs --- docs/output.md | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/output.md b/docs/output.md index 5f889056..320cf09b 100644 --- a/docs/output.md +++ b/docs/output.md @@ -206,10 +206,10 @@ Trimmed (short) reads are assembled with both megahit and SPAdes. 
Hybrid assembl Output files - `Assembly/SPAdes/` - - `[sample/group]_scaffolds.fasta.gz`: Compressed assembled scaffolds in fasta format - - `[sample/group]_graph.gfa.gz`: Compressed assembly graph in gfa format - - `[sample/group]_contigs.fasta.gz`: Compressed assembled contigs in fasta format - - `[sample/group].log`: Log file + - `[sample/group].scaffolds.fa.gz`: Compressed assembled scaffolds in fasta format + - `[sample/group].assembly.gfa.gz`: Compressed assembly graph in gfa format + - `[sample/group].contigs.fa.gz`: Compressed assembled contigs in fasta format + - `[sample/group].spades.log`: Log file - `QC/[sample/group]/`: Directory containing QUAST files and Bowtie2 mapping logs - `SPAdes-[sample].bowtie2.log`: Bowtie2 log file indicating how many reads have been mapped from the sample that the metagenome was assembled from, only present if `--coassemble_group` is not set. - `SPAdes-[sample/group]-[sampleToMap].bowtie2.log`: Bowtie2 log file indicating how many reads have been mapped from the respective sample ("sampleToMap"). 
@@ -225,10 +225,10 @@ SPAdesHybrid is a part of the [SPAdes](http://cab.spbu.ru/software/spades/) soft Output files - `Assembly/SPAdesHybrid/` - - `[sample/group]_scaffolds.fasta.gz`: Compressed assembled scaffolds in fasta format - - `[sample/group]_graph.gfa.gz`: Compressed assembly graph in gfa format - - `[sample/group]_contigs.fasta.gz`: Compressed assembled contigs in fasta format - - `[sample/group].log`: Log file + - `[sample/group].scaffolds.fa.gz`: Compressed assembled scaffolds in fasta format + - `[sample/group].assembly.gfa.gz`: Compressed assembly graph in gfa format + - `[sample/group].contigs.fa.gz`: Compressed assembled contigs in fasta format + - `[sample/group].spades.log`: Log file - `QC/[sample/group]/`: Directory containing QUAST files and Bowtie2 mapping logs - `SPAdesHybrid-[sample].bowtie2.log`: Bowtie2 log file indicating how many reads have been mapped from the sample that the metagenome was assembled from, only present if `--coassemble_group` is not set. - `SPAdesHybrid-[sample/group]-[sampleToMap].bowtie2.log`: Bowtie2 log file indicating how many reads have been mapped from the respective sample ("sampleToMap"). From debd080a696a3bf3dfe423fbcdd11f8d5c599333 Mon Sep 17 00:00:00 2001 From: "James A. 
Fellows Yates" Date: Tue, 10 Sep 2024 10:43:33 +0200 Subject: [PATCH 04/21] Make sure warnings log also gets a prefix --- modules.json | 2 +- modules/nf-core/spades/main.nf | 8 ++++++-- modules/nf-core/spades/tests/main.nf.test.snap | 6 +++--- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/modules.json b/modules.json index d2009658..bbfcc47f 100644 --- a/modules.json +++ b/modules.json @@ -239,7 +239,7 @@ }, "spades": { "branch": "master", - "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", + "git_sha": "cfebb244d8c83ae533bf2db399f9af361927d504", "installed_by": ["modules"] }, "tiara/tiara": { diff --git a/modules/nf-core/spades/main.nf b/modules/nf-core/spades/main.nf index 81b36809..36cdfe44 100644 --- a/modules/nf-core/spades/main.nf +++ b/modules/nf-core/spades/main.nf @@ -18,7 +18,7 @@ process SPADES { tuple val(meta), path('*.transcripts.fa.gz') , optional:true, emit: transcripts tuple val(meta), path('*.gene_clusters.fa.gz'), optional:true, emit: gene_clusters tuple val(meta), path('*.assembly.gfa.gz') , optional:true, emit: gfa - tuple val(meta), path('warnings.log') , optional:true, emit: warnings + tuple val(meta), path('*.warnings.log') , optional:true, emit: warnings tuple val(meta), path('*.spades.log') , emit: log path "versions.yml" , emit: versions @@ -66,6 +66,10 @@ process SPADES { gzip -n ${prefix}.gene_clusters.fa fi + if [ -f warnings.log ]; then + mv warnings.log ${prefix}.warnings.log + fi + cat <<-END_VERSIONS > versions.yml "${task.process}": spades: \$(spades.py --version 2>&1 | sed -n 's/^.*SPAdes genome assembler v//p') @@ -88,7 +92,7 @@ process SPADES { echo "" | gzip > ${prefix}.gene_clusters.fa.gz echo "" | gzip > ${prefix}.assembly.gfa.gz touch ${prefix}.spades.log - touch warnings.log + touch ${prefix}.warnings.log cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/spades/tests/main.nf.test.snap b/modules/nf-core/spades/tests/main.nf.test.snap index 3f8c7718..e1b3b652 100644 --- 
a/modules/nf-core/spades/tests/main.nf.test.snap +++ b/modules/nf-core/spades/tests/main.nf.test.snap @@ -311,7 +311,7 @@ "id": "test", "single_end": false }, - "warnings.log:md5,d41d8cd98f00b204e9800998ecf8427e" + "test.warnings.log:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "6": [ @@ -389,7 +389,7 @@ "id": "test", "single_end": false }, - "warnings.log:md5,d41d8cd98f00b204e9800998ecf8427e" + "test.warnings.log:md5,d41d8cd98f00b204e9800998ecf8427e" ] ] } @@ -400,4 +400,4 @@ }, "timestamp": "2024-06-07T07:20:07.195881734" } -} \ No newline at end of file +} From 4d19b8fc90b43636040c93d051ac21cba964a4e2 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Thu, 12 Sep 2024 16:01:53 +0200 Subject: [PATCH 05/21] Fix a syntax error (Ben made me do it) --- workflows/mag.nf | 222 ++++++++++++++++++++++++----------------------- 1 file changed, 113 insertions(+), 109 deletions(-) diff --git a/workflows/mag.nf b/workflows/mag.nf index c3bb9fca..3e628f37 100644 --- a/workflows/mag.nf +++ b/workflows/mag.nf @@ -50,6 +50,7 @@ include { CAT_FASTQ } from '../modul include { SPADES as METASPADES } from '../modules/nf-core/spades/main' include { SPADES as METASPADESHYBRID } from '../modules/nf-core/spades/main' include { GUNZIP as GUNZIP_ASSEMBLIES } from '../modules/nf-core/gunzip' +include { GUNZIP as GUNZIP_ASSEMBLYINPUT } from '../modules/nf-core/gunzip' include { PRODIGAL } from '../modules/nf-core/prodigal/main' include { PROKKA } from '../modules/nf-core/prokka/main' include { MMSEQS_DATABASES } from '../modules/nf-core/mmseqs/databases/main' @@ -79,112 +80,105 @@ include { CAT_SUMMARY } from "../modules include { BIN_SUMMARY } from '../modules/local/bin_summary' include { COMBINE_TSV as COMBINE_SUMMARY_TSV } from '../modules/local/combine_tsv' -//////////////////////////////////////////////////// -/* -- Create channel for reference databases -- */ -//////////////////////////////////////////////////// - -if ( params.host_genome ) { - host_fasta = 
params.genomes[params.host_genome].fasta ?: false - ch_host_fasta = Channel - .value(file( "${host_fasta}" )) - host_bowtie2index = params.genomes[params.host_genome].bowtie2 ?: false - ch_host_bowtie2index = Channel - .value(file( "${host_bowtie2index}/*" )) -} else if ( params.host_fasta ) { - ch_host_fasta = Channel - .value(file( "${params.host_fasta}" )) -} else { - ch_host_fasta = Channel.empty() -} - -if (params.busco_db) { - ch_busco_db = file(params.busco_db, checkIfExists: true) -} else { - ch_busco_db = [] -} +workflow MAG { -if(params.checkm_db) { - ch_checkm_db = file(params.checkm_db, checkIfExists: true) -} + take: + ch_raw_short_reads // channel: samplesheet read in from --input + ch_raw_long_reads + ch_input_assemblies -if (params.gunc_db) { - ch_gunc_db = file(params.gunc_db, checkIfExists: true) -} else { - ch_gunc_db = Channel.empty() -} + main: -if(params.kraken2_db){ - ch_kraken2_db_file = file(params.kraken2_db, checkIfExists: true) -} else { - ch_kraken2_db_file = [] -} + ch_versions = Channel.empty() + ch_multiqc_files = Channel.empty() -if(params.cat_db){ - ch_cat_db_file = Channel - .value(file( "${params.cat_db}" )) -} else { - ch_cat_db_file = Channel.empty() -} + //////////////////////////////////////////////////// + /* -- Create channel for reference databases -- */ + //////////////////////////////////////////////////// + + if ( params.host_genome ) { + host_fasta = params.genomes[params.host_genome].fasta ?: false + ch_host_fasta = Channel + .value(file( "${host_fasta}" )) + host_bowtie2index = params.genomes[params.host_genome].bowtie2 ?: false + ch_host_bowtie2index = Channel + .value(file( "${host_bowtie2index}/*" )) + } else if ( params.host_fasta ) { + ch_host_fasta = Channel + .value(file( "${params.host_fasta}" )) + } else { + ch_host_fasta = Channel.empty() + } -if(params.krona_db){ - ch_krona_db_file = Channel - .value(file( "${params.krona_db}" )) -} else { - ch_krona_db_file = Channel.empty() -} + if (params.busco_db) { + 
ch_busco_db = file(params.busco_db, checkIfExists: true) + } else { + ch_busco_db = [] + } -if(!params.keep_phix) { - ch_phix_db_file = Channel - .value(file( "${params.phix_reference}" )) -} + if(params.checkm_db) { + ch_checkm_db = file(params.checkm_db, checkIfExists: true) + } -if (!params.keep_lambda) { - ch_nanolyse_db = Channel - .value(file( "${params.lambda_reference}" )) -} + if (params.gunc_db) { + ch_gunc_db = file(params.gunc_db, checkIfExists: true) + } else { + ch_gunc_db = Channel.empty() + } -if (params.genomad_db){ - ch_genomad_db = file(params.genomad_db, checkIfExists: true) -} else { - ch_genomad_db = Channel.empty() -} + if(params.kraken2_db){ + ch_kraken2_db_file = file(params.kraken2_db, checkIfExists: true) + } else { + ch_kraken2_db_file = [] + } -gtdb = ( params.skip_binqc || params.skip_gtdbtk ) ? false : params.gtdb_db + if(params.cat_db){ + ch_cat_db_file = Channel + .value(file( "${params.cat_db}" )) + } else { + ch_cat_db_file = Channel.empty() + } -if (gtdb) { - gtdb = file( "${gtdb}", checkIfExists: true) - gtdb_mash = params.gtdb_mash ? file("${params.gtdb_mash}", checkIfExists: true) : [] -} else { - gtdb = [] -} + if(params.krona_db){ + ch_krona_db_file = Channel + .value(file( "${params.krona_db}" )) + } else { + ch_krona_db_file = Channel.empty() + } -if(params.metaeuk_db && !params.skip_metaeuk) { - ch_metaeuk_db = Channel. 
- value(file("${params.metaeuk_db}", checkIfExists: true)) -} else { - ch_metaeuk_db = Channel.empty() -} + if(!params.keep_phix) { + ch_phix_db_file = Channel + .value(file( "${params.phix_reference}" )) + } -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - RUN MAIN WORKFLOW -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ + if (!params.keep_lambda) { + ch_nanolyse_db = Channel + .value(file( "${params.lambda_reference}" )) + } -// Additional info for completion email and summary -def busco_failed_bins = [:] + if (params.genomad_db){ + ch_genomad_db = file(params.genomad_db, checkIfExists: true) + } else { + ch_genomad_db = Channel.empty() + } -workflow MAG { + gtdb = ( params.skip_binqc || params.skip_gtdbtk ) ? false : params.gtdb_db - take: - ch_raw_short_reads // channel: samplesheet read in from --input - ch_raw_long_reads - ch_input_assemblies + if (gtdb) { + gtdb = file( "${gtdb}", checkIfExists: true) + gtdb_mash = params.gtdb_mash ? 
file("${params.gtdb_mash}", checkIfExists: true) : [] + } else { + gtdb = [] + } - main: + if(params.metaeuk_db && !params.skip_metaeuk) { + ch_metaeuk_db = Channel.value(file("${params.metaeuk_db}", checkIfExists: true)) + } else { + ch_metaeuk_db = Channel.empty() + } - ch_versions = Channel.empty() - ch_multiqc_files = Channel.empty() + // Additional info for completion email and summary + def busco_failed_bins = [:] // Get checkM database if not supplied @@ -510,7 +504,8 @@ workflow MAG { */ if ( !params.assembly_input ) { - // Co-assembly: prepare grouping for MEGAHIT and for pooling for SPAdes + + // Co-assembly preparation: grouping for MEGAHIT and for pooling for SPAdes if (params.coassemble_group) { // short reads // group and set group as new id @@ -549,20 +544,6 @@ workflow MAG { ch_long_reads_grouped = ch_long_reads } - ch_assemblies = Channel.empty() - - if (!params.skip_megahit){ - MEGAHIT ( ch_short_reads_grouped ) - ch_megahit_assemblies = MEGAHIT.out.assembly - .map { meta, assembly -> - def meta_new = meta + [assembler: 'MEGAHIT'] - [ meta_new, assembly ] - } - ch_assemblies = ch_assemblies.mix(ch_megahit_assemblies) - ch_versions = ch_versions.mix(MEGAHIT.out.versions.first()) - } - - // Co-assembly: pool reads for SPAdes if ( ! params.skip_spades || ! 
params.skip_spadeshybrid ){ if ( params.coassemble_group ) { if ( params.bbnorm ) { @@ -594,6 +575,10 @@ workflow MAG { ch_long_reads_spades = Channel.empty() } + // Assembly + + ch_assembled_contigs = Channel.empty() + if (!params.single_end && !params.skip_spades){ METASPADES ( ch_short_reads_spades.map{ meta, reads -> [meta, reads, [], []]}, [], [] ) ch_spades_assemblies = METASPADES.out.scaffolds @@ -601,7 +586,7 @@ workflow MAG { def meta_new = meta + [assembler: 'SPAdes'] [ meta_new, assembly ] } - ch_assemblies = ch_assemblies.mix(ch_spades_assemblies) + ch_assembled_contigs = ch_assembled_contigs.mix(ch_spades_assemblies) ch_versions = ch_versions.mix(METASPADES.out.versions.first()) } @@ -620,9 +605,28 @@ workflow MAG { def meta_new = meta + [assembler: "SPAdesHybrid"] [ meta_new, assembly ] } - ch_assemblies = ch_assemblies.mix(ch_spadeshybrid_assemblies) + ch_assembled_contigs = ch_assembled_contigs.mix(ch_spadeshybrid_assemblies) ch_versions = ch_versions.mix(METASPADESHYBRID.out.versions.first()) } + + if (!params.skip_megahit){ + MEGAHIT ( ch_short_reads_grouped ) + ch_megahit_assemblies = MEGAHIT.out.assembly + .map { meta, assembly -> + def meta_new = meta + [assembler: 'MEGAHIT'] + [ meta_new, assembly ] + } + ch_assembled_contigs = ch_assembled_contigs.mix(ch_megahit_assemblies) + ch_versions = ch_versions.mix(MEGAHIT.out.versions.first()) + } + + + + GUNZIP_ASSEMBLIES ( ch_assembled_contigs ) + ch_versions = ch_versions.mix(GUNZIP_ASSEMBLIES .out.versions) + + ch_assemblies = GUNZIP_ASSEMBLIES.out.gunzip + } else { ch_assemblies_split = ch_input_assemblies .branch { meta, assembly -> @@ -630,11 +634,11 @@ workflow MAG { ungzip: true } - GUNZIP_ASSEMBLIES(ch_assemblies_split.gzipped) - ch_versions = ch_versions.mix(GUNZIP_ASSEMBLIES.out.versions) + GUNZIP_ASSEMBLYINPUT(ch_assemblies_split.gunzip) + ch_versions = ch_versions.mix(GUNZIP_ASSEMBLYINPUT.out.versions) ch_assemblies = Channel.empty() - ch_assemblies = 
ch_assemblies.mix(ch_assemblies_split.ungzip, GUNZIP_ASSEMBLIES.out.gunzip) + ch_assemblies = ch_assemblies.mix(ch_assemblies_split.ungzip, GUNZIP_ASSEMBLYINPUT.out.gunzip) } ch_quast_multiqc = Channel.empty() From 5437376f5ba2594e1c15de6b0050417dd079a343 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Fri, 20 Sep 2024 15:25:47 +0200 Subject: [PATCH 06/21] Switch local MEGAHIT to official nf-core one --- conf/modules.config | 4 +- modules.json | 5 + modules/local/megahit.nf | 40 ---- modules/nf-core/megahit/environment.yml | 6 + modules/nf-core/megahit/main.nf | 70 +++++++ modules/nf-core/megahit/meta.yml | 114 ++++++++++++ modules/nf-core/megahit/tests/main.nf.test | 126 +++++++++++++ .../nf-core/megahit/tests/main.nf.test.snap | 172 ++++++++++++++++++ modules/nf-core/megahit/tests/tags.yml | 2 + workflows/mag.nf | 4 +- 10 files changed, 499 insertions(+), 44 deletions(-) delete mode 100644 modules/local/megahit.nf create mode 100644 modules/nf-core/megahit/environment.yml create mode 100644 modules/nf-core/megahit/main.nf create mode 100644 modules/nf-core/megahit/meta.yml create mode 100644 modules/nf-core/megahit/tests/main.nf.test create mode 100644 modules/nf-core/megahit/tests/main.nf.test.snap create mode 100644 modules/nf-core/megahit/tests/tags.yml diff --git a/conf/modules.config b/conf/modules.config index 09a8d5f3..2ed68105 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -117,10 +117,9 @@ process { publishDir = [path: { "${params.outdir}/Taxonomy/${meta.classifier}/${meta.id}" }, mode: params.publish_dir_mode, pattern: "*.html"] } - //pattern: "*.{fa.gz,log}" //'pattern' didnt work, probably because the output is in a folder, solved with 'saveAs' withName: MEGAHIT { ext.args = params.megahit_options ?: '' - publishDir = [path: { "${params.outdir}/Assembly" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename.indexOf('.contigs.fa.gz') > 0 ? 
filename : filename.indexOf('.log') > 0 ? filename : null }] + publishDir = [path: { "${params.outdir}/Assembly/MEGAHIT" }, mode: params.publish_dir_mode, pattern: "*.{fa.gz,log}"] } withName: METASPADES { @@ -240,6 +239,7 @@ process { withName: PRODIGAL { ext.args = "-p meta" + ext.prefix = { "${meta.assembler}-${meta.id}_prodigal" } publishDir = [path: { "${params.outdir}/Annotation/Prodigal/${meta.assembler}/${meta.id}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }] } diff --git a/modules.json b/modules.json index 1f387db8..fdcb25e5 100644 --- a/modules.json +++ b/modules.json @@ -167,6 +167,11 @@ "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] }, + "megahit": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, "metabat2/jgisummarizebamcontigdepths": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", diff --git a/modules/local/megahit.nf b/modules/local/megahit.nf deleted file mode 100644 index 6f31425c..00000000 --- a/modules/local/megahit.nf +++ /dev/null @@ -1,40 +0,0 @@ -process MEGAHIT { - tag "$meta.id" - - conda "bioconda::megahit=1.2.9" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/megahit:1.2.9--h2e03b76_1' : - 'biocontainers/megahit:1.2.9--h2e03b76_1' }" - - input: - tuple val(meta), path(reads1), path(reads2) - - output: - tuple val(meta), path("MEGAHIT/MEGAHIT-${meta.id}.contigs.fa"), emit: assembly - path "MEGAHIT/*.log" , emit: log - path "MEGAHIT/MEGAHIT-${meta.id}.contigs.fa.gz" , emit: assembly_gz - path "versions.yml" , emit: versions - - script: - def args = task.ext.args ?: '' - def input = meta.single_end ? 
"-r \"" + reads1.join(",") + "\"" : "-1 \"" + reads1.join(",") + "\" -2 \"" + reads2.join(",") + "\"" - mem = task.memory.toBytes() - if ( !params.megahit_fix_cpu_1 || task.cpus == 1 ) - """ - ## Check if we're in the same work directory as a previous failed MEGAHIT run - if [[ -d MEGAHIT ]]; then - rm -r MEGAHIT/ - fi - - megahit $args -t "${task.cpus}" -m $mem $input -o MEGAHIT --out-prefix "MEGAHIT-${meta.id}" - - gzip -c "MEGAHIT/MEGAHIT-${meta.id}.contigs.fa" > "MEGAHIT/MEGAHIT-${meta.id}.contigs.fa.gz" - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - megahit: \$(echo \$(megahit -v 2>&1) | sed 's/MEGAHIT v//') - END_VERSIONS - """ - else - error "ERROR: '--megahit_fix_cpu_1' was specified, but not succesfully applied. Likely this is caused by changed process properties in a custom config file." -} diff --git a/modules/nf-core/megahit/environment.yml b/modules/nf-core/megahit/environment.yml new file mode 100644 index 00000000..eed8b725 --- /dev/null +++ b/modules/nf-core/megahit/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::megahit=1.2.9 + - conda-forge::pigz=2.8 diff --git a/modules/nf-core/megahit/main.nf b/modules/nf-core/megahit/main.nf new file mode 100644 index 00000000..df1f72a7 --- /dev/null +++ b/modules/nf-core/megahit/main.nf @@ -0,0 +1,70 @@ +process MEGAHIT { + tag "${meta.id}" + label 'process_high' + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'oras://community.wave.seqera.io/library/megahit_pigz:657d77006ae5f222' : + 'community.wave.seqera.io/library/megahit_pigz:87a590163e594224' }" + + input: + tuple val(meta), path(reads1), path(reads2) + + output: + tuple val(meta), path("*.contigs.fa.gz") , emit: contigs + tuple val(meta), path("intermediate_contigs/k*.contigs.fa.gz") , emit: k_contigs + tuple val(meta), path("intermediate_contigs/k*.addi.fa.gz") , emit: addi_contigs + tuple val(meta), path("intermediate_contigs/k*.local.fa.gz") , emit: local_contigs + tuple val(meta), path("intermediate_contigs/k*.final.contigs.fa.gz"), emit: kfinal_contigs + tuple val(meta), path('*.log') , emit: log + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def reads_command = meta.single_end || !reads2 ? "-r ${reads1}" : "-1 ${reads1.join(',')} -2 ${reads2.join(',')}" + """ + megahit \\ + ${reads_command} \\ + ${args} \\ + -t ${task.cpus} \\ + --out-prefix ${prefix} + + pigz \\ + --no-name \\ + -p ${task.cpus} \\ + ${args2} \\ + megahit_out/*.fa \\ + megahit_out/intermediate_contigs/*.fa + + mv megahit_out/* . + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + megahit: \$(echo \$(megahit -v 2>&1) | sed 's/MEGAHIT v//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def reads_command = meta.single_end || !reads2 ? 
"-r ${reads1}" : "-1 ${reads1.join(',')} -2 ${reads2.join(',')}" + """ + mkdir -p intermediate_contigs + echo "" | gzip > ${prefix}.contigs.fa.gz + echo "" | gzip > intermediate_contigs/k21.contigs.fa.gz + echo "" | gzip > intermediate_contigs/k21.addi.fa.gz + echo "" | gzip > intermediate_contigs/k21.local.fa.gz + echo "" | gzip > intermediate_contigs/k21.final.contigs.fa.gz + touch ${prefix}.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + megahit: \$(echo \$(megahit -v 2>&1) | sed 's/MEGAHIT v//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/megahit/meta.yml b/modules/nf-core/megahit/meta.yml new file mode 100644 index 00000000..04dab4c2 --- /dev/null +++ b/modules/nf-core/megahit/meta.yml @@ -0,0 +1,114 @@ +name: megahit +description: An ultra-fast metagenomic assembler for large and complex metagenomics +keywords: + - megahit + - denovo + - assembly + - debruijn + - metagenomics +tools: + - megahit: + description: "An ultra-fast single-node solution for large and complex metagenomics + assembly via succinct de Bruijn graph" + homepage: https://github.com/voutcn/megahit + documentation: https://github.com/voutcn/megahit + tool_dev_url: https://github.com/voutcn/megahit + doi: "10.1093/bioinformatics/btv033" + licence: ["GPL v3"] + args_id: "$args" + identifier: biotools:megahit + - pigz: + description: "Parallel implementation of the gzip algorithm." + homepage: "https://zlib.net/pigz/" + documentation: "https://zlib.net/pigz/pigz.pdf" + args_id: "$args2" + + identifier: biotools:megahit +input: + - - meta: + type: map + description: | + Groovy Map containing sample information and input single, or paired-end FASTA/FASTQ files (optionally decompressed) + e.g. [ id:'test', single_end:false ] + - reads1: + type: file + description: | + A single or list of input FastQ files for single-end or R1 of paired-end library(s), + respectively in gzipped or uncompressed FASTQ or FASTA format. 
+ - reads2: + type: file + description: | + A single or list of input FastQ files for R2 of paired-end library(s), + respectively in gzipped or uncompressed FASTQ or FASTA format. +output: + - contigs: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.contigs.fa.gz": + type: file + description: Final final contigs result of the assembly in FASTA format. + pattern: "*.contigs.fa.gz" + - k_contigs: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - intermediate_contigs/k*.contigs.fa.gz: + type: file + description: Contigs assembled from the de Bruijn graph of order-K + pattern: "k*.contigs.fa.gz" + - addi_contigs: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - intermediate_contigs/k*.addi.fa.gz: + type: file + description: Contigs assembled after iteratively removing local low coverage + unitigs in the de Bruijn graph of order-K + pattern: "k*.addi.fa.gz" + - local_contigs: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - intermediate_contigs/k*.local.fa.gz: + type: file + description: Contigs of the locally assembled contigs for k=K + pattern: "k*.local.fa.gz" + - kfinal_contigs: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - intermediate_contigs/k*.final.contigs.fa.gz: + type: file + description: Stand-alone contigs for k=K; if local assembly is turned on, the + file will be empty + pattern: "k*.final.contigs.fa.gz" + - log: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.log": + type: file + description: Log file containing statistics of the assembly output + pattern: "*.log" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@jfy133" +maintainers: + - "@jfy133" diff --git a/modules/nf-core/megahit/tests/main.nf.test b/modules/nf-core/megahit/tests/main.nf.test new file mode 100644 index 00000000..b52765d4 --- /dev/null +++ b/modules/nf-core/megahit/tests/main.nf.test @@ -0,0 +1,126 @@ +nextflow_process { + + name "Test Process MEGAHIT" + script "../main.nf" + process "MEGAHIT" + + tag "modules" + tag "modules_nfcore" + tag "megahit" + + test("sarscov2 - fastq - se") { + + when { + process { + """ + input[0] = [ [id:"test", single_end:true], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + []] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.contigs[0][1]).linesGzip.toString().contains(">k") }, + { assert process.out.k_contigs[0][1].each{path(it).linesGzip.toString().contains(">k")}}, + { assert process.out.addi_contigs[0][1].each{path(it).linesGzip.toString().contains(">k")}}, + { assert process.out.local_contigs[0][1].each{path(it).linesGzip.toString().contains(">k")}}, + { assert process.out.kfinal_contigs[0][1].each{path(it).linesGzip.toString().contains(">k")}}, + { assert snapshot( + path(process.out.log[0][1]).readLines().last().contains("ALL DONE. 
Time elapsed"), + process.out.versions + ).match() + } + ) + } + + } + + test("sarscov2 - fastq - pe") { + + when { + process { + """ + input[0] = [ [id:"test", single_end:false], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.contigs[0][1]).linesGzip.toString().contains(">k") }, + { assert process.out.k_contigs[0][1].each{path(it).linesGzip.toString().contains(">k")}}, + { assert process.out.addi_contigs[0][1].each{path(it).linesGzip.toString().contains(">k")}}, + { assert process.out.local_contigs[0][1].each{path(it).linesGzip.toString().contains(">k")}}, + { assert process.out.kfinal_contigs[0][1].each{path(it).linesGzip.toString().contains(">k")}}, + { assert snapshot( + path(process.out.log[0][1]).readLines().last().contains("ALL DONE. 
Time elapsed"), + process.out.versions + ).match() + } + ) + } + + } + + test("sarscov2 - fastq - pe - coassembly") { + + when { + process { + """ + input[0] = [ [id:"test", single_end:false], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_1.fastq.gz', checkIfExists: true)] , + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_2.fastq.gz', checkIfExists: true)] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.contigs[0][1]).linesGzip.toString().contains(">k") }, + { assert process.out.k_contigs[0][1].each{path(it).linesGzip.toString().contains(">k")}}, + { assert process.out.addi_contigs[0][1].each{path(it).linesGzip.toString().contains(">k")}}, + { assert process.out.local_contigs[0][1].each{path(it).linesGzip.toString().contains(">k")}}, + { assert process.out.kfinal_contigs[0][1].each{path(it).linesGzip.toString().contains(">k")}}, + { assert snapshot( + path(process.out.log[0][1]).readLines().last().contains("ALL DONE. 
Time elapsed"), + process.out.versions + ).match() + } + ) + } + + } + + test("sarscov2 - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ [id:"test", single_end:true], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + [] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + +} diff --git a/modules/nf-core/megahit/tests/main.nf.test.snap b/modules/nf-core/megahit/tests/main.nf.test.snap new file mode 100644 index 00000000..4677cc33 --- /dev/null +++ b/modules/nf-core/megahit/tests/main.nf.test.snap @@ -0,0 +1,172 @@ +{ + "sarscov2 - fastq - se": { + "content": [ + true, + [ + "versions.yml:md5,e3c0731297c9abe2f495ab6d541ac0e6" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-12T16:45:42.387947698" + }, + "sarscov2 - fastq - pe": { + "content": [ + true, + [ + "versions.yml:md5,e3c0731297c9abe2f495ab6d541ac0e6" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-12T16:45:48.679485983" + }, + "sarscov2 - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.contigs.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + [ + "k21.contigs.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "k21.final.contigs.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "k21.addi.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": true + }, + "k21.local.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "4": [ + [ + { + "id": "test", + "single_end": true + }, + "k21.final.contigs.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": true + }, + 
"test.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "6": [ + "versions.yml:md5,e3c0731297c9abe2f495ab6d541ac0e6" + ], + "addi_contigs": [ + [ + { + "id": "test", + "single_end": true + }, + "k21.addi.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "contigs": [ + [ + { + "id": "test", + "single_end": true + }, + "test.contigs.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "k_contigs": [ + [ + { + "id": "test", + "single_end": true + }, + [ + "k21.contigs.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "k21.final.contigs.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "kfinal_contigs": [ + [ + { + "id": "test", + "single_end": true + }, + "k21.final.contigs.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "local_contigs": [ + [ + { + "id": "test", + "single_end": true + }, + "k21.local.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "log": [ + [ + { + "id": "test", + "single_end": true + }, + "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,e3c0731297c9abe2f495ab6d541ac0e6" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-12T16:44:35.245399991" + }, + "sarscov2 - fastq - pe - coassembly": { + "content": [ + true, + [ + "versions.yml:md5,e3c0731297c9abe2f495ab6d541ac0e6" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-12T16:45:56.23363342" + } +} \ No newline at end of file diff --git a/modules/nf-core/megahit/tests/tags.yml b/modules/nf-core/megahit/tests/tags.yml new file mode 100644 index 00000000..9e865846 --- /dev/null +++ b/modules/nf-core/megahit/tests/tags.yml @@ -0,0 +1,2 @@ +megahit: + - "modules/nf-core/megahit/**" diff --git a/workflows/mag.nf b/workflows/mag.nf index 3e628f37..1eeb926d 100644 --- a/workflows/mag.nf +++ b/workflows/mag.nf @@ -47,6 +47,7 @@ include { KRONA_KRONADB } from '../modul include { KRONA_KTIMPORTTAXONOMY } from 
'../modules/nf-core/krona/ktimporttaxonomy/main' include { KRAKENTOOLS_KREPORT2KRONA as KREPORT2KRONA_CENTRIFUGE } from '../modules/nf-core/krakentools/kreport2krona/main' include { CAT_FASTQ } from '../modules/nf-core/cat/fastq/main' +include { MEGAHIT } from '../modules/nf-core/megahit/main' include { SPADES as METASPADES } from '../modules/nf-core/spades/main' include { SPADES as METASPADESHYBRID } from '../modules/nf-core/spades/main' include { GUNZIP as GUNZIP_ASSEMBLIES } from '../modules/nf-core/gunzip' @@ -69,7 +70,6 @@ include { KRAKEN2 } from '../modules include { POOL_SINGLE_READS as POOL_SHORT_SINGLE_READS } from '../modules/local/pool_single_reads' include { POOL_PAIRED_READS } from '../modules/local/pool_paired_reads' include { POOL_SINGLE_READS as POOL_LONG_READS } from '../modules/local/pool_single_reads' -include { MEGAHIT } from '../modules/local/megahit' include { QUAST } from '../modules/local/quast' include { QUAST_BINS } from '../modules/local/quast_bins' include { QUAST_BINS_SUMMARY } from '../modules/local/quast_bins_summary' @@ -611,7 +611,7 @@ workflow MAG { if (!params.skip_megahit){ MEGAHIT ( ch_short_reads_grouped ) - ch_megahit_assemblies = MEGAHIT.out.assembly + ch_megahit_assemblies = MEGAHIT.out.contigs .map { meta, assembly -> def meta_new = meta + [assembler: 'MEGAHIT'] [ meta_new, assembly ] From 12b13d37f625d8ea71387ef0cbe676a338a8671c Mon Sep 17 00:00:00 2001 From: "James A. 
Fellows Yates" Date: Fri, 20 Sep 2024 15:31:53 +0200 Subject: [PATCH 07/21] Fix wrongly set branch --- workflows/mag.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/mag.nf b/workflows/mag.nf index 1eeb926d..6b7610bc 100644 --- a/workflows/mag.nf +++ b/workflows/mag.nf @@ -634,7 +634,7 @@ workflow MAG { ungzip: true } - GUNZIP_ASSEMBLYINPUT(ch_assemblies_split.gunzip) + GUNZIP_ASSEMBLYINPUT(ch_assemblies_split.gzipped) ch_versions = ch_versions.mix(GUNZIP_ASSEMBLYINPUT.out.versions) ch_assemblies = Channel.empty() From 7d7ad71fcaf4fc02af121706bb0c1b2ce20438e6 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Fri, 20 Sep 2024 15:58:11 +0200 Subject: [PATCH 08/21] Fix linting --- conf/modules.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index 4964154b..13e79a7c 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -256,7 +256,7 @@ process { ext.prefix = { "SPAdesHybrid-${meta.id}" } publishDir = [path: { "${params.outdir}/Assembly/SPAdesHybrid" }, mode: params.publish_dir_mode, pattern: "*.{fasta.gz,gfa.gz,fa.gz,log}"] } - + withName: QUAST { publishDir = [path: { "${params.outdir}/Assembly/${meta.assembler}/QC/${meta.id}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }] } From 9d7ce62724c296efaaabd1c970e114379b2bb09b Mon Sep 17 00:00:00 2001 From: "James A. 
Fellows Yates" Date: Thu, 3 Oct 2024 15:57:34 +0200 Subject: [PATCH 09/21] Replace assembler single-use fix-cpu functions with single line definitions --- conf/base.config | 6 +++--- nextflow.config | 15 --------------- 2 files changed, 3 insertions(+), 18 deletions(-) diff --git a/conf/base.config b/conf/base.config index 71360a75..ac6fa2a0 100644 --- a/conf/base.config +++ b/conf/base.config @@ -120,7 +120,7 @@ process { } //MEGAHIT returns exit code 250 when running out of memory withName: MEGAHIT { - cpus = { check_megahit_cpus(8, task.attempt) } + cpus = { params.megahit_fix_cpu_1 ? 1 : 8 * task.attempt } memory = { check_max(40.GB * task.attempt, 'memory') } time = { check_max(16.h * task.attempt, 'time') } errorStrategy = { task.exitStatus in ((130..145) + 104 + 250) ? 'retry' : 'finish' } @@ -128,14 +128,14 @@ process { //SPAdes returns error(1) if it runs out of memory (and for other reasons as well...)! //exponential increase of memory and time with attempts, keep number of threads to enable reproducibility withName: METASPADES { - cpus = { check_spades_cpus(10, task.attempt) } + cpus = { params.spades_fix_cpus != -1 ? params.spades_fix_cpus : 10 * task.attempt } memory = { check_max(64.GB * (2 ** (task.attempt - 1)), 'memory') } time = { check_max(24.h * (2 ** (task.attempt - 1)), 'time') } errorStrategy = { task.exitStatus in ((130..145) + 104 + 21 + 12 + 1) ? 'retry' : 'finish' } maxRetries = 5 } withName: METASPADESHYBRID { - cpus = { check_spadeshybrid_cpus(10, task.attempt) } + cpus = { params.spadeshybrid_fix_cpus != -1 ? params.spadeshybrid_fix_cpus : 10 * task.attempt } memory = { check_max(64.GB * (2 ** (task.attempt - 1)), 'memory') } time = { check_max(24.h * (2 ** (task.attempt - 1)), 'time') } errorStrategy = { task.exitStatus in [143, 137, 21, 1] ? 
'retry' : 'finish' } diff --git a/nextflow.config b/nextflow.config index 3b43df33..c2154fc9 100644 --- a/nextflow.config +++ b/nextflow.config @@ -430,18 +430,3 @@ def check_max(obj, type) { } } } - -// Functions to fix number of cpus to allow reproducibility for MEGAHIT and SPAdes -// if corresponding parameters are specified, number of cpus is not increased with retries -def check_megahit_cpus (x, attempt ) { - if (params.megahit_fix_cpu_1) return 1 - else return check_max (x * attempt, 'cpus' ) -} -def check_spades_cpus (x, attempt ) { - if (params.spades_fix_cpus != -1) return check_max (params.spades_fix_cpus, 'cpus' ) - else return check_max (x * attempt, 'cpus' ) -} -def check_spadeshybrid_cpus (x, attempt ) { - if (params.spadeshybrid_fix_cpus != -1) return check_max (params.spadeshybrid_fix_cpus, 'cpus' ) - else return check_max (x * attempt, 'cpus' ) -} From ce449f2dcea6f22763c06e04901e869d8c58b0a6 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Thu, 3 Oct 2024 16:40:42 +0200 Subject: [PATCH 10/21] Ensure checkmax is still used --- conf/base.config | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/conf/base.config b/conf/base.config index ac6fa2a0..5fe07f17 100644 --- a/conf/base.config +++ b/conf/base.config @@ -120,7 +120,7 @@ process { } //MEGAHIT returns exit code 250 when running out of memory withName: MEGAHIT { - cpus = { params.megahit_fix_cpu_1 ? 1 : 8 * task.attempt } + cpus = { params.megahit_fix_cpu_1 ? 1 : check_max(8 * task.attempt, 'cpus') } memory = { check_max(40.GB * task.attempt, 'memory') } time = { check_max(16.h * task.attempt, 'time') } errorStrategy = { task.exitStatus in ((130..145) + 104 + 250) ? 'retry' : 'finish' } @@ -128,14 +128,14 @@ process { //SPAdes returns error(1) if it runs out of memory (and for other reasons as well...)! 
//exponential increase of memory and time with attempts, keep number of threads to enable reproducibility withName: METASPADES { - cpus = { params.spades_fix_cpus != -1 ? params.spades_fix_cpus : 10 * task.attempt } + cpus = { params.spades_fix_cpus != -1 ? check_max(params.spades_fix_cpus, 'cpus') : check_max(10 * task.attempt, 'cpus') } memory = { check_max(64.GB * (2 ** (task.attempt - 1)), 'memory') } time = { check_max(24.h * (2 ** (task.attempt - 1)), 'time') } errorStrategy = { task.exitStatus in ((130..145) + 104 + 21 + 12 + 1) ? 'retry' : 'finish' } maxRetries = 5 } withName: METASPADESHYBRID { - cpus = { params.spadeshybrid_fix_cpus != -1 ? params.spadeshybrid_fix_cpus : 10 * task.attempt } + cpus = { params.spadeshybrid_fix_cpus != -1 ? check_max(params.spadeshybrid_fix_cpus, 'cpus') : check_max(10 * task.attempt, 'cpus') } memory = { check_max(64.GB * (2 ** (task.attempt - 1)), 'memory') } time = { check_max(24.h * (2 ** (task.attempt - 1)), 'time') } errorStrategy = { task.exitStatus in [143, 137, 21, 1] ? 'retry' : 'finish' } From efabee706a72413e10817a4aa46b357895513351 Mon Sep 17 00:00:00 2001 From: "James A.
Fellows Yates" Date: Thu, 3 Oct 2024 16:46:55 +0200 Subject: [PATCH 11/21] Specify also module changes --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b34857ba..f6295c88 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,7 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#664](https://github.com/nf-core/mag/pull/664) - Update GTDBTk to latest version, with updated column names, update GTDB to release 220 (by @dialvarezs) - [#676](https://github.com/nf-core/mag/pull/676) - Added exit code 12 to valid SPAdes retry codes, due to OOM errors from spades-hammer (reported by @bawee, fix by @jfy133) -- [#666](https://github.com/nf-core/mag/pull/666) - Update SPAdes to version 4.0.0 (requested by @elsherbini, fix by @jfy133) +- [#666](https://github.com/nf-core/mag/pull/666) - Update SPAdes to version 4.0.0, replace both METASPADES and MEGAHIT with official nf-core modules (requested by @elsherbini, fix by @jfy133) ### `Fixed` From d7e51ef3f0564732306cba827fb28e03af7b3bf5 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Fri, 4 Oct 2024 09:21:06 +0200 Subject: [PATCH 12/21] Lets see if SPAdes now works with fusion with the new container... 
--- conf/test_full.config | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/conf/test_full.config b/conf/test_full.config index 9a01bc58..b09e6fe1 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -16,30 +16,30 @@ params { // Input data for full size test // hg19 reference with highly conserved and low-complexity regions masked by Brian Bushnell - host_fasta = "s3://ngi-igenomes/test-data/mag/hg19_main_mask_ribo_animal_allplant_allfungus.fa.gz" - input = "s3://ngi-igenomes/test-data/mag/samplesheets/samplesheet.full.csv" + host_fasta = "s3://ngi-igenomes/test-data/mag/hg19_main_mask_ribo_animal_allplant_allfungus.fa.gz" + input = "s3://ngi-igenomes/test-data/mag/samplesheets/samplesheet.full.csv" //centrifuge_db = "s3://ngi-igenomes/test-data/mag/p_compressed+h+v.tar.gz" - kraken2_db = "s3://ngi-igenomes/test-data/mag/minikraken_8GB_202003.tgz" - cat_db = "s3://ngi-igenomes/test-data/mag/CAT_prepare_20210107.tar.gz" + kraken2_db = "s3://ngi-igenomes/test-data/mag/minikraken_8GB_202003.tgz" + cat_db = "s3://ngi-igenomes/test-data/mag/CAT_prepare_20210107.tar.gz" // gtdb_db = "s3://ngi-igenomes/test-data/mag/gtdbtk_r214_data.tar.gz" ## This should be updated to release 220, once we get GTDB-Tk working again - skip_gtdbtk = true + skip_gtdbtk = true // TODO TEMPORARY: deactivate SPAdes due to incompatibility of container with fusion file system - skip_spades = true - skip_spadeshybrid = true + skip_spades = false + skip_spadeshybrid = false // reproducibility options for assembly - spades_fix_cpus = 10 - spadeshybrid_fix_cpus = 10 - megahit_fix_cpu_1 = true + spades_fix_cpus = 10 + spadeshybrid_fix_cpus = 10 + megahit_fix_cpu_1 = true // available options to enable reproducibility for BUSCO (--busco_db) not used here // to allow detection of possible problems in automated lineage selection mode using public databases // test CAT with official taxonomic ranks only - cat_official_taxonomy = true + 
cat_official_taxonomy = true // Skip CONCOCT due to timeout issues - skip_concoct = true + skip_concoct = true } From 9a2e958f1d33b75afb6df2a832ca6c8d533de79f Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Tue, 22 Oct 2024 09:21:06 +0200 Subject: [PATCH 13/21] Fix GTDBTk Url in schema --- nextflow_schema.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index dab2eb50..bcbcfa1c 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -542,7 +542,7 @@ "gtdb_db": { "type": "string", "description": "Specify the location of a GTDBTK database. Can be either an uncompressed directory or a `.tar.gz` archive. If not specified will be downloaded for you when GTDBTK or binning QC is not skipped.", - "default": "https://data.ace.uq.edu.au/public/gtdb/data/releases/release220/220.0/auxillary_files/gtdbtk_package/full_package/gtdbtk_r220_data.tar.gz" + "default": "https://data.gtdb.ecogenomic.org/releases/release220/220.0/auxillary_files/gtdbtk_package/full_package/gtdbtk_r220_data.tar.gz" }, "gtdb_mash": { "type": "string", From a77b7c2a09a735c28a7adf8fc5839f672d162593 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Tue, 22 Oct 2024 09:27:09 +0200 Subject: [PATCH 14/21] Update CHANGELOG --- CHANGELOG.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ce9b11a5..af6de241 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Changed` - [#674](https://github.com/nf-core/mag/pull/674) - Changed to porechop-abi as default adapter trimming tool for long reads. 
User can still use porechop if preferred (added by @muabnezor) +- [#666](https://github.com/nf-core/mag/pull/666) - Update SPAdes to version 4.0.0, replace both METASPADES and MEGAHIT with official nf-core modules (requested by @elsherbini, fix by @jfy133) +- [#666](https://github.com/nf-core/mag/pull/666) - Update URLs to GTDB database downloads due to server move (reported by @Jokendo-collab, fix by @jfy133) ### `Fixed` @@ -27,6 +29,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 | ------------ | ---------------- | ----------- | | Porechop_ABI | | 0.5.0 | | Filtlong | 0.2.0 | 0.2.1 | +| SPAdes | 3.15.3 | 4.0.0 | ### `Deprecated` @@ -41,7 +44,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#664](https://github.com/nf-core/mag/pull/664) - Update GTDBTk to latest version, with updated column names, update GTDB to release 220 (by @dialvarezs) - [#676](https://github.com/nf-core/mag/pull/676) - Added exit code 12 to valid SPAdes retry codes, due to OOM errors from spades-hammer (reported by @bawee, fix by @jfy133) -- [#666](https://github.com/nf-core/mag/pull/666) - Update SPAdes to version 4.0.0, replace both METASPADES and MEGAHIT with official nf-core modules (requested by @elsherbini, fix by @jfy133) ### `Fixed` @@ -54,7 +56,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 | Tool | Previous version | New version | | ------ | ---------------- | ----------- | | GTDBTk | 2.3.2 | 2.4.0 | -| SPAdes | 3.15.3 | 4.0.0 | ### `Deprecated` From 717d43348d59b21b3781b62a10f40c4667c562f8 Mon Sep 17 00:00:00 2001 From: "James A. 
Fellows Yates" Date: Tue, 22 Oct 2024 09:51:11 +0200 Subject: [PATCH 15/21] Make sure meta mode is executed with the new module --- conf/modules.config | 4 ++-- nextflow_schema.json | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 0e1352f6..20390c6a 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -262,13 +262,13 @@ process { } withName: METASPADES { - ext.args = params.spades_options ?: '' + ext.args = params.spades_options ?: '--meta' ext.prefix = { "SPAdes-${meta.id}" } publishDir = [path: { "${params.outdir}/Assembly/SPAdes" }, mode: params.publish_dir_mode, pattern: "*.{fasta.gz,gfa.gz,fa.gz,log}"] } withName: METASPADESHYBRID { - ext.args = params.spades_options ?: '' + ext.args = params.spades_options ?: '--meta' ext.prefix = { "SPAdesHybrid-${meta.id}" } publishDir = [path: { "${params.outdir}/Assembly/SPAdesHybrid" }, mode: params.publish_dir_mode, pattern: "*.{fasta.gz,gfa.gz,fa.gz,log}"] } diff --git a/nextflow_schema.json b/nextflow_schema.json index ce1cd7c6..1063c4ac 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -551,7 +551,7 @@ }, "spades_options": { "type": "string", - "description": "Additional custom options for SPAdes.", + "description": "Additional custom options for SPAdes and SPAdesHybrid. You must also specify `--meta` to run SPAdes in metagenomic mode if customising these options!", "help_text": "An example is adjusting k-mers (\"-k 21,33,55,77\") or adding [advanced options](https://github.com/ablab/spades#advanced-options). But not -t, -m, -o or --out-prefix, because these are already in use. Must be used like this: --spades_options \"-k 21,33,55,77\")" }, "megahit_options": { From ebd18a16da759c9e03d1d2f56f29b0f643220bee Mon Sep 17 00:00:00 2001 From: "James A. 
Fellows Yates" Date: Tue, 22 Oct 2024 10:49:44 +0200 Subject: [PATCH 16/21] Fix linting --- nextflow.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index 02820eb1..12c6c6aa 100644 --- a/nextflow.config +++ b/nextflow.config @@ -90,7 +90,7 @@ params { cat_official_taxonomy = false save_cat_db = false skip_gtdbtk = false - gtdb_db = "https://data.ace.uq.edu.au/public/gtdb/data/releases/release220/220.0/auxillary_files/gtdbtk_package/full_package/gtdbtk_r220_data.tar.gz" + gtdb_db = "https://data.gtdb.ecogenomic.org/releases/release220/220.0/auxillary_files/gtdbtk_package/full_package/gtdbtk_r220_data.tar.gz" gtdb_mash = null gtdbtk_min_completeness = 50.0 gtdbtk_max_contamination = 10.0 From 48cad97806281ec0ffdd556f9156f66c226c9a59 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Tue, 22 Oct 2024 10:50:41 +0200 Subject: [PATCH 17/21] Remove a TODO --- conf/base.config | 1 - 1 file changed, 1 deletion(-) diff --git a/conf/base.config b/conf/base.config index e26414bc..21a8ac3e 100644 --- a/conf/base.config +++ b/conf/base.config @@ -10,7 +10,6 @@ process { - // TODO nf-core: Check the defaults for all processes cpus = { 1 * task.attempt } memory = { 7.GB * task.attempt } time = { 4.h * task.attempt } From 860926aae2100cea6c122a6d34842eb2fc06fb14 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Tue, 22 Oct 2024 10:52:54 +0200 Subject: [PATCH 18/21] Add MEGAHIT prefix to ensure MEGAHIT at the beginning of the input file name. 
--- conf/modules.config | 1 + 1 file changed, 1 insertion(+) diff --git a/conf/modules.config b/conf/modules.config index 20390c6a..3a05419f 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -258,6 +258,7 @@ process { withName: MEGAHIT { ext.args = params.megahit_options ?: '' + ext.prefix = { "MEGAHIT-${meta.id}" } publishDir = [path: { "${params.outdir}/Assembly/MEGAHIT" }, mode: params.publish_dir_mode, pattern: "*.{fa.gz,log}"] } From 639addba3fb34849fdc462c44bc0f0680886ac3d Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Tue, 22 Oct 2024 10:59:32 +0200 Subject: [PATCH 19/21] Re-add memory explicit parameter to MEGAHIT module (inbuilt into SPAdes module) --- conf/modules.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index 3a05419f..f9c3106c 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -257,7 +257,7 @@ process { } withName: MEGAHIT { - ext.args = params.megahit_options ?: '' + ext.args = params.megahit_options ? params.megahit_options + "-m ${task.memory}" : "-m ${task.memory}" ext.prefix = { "MEGAHIT-${meta.id}" } publishDir = [path: { "${params.outdir}/Assembly/MEGAHIT" }, mode: params.publish_dir_mode, pattern: "*.{fa.gz,log}"] } From ec318c79669c2a199358a61f7357e2416e12d774 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Tue, 22 Oct 2024 11:10:58 +0200 Subject: [PATCH 20/21] Re-add MEGAHIT memory --- conf/modules.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index f9c3106c..8f2c1042 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -257,7 +257,7 @@ process { } withName: MEGAHIT { - ext.args = params.megahit_options ? params.megahit_options + "-m ${task.memory}" : "-m ${task.memory}" + ext.args = { params.megahit_options ? 
params.megahit_options + " -m ${task.memory.toBytes()}" : "-m ${task.memory.toBytes()}" } ext.prefix = { "MEGAHIT-${meta.id}" } publishDir = [path: { "${params.outdir}/Assembly/MEGAHIT" }, mode: params.publish_dir_mode, pattern: "*.{fa.gz,log}"] } From 338a552a0c5d324360c4df69e36aee5c8bf0328d Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Tue, 22 Oct 2024 11:41:26 +0200 Subject: [PATCH 21/21] Use HTTPS URL for megahit container --- modules.json | 2 +- modules/nf-core/megahit/main.nf | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/modules.json b/modules.json index 0e50ef75..3eea27cd 100644 --- a/modules.json +++ b/modules.json @@ -174,7 +174,7 @@ }, "megahit": { "branch": "master", - "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "git_sha": "7755db15e36b30da564cd67fffdfe18a255092aa", "installed_by": ["modules"] }, "metabat2/jgisummarizebamcontigdepths": { diff --git a/modules/nf-core/megahit/main.nf b/modules/nf-core/megahit/main.nf index df1f72a7..f6e50f94 100644 --- a/modules/nf-core/megahit/main.nf +++ b/modules/nf-core/megahit/main.nf @@ -3,7 +3,7 @@ process MEGAHIT { label 'process_high' conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'oras://community.wave.seqera.io/library/megahit_pigz:657d77006ae5f222' : + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/f2/f2cb827988dca7067ff8096c37cb20bc841c878013da52ad47a50865d54efe83/data' : 'community.wave.seqera.io/library/megahit_pigz:87a590163e594224' }" input: