Merge pull request #272 from danilodileo/add-spades-options

Add spades options
nf-core · Mar 19, 2024 · ed8cf07 · ed8cf07
2 parents 066f21c + 44234ed
commit ed8cf07
Show file tree

Hide file tree

Showing 9 changed files with 42 additions and 30 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -6,14 +6,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ## v1.0.1 - [date]
 
 ### `Added`
+- [#271](https://github.com/nf-core/metatdenovo/issues/271) - Added flavor to SPADES modules
 
 ### `Changed`
 
-- [#268]([https://github.com/nf-core/ampliseq/pull/681](https://github.com/nf-core/metatdenovo/pull/268)) - Don't save so many intermediate Megahit files by default
+- [#268](https://github.com/nf-core/metatdenovo/pull/268) - Don't save so many intermediate Megahit files by default
 
 ### `Fixed`
 
-- [#269]([https://github.com/nf-core/ampliseq/pull/681](https://github.com/nf-core/metatdenovo/pull/269)) - Make Transdecoder work better with `-resume`
+- [#269](https://github.com/nf-core/metatdenovo/pull/269) - Make Transdecoder work better with `-resume`
 
 ### `Dependencies`
 

diff --git a/conf/modules.config b/conf/modules.config
@@ -90,16 +90,16 @@ process {
 
     withName: WRITESPADESYAML {
         publishDir = [
-            path: { "${params.outdir}/rnaspades" },
+            path: { "${params.outdir}/spades" },
             mode: params.publish_dir_mode,
             pattern: '*.yaml'
         ]
     }
 
     withName: SPADES {
-        ext.args = "--rna"
+        ext.args = { "--${params.spades_flavor}" }
         publishDir = [
-            path: { "${params.outdir}/rnaspades" },
+            path: { "${params.outdir}/spades" },
             mode: params.publish_dir_mode,
             pattern: '*.{gz,log}'
         ]

diff --git a/conf/test_rnaspades.config → conf/test_spades.config b/conf/test_rnaspades.config → conf/test_spades.config
@@ -11,7 +11,7 @@
 */
 
 params {
-    config_profile_name        = 'Test rnaspades assembler profile'
+    config_profile_name        = 'Test spades assembler profile'
     config_profile_description = 'Minimal test dataset to check pipeline function'
     // Limit resources so that this can run on GitHub Actions
     max_cpus   = 2
@@ -22,7 +22,7 @@ params {
     input    = 'https://raw.githubusercontent.com/nf-core/test-datasets/metatdenovo/samplesheet/samplesheet.csv'
 
     // Assembler option
-    assembler     = 'rnaspades'
+    assembler     = 'spades'
 
     skip_eukulele  = true
     skip_eggnog    = true

diff --git a/docs/output.md b/docs/output.md
@@ -20,7 +20,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and the results
     - [BBnorm](#bbnorm) - Normalize the reads in the samples to use less resources for assembly (optional)
   - [Assembly step](#assembly-step) - Generate contigs with an assembler program
     - [Megahit](#megahit) - Output from Megahit assembly (default)
-    - [RNASpades](#rnaspades) - Output from Spades assembly (optional)
+    - [Spades](#spades) - Output from Spades assembly (optional)
   - [ORF Caller step](#orf-caller-step) - Identify protein-coding genes (ORFs) with an ORF caller
     - [Prodigal](#prodigal) - Output from Prodigal (default)
     - [Prokka](#prokka) - Output from Prokka (optional)
@@ -126,17 +126,17 @@ BBnorm is a BBmap tool.
 
 </details>
 
-#### RNASpades
+#### Spades
 
-Optionally, you can use [RNASpades](https://cab.spbu.ru/software/rnaspades/) to assemble reads into contigs.
+Optionally, you can use [Spades](https://github.com/ablab/spades) to assemble reads into contigs.
 
 <details markdown="1">
 <summary>Output files</summary>
 
-- `rnaspades/`
-  - `rnaspades.assembly.gfa.gz`: gfa file output from rnaspades
-  - `rnaspades.spades.log`: log file output from rnaspades run
-  - `rnaspades.transcripts.fa.gz`: reference genome created by RNASpades
+- `spades/`
+  - `spades.assembly.gfa.gz`: gfa file output from spades
+  - `spades.spades.log`: log file output from spades run
+  - `spades.transcripts.fa.gz`: assembled transcripts (the reference contigs) created by Spades
 
 </details>
 

diff --git a/docs/usage.md b/docs/usage.md
@@ -93,7 +93,8 @@ To turn on digital normalization, use the `--bbnorm` parameter and, if required,
 
 By default, the pipeline uses Megahit (`--assembler megahit`) to assemble the cleaned and trimmed reads to create the reference contigs.
 Megahit is fast and it does not require a lot of memory to run, making it ideal for large sets of samples.
-The workflow also supports RNAspades, (`--assembler rnaspades` ) as an alternative.
+The workflow also supports Spades (`--assembler spades`) as an alternative.
+If you are working with viruses, you can tell Spades to run in viral mode with the option `--spades_flavor rnaviral`.
 
 You can also choose to input contigs from an assembly that you made outside the pipeline using the `--assembly file.fna` (where `file.fna` is the name of a fasta file with contigs) option.
 
@@ -187,10 +188,10 @@ ORF-HMM combination will be ranked according to score and E-value.
 ## Example pipeline command with some common features
 
 ```bash
-nextflow run nf-core/metatdenovo -profile docker --input samplesheet.csv --assembler rnaspades --orf_caller prokka --eggnog --eukulele_db gtdb
+nextflow run nf-core/metatdenovo -profile docker --input samplesheet.csv --assembler spades --orf_caller prokka --eggnog --eukulele_db gtdb
 ```
 
-In this example, we are running metatdenovo with `rnaspades` as assembler, `prokka` as ORF caller, `eggnog` for functional annotation and EUKulele with the GTDB database for taxonomic annotation.
+In this example, we are running metatdenovo with `spades` as assembler, `prokka` as ORF caller, `eggnog` for functional annotation and EUKulele with the GTDB database for taxonomic annotation.
 
 Note that the pipeline will create the following files in your working directory:
 
@@ -219,7 +220,7 @@ with `params.yaml` containing:
 
 ```yaml
 input: 'samplesheet.csv'
-assembler: 'rnaspades'
+assembler: 'spades'
 orf_caller: 'prokka'
 eggnog: true
 eukulele_db: 'gtdb'

diff --git a/modules/local/transdecoder.nf b/modules/local/transdecoder.nf
@@ -15,7 +15,7 @@ process TRANSDECODER {
     tuple val(meta), path("*.gff3"), emit: gff
     tuple val(meta), path("*.cds") , emit: cds
     tuple val(meta), path("*.bed") , emit: bed
-    path "versions.yml"              , emit: versions
+    path "versions.yml"            , emit: versions
 
     when:
     task.ext.when == null || task.ext.when

diff --git a/nextflow.config b/nextflow.config
@@ -42,6 +42,7 @@ params {
     // assembler option
     assembler                   = 'megahit'
     min_contig_length           = 0
+    spades_flavor               = 'rna'
 
     // Mapping options
     save_samtools               = false
@@ -225,7 +226,7 @@ profiles {
     test_eggnog         { includeConfig 'conf/test_eggnog.config'       }
     test_eukulele       { includeConfig 'conf/test_eukulele.config'     }
     test_kofamscan      { includeConfig 'conf/test_kofamscan.config'    }
-    test_rnaspades      { includeConfig 'conf/test_rnaspades.config'    }
+    test_spades         { includeConfig 'conf/test_spades.config'    }
     gitpod {
         executor.name          = 'local'
         executor.cpus          = 16

diff --git a/nextflow_schema.json b/nextflow_schema.json
@@ -169,8 +169,8 @@
                 "assembler": {
                     "type": "string",
                     "default": "megahit",
-                    "enum": ["megahit", "rnaspades"],
-                    "description": "Specify which assembler you would like to run, possible alternatives: megahit, rnaspades. default: megahit",
+                    "enum": ["megahit", "spades"],
+                    "description": "Specify which assembler you would like to run, possible alternatives: megahit, spades. default: megahit",
                     "fa_icon": "fas fa-bezier-curve"
                 },
                 "assembly": {
@@ -186,6 +186,13 @@
                     "default": 0,
                     "description": "Filter out contigs shorter than this.",
                     "fa_icon": "fas fa-align-justify"
+                },
+                "spades_flavor": {
+                    "type": "string",
+                    "default": "rna",
+                    "enum": ["rna", "isolate", "sc", "meta", "plasmid", "metaplasmid", "metaviral", "rnaviral"],
+                    "description": "Select which type of assembly you want to make. Default: rna",
+                    "help_text": "This option allows you to run an assembly for a different scope. For instance, with `--spades_flavor rna` you will get an assembly for prokaryotic metatranscriptomes, `--spades_flavor rnaviral` for viral metatranscriptomes or `--spades_flavor metaviral` for viral metagenomes. The argument to the parameter will be passed as a parameter to Spades; e.g. `--spades_flavor rna` will be passed as `--rna` to Spades."
                 }
             },
             "fa_icon": "fas fa-bezier-curve"
@@ -255,7 +262,6 @@
             "properties": {
                 "skip_eggnog": {
                     "type": "boolean",
-                    "default": false,
                     "description": "Skip EGGNOG functional annotation",
                     "fa_icon": "fas fa-forward"
                 },
@@ -268,7 +274,6 @@
                 },
                 "skip_kofamscan": {
                     "type": "boolean",
-                    "default": false,
                     "description": "skip kofamscan run",
                     "fa_icon": "fas fa-forward"
                 },
@@ -311,7 +316,6 @@
             "properties": {
                 "skip_eukulele": {
                     "type": "boolean",
-                    "default": false,
                     "description": "skip eukulele run",
                     "fa_icon": "fas fa-forward"
                 },

diff --git a/workflows/metatdenovo.nf b/workflows/metatdenovo.nf
@@ -264,13 +264,13 @@ workflow METATDENOVO {
     }
 
     //
-    // MODULE: Run Megahit or RNAspades on all interleaved fastq files
+    // MODULE: Run Megahit or Spades on all interleaved fastq files
     //
     if ( params.assembly ) {
         Channel
             .value ( [ [ id: 'user_assembly' ], file(params.assembly) ] )
             .set { ch_assembly_contigs }
-    } else if ( assembler == 'rnaspades' ) {
+    } else if ( assembler == 'spades' ) {
         // 1. Write a yaml file for Spades
         WRITESPADESYAML (
             ch_pe_reads_to_assembly.toList(),
@@ -281,16 +281,21 @@ workflow METATDENOVO {
         ch_pe_reads_to_assembly
             .mix(ch_se_reads_to_assembly)
             .collect()
-            .map { [ [ id:'rnaspades' ], it, [], [] ] }
+            .map { [ [ id:'spades' ], it, [], [] ] }
             .set { ch_spades }
         SPADES (
             ch_spades,
             WRITESPADESYAML.out.yaml,
             []
         )
-        ch_assembly = SPADES.out.transcripts
+
+        SPADES.out.transcripts
+            .ifEmpty{ [] }
+            .combine(SPADES.out.contigs.ifEmpty{ [] } )
+            .set { ch_assembly }
         ch_versions = ch_versions.mix(SPADES.out.versions)
-        FORMATSPADES( ch_assembly )
+
+        FORMATSPADES( ch_assembly.first() )
         ch_assembly_contigs = FORMATSPADES.out.assembly
         ch_versions    = ch_versions.mix(FORMATSPADES.out.versions)
     } else if ( assembler == 'megahit' ) {