diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 1aac440d..72f784a9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -47,7 +47,7 @@ jobs: strategy: matrix: # Run remaining test profiles with minimum nextflow version - profile: [test, test_prokka, test_rnaspades, test_transdecoder] + profile: [test, test_prokka, test_spades, test_transdecoder] steps: - name: Check out pipeline code uses: actions/checkout@v2 diff --git a/CHANGELOG.md b/CHANGELOG.md index b8a119e9..dec20584 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## v1.0.1 - [date] +## v1.1.0 - [date] ### `Added` -- [#271](<[https://github.com/nf-core/metatdenovo/issues/271](https://github.com/nf-core/metatdenovo/issues/271)>) - Added flavor to SPADES modules +- [#271](https://github.com/nf-core/metatdenovo/issues/271) - Added flavor to SPADES modules @@ -20,6 +20,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Deprecated` +## v1.0.1 - [2024-04-02] + +### `Fixed` + +- [#277](https://github.com/nf-core/metatdenovo/pull/277) - Fix mistake in how the `--eukulele_db` parameter is handled. Remove the possibility to use a list of databases in the same run. +- [#277](https://github.com/nf-core/metatdenovo/pull/277) - Gzip user-provided assembly files that are not already compressed, instead of assuming they are gzipped, to avoid files being overwritten. + ## v1.0.0 - [2024-02-15] Initial release of nf-core/metatdenovo, created with the [nf-core](https://nf-co.re/) template. 
diff --git a/modules.json b/modules.json
index 5da9c3aa..eeeb17cb 100644
--- a/modules.json
+++ b/modules.json
@@ -55,6 +55,11 @@
                         "git_sha": "9e71d8519dfbfc328c078bba14d4bd4c99e39a94",
                         "installed_by": ["modules"]
                     },
+                    "pigz/compress": {
+                        "branch": "master",
+                        "git_sha": "0eab94fc1e48703c1b0a8704bd665f554905c39d",
+                        "installed_by": ["modules"]
+                    },
                     "prodigal": {
                         "branch": "master",
                         "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
diff --git a/modules/nf-core/pigz/compress/environment.yml b/modules/nf-core/pigz/compress/environment.yml
new file mode 100644
index 00000000..7551d187
--- /dev/null
+++ b/modules/nf-core/pigz/compress/environment.yml
@@ -0,0 +1,9 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+name: "pigz_compress"
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - "pigz=2.8"
diff --git a/modules/nf-core/pigz/compress/main.nf b/modules/nf-core/pigz/compress/main.nf
new file mode 100644
index 00000000..152e7006
--- /dev/null
+++ b/modules/nf-core/pigz/compress/main.nf
@@ -0,0 +1,50 @@
+// Compress one file with pigz.
+//   input:  tuple(meta, raw_file)  - meta map plus the file to compress
+//   output: archive  -> tuple(meta, "<raw_file>.gz")
+//           versions -> versions.yml recording the pigz version
+process PIGZ_COMPRESS {
+    tag "$meta.id"
+    label 'process_low'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/pigz:2.8':
+        'biocontainers/pigz:2.8' }"
+
+    input:
+    tuple val(meta), path(raw_file)
+
+    output:
+    tuple val(meta), path("$archive"), emit: archive
+    path "versions.yml"              , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    // No `def` here: the output declaration above interpolates "$archive".
+    archive = raw_file.toString() + ".gz"
+    """
+    # Note: needs --stdout for pigz to avoid the following issue:
+    #   pigz: skipping: ${raw_file} is a symbolic link
+    pigz --processes $task.cpus --stdout --force ${args} ${raw_file} > ${archive}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        pigz:\$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' )
+    END_VERSIONS
+    """
+
+    stub:
+    // Only the output name is needed here; the stub passes no extra arguments to pigz.
+    archive = raw_file.toString() + ".gz"
+    """
+    touch ${archive}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        pigz:\$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' )
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/pigz/compress/meta.yml b/modules/nf-core/pigz/compress/meta.yml
new file mode 100644
index 00000000..42efd735
--- /dev/null
+++ b/modules/nf-core/pigz/compress/meta.yml
@@ -0,0 +1,47 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "pigz_compress"
+description: Compresses files with pigz.
+keywords:
+  - compress
+  - gzip
+  - parallelized
+tools:
+  - "pigz":
+      description: "Parallel implementation of the gzip algorithm."
+      homepage: "https://zlib.net/pigz/"
+      documentation: "https://zlib.net/pigz/pigz.pdf"
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. `[ id:'sample1', single_end:false ]`
+
+  - raw_file:
+      type: file
+      description: File to be compressed
+      pattern: "*.*"
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g.
`[ id:'sample1', single_end:false ]`
+
+  - archive:
+      type: file
+      description: The compressed file
+      pattern: "*.gz"
+
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+
+authors:
+  - "@leoisl"
+maintainers:
+  - "@leoisl"
diff --git a/modules/nf-core/pigz/compress/tests/main.nf.test b/modules/nf-core/pigz/compress/tests/main.nf.test
new file mode 100644
index 00000000..248d40fb
--- /dev/null
+++ b/modules/nf-core/pigz/compress/tests/main.nf.test
@@ -0,0 +1,49 @@
+nextflow_process { // nf-test suite for the PIGZ_COMPRESS process defined in ../main.nf
+    name "Test Process PIGZ_COMPRESS"
+    script "../main.nf"
+    process "PIGZ_COMPRESS"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "pigz"
+    tag "pigz/compress"
+
+    test("sarscov2 - genome - fasta") { // regular run on the sarscov2 genome FASTA test file
+        when {
+            process {
+                """
+                input[0] = [
+                            [ id:'test'], // meta map
+                            file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
+                            ]
+                """
+            }
+        }
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() } // compares every output channel against the stored snapshot
+            )
+        }
+    }
+
+    test("sarscov2 - genome - fasta - stub") { // same input as above, run with -stub-run
+        options "-stub-run" // passes Nextflow's -stub-run option to this test
+        when {
+            process {
+                """
+                input[0] = [
+                            [ id:'test'], // meta map
+                            file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
+                            ]
+                """
+            }
+        }
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(file(process.out.archive[0][1]).name).match() } // snapshots only the archive's file name, not its content
+            )
+        }
+    }
+}
diff --git a/modules/nf-core/pigz/compress/tests/main.nf.test.snap b/modules/nf-core/pigz/compress/tests/main.nf.test.snap
new file mode 100644
index 00000000..6e50456f
--- /dev/null
+++ b/modules/nf-core/pigz/compress/tests/main.nf.test.snap
@@ -0,0 +1,37 @@
+{
+    "sarscov2 - genome - fasta": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "genome.fasta.gz:md5,6e9fe4042a72f2345f644f239272b7e6"
+                    ]
+                ],
+                "1": [
+                    "versions.yml:md5,ca30e9e1ffa1394ba7eefdac8cf3a3ad"
+                ],
+                "archive": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "genome.fasta.gz:md5,6e9fe4042a72f2345f644f239272b7e6"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,ca30e9e1ffa1394ba7eefdac8cf3a3ad"
+                ]
+            }
+        ],
+        "timestamp": "2023-12-11T22:39:53.350546"
+    },
+    "sarscov2 - genome - fasta - stub": {
+        "content": [
+            "genome.fasta.gz"
+        ],
+        "timestamp": "2023-12-11T22:52:24.309192"
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/pigz/compress/tests/tags.yml b/modules/nf-core/pigz/compress/tests/tags.yml
new file mode 100644
index 00000000..42c46bfa
--- /dev/null
+++ b/modules/nf-core/pigz/compress/tests/tags.yml
@@ -0,0 +1,2 @@
+pigz/compress:
+  - "modules/nf-core/pigz/compress/**"
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 8c357d34..1effcd65 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -330,7 +330,7 @@
                     "type": "string",
                     "enum": ["gtdb", "phylodb", "marmmetsp", "mmetsp", "eukprot"],
                     "description": "EUKulele database.",
-                    "help_text": "This option allows the user to specify which database (or set of databases, separated by comma) to use with EUKulele. Databases that are provided with EUKulele will be downloaded if not already present inside the database directory (see --eukulele_dbpath). Possible alternatives: phylodb, mmetsp, marmmetsp, eukprot. NB: you can't use this option with a custom database as eukulele will not recognize the name and it will start to download phylodb by default. If you want to use a custom database, please skip this option and specify only --eukulele_dbpath.",
+                    "help_text": "This option allows the user to specify which database to use with EUKulele. Databases that are provided with EUKulele will be downloaded if not already present inside the database directory (see --eukulele_dbpath). Possible alternatives: phylodb, mmetsp, marmmetsp, eukprot. NB: you can't use this option with a custom database as eukulele will not recognize the name and it will start to download phylodb by default.
If you want to use a custom database, please skip this option and specify only --eukulele_dbpath.",
                     "fa_icon": "far fa-file-code"
                 },
                 "eukulele_dbpath": {
diff --git a/workflows/metatdenovo.nf b/workflows/metatdenovo.nf
index 5d932b5a..33d0745f 100644
--- a/workflows/metatdenovo.nf
+++ b/workflows/metatdenovo.nf
@@ -121,6 +121,7 @@ include { CAT_FASTQ } from '../modules/nf-core/
 include { FASTQC                      } from '../modules/nf-core/fastqc/main'
 include { MULTIQC                     } from '../modules/nf-core/multiqc/main'
 include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main'
+include { PIGZ_COMPRESS as PIGZ_ASSEMBLY } from '../modules/nf-core/pigz/compress/main'
 
 //
 // SUBWORKFLOWS: Installed directly from nf-core/modules
@@ -267,9 +268,18 @@ workflow METATDENOVO {
     // MODULE: Run Megahit or Spades on all interleaved fastq files
     //
     if ( params.assembly ) {
-        Channel
-            .value ( [ [ id: 'user_assembly' ], file(params.assembly) ] )
-            .set { ch_assembly_contigs }
+        // If the input assembly is not gzipped, do that since all downstream calls assume this
+        if ( ! params.assembly.endsWith('.gz') ) {
+            // Use the same fixed id as the already-gzipped branch below, so the meta map does not depend on the user's path
+            PIGZ_ASSEMBLY(Channel.fromPath(params.assembly).map { [ [ id: 'user_assembly' ], it ] } )
+            PIGZ_ASSEMBLY.out.archive.first().set { ch_assembly_contigs }
+            // Collect the pigz version alongside the other tools' versions
+            ch_versions = ch_versions.mix(PIGZ_ASSEMBLY.out.versions)
+        } else {
+            Channel
+                .value ( [ [ id: 'user_assembly' ], file(params.assembly) ] )
+                .set { ch_assembly_contigs }
+        }
     } else if ( assembler == 'spades' ) {
         // 1. Write a yaml file for Spades
         WRITESPADESYAML (
@@ -308,7 +318,9 @@ workflow METATDENOVO {
             .map { [ [ id: 'megahit' ], it ] }
             .set { ch_assembly_contigs }
         ch_versions = ch_versions.mix(MEGAHIT_INTERLEAVED.out.versions)
-    } else { error 'Assembler not specified!' }
+    } else {
+        error 'Assembler not specified!'
+    }
 
     // If the user asked for length filtering, perform that with SEQTK_SEQ (the actual length parameter is used in modules.config)
     if ( params.min_contig_length > 0 ) {
@@ -474,11 +486,11 @@ workflow METATDENOVO {
     // SUBWORKFLOW: Eukulele
     //
     ch_eukulele_db = Channel.empty()
-    if( !params.skip_eukulele){
+    if( ! params.skip_eukulele ) {
         // Create a channel for EUKulele either with a named database or not. The latter means a user-provided database in a directory.
         if ( params.eukulele_db ) {
             Channel
-                .fromList ( params.eukulele_db.split(',') )
+                .of ( params.eukulele_db )
                 .map { [ it, file(params.eukulele_dbpath) ] }
                 .set { ch_eukulele_db }
         } else {