From 2cd58207d31fb58b56de2b87b951ff7a53719f55 Mon Sep 17 00:00:00 2001 From: Daniel Lundin Date: Tue, 2 Apr 2024 13:48:31 +0200 Subject: [PATCH 1/8] Fix bug in how eukulele_db was handled --- CHANGELOG.md | 4 ++++ nextflow_schema.json | 2 +- workflows/metatdenovo.nf | 6 ++++-- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e6e05ffe..71efe6af 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## v1.0.1 - [2024-04-02] + +- [#N](path) - Fix mistake in how `--eukulele_db` parameter is handled. Remove possibility to use a list of dbs in the same run. + ## v1.0.0 - [date] Initial release of nf-core/metatdenovo, created with the [nf-core](https://nf-co.re/) template. diff --git a/nextflow_schema.json b/nextflow_schema.json index ed156e20..accea5f2 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -326,7 +326,7 @@ "type": "string", "enum": ["gtdb", "phylodb", "marmmetsp", "mmetsp", "eukprot"], "description": "EUKulele database.", - "help_text": "This option allows the user to specify which database (or set of databases, separated by comma) to use with EUKulele. Databases that are provided with EUKulele will be downloaded if not already present inside the database directory (see --eukulele_dbpath). Possible alternatives: phylodb, mmetsp, marmmetsp, eukprot. NB: you can't use this option with a custom database as eukulele will not recognize the name and it will start to download phylodb by default. If you want to use a custom database, please skip this option and specify only --eukulele_dbpath.", + "help_text": "This option allows the user to specify which database to use with EUKulele. Databases that are provided with EUKulele will be downloaded if not already present inside the database directory (see --eukulele_dbpath). 
Possible alternatives: gtdb, phylodb, mmetsp, marmmetsp, eukprot. NB: you can't use this option with a custom database as EUKulele will not recognize the name and it will start to download phylodb by default. If you want to use a custom database, please skip this option and specify only --eukulele_dbpath.", "fa_icon": "far fa-file-code" }, "eukulele_dbpath": { diff --git a/workflows/metatdenovo.nf b/workflows/metatdenovo.nf index ea51fb76..12674dec 100644 --- a/workflows/metatdenovo.nf +++ b/workflows/metatdenovo.nf @@ -4,6 +4,8 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ +System.out.println "param: '${params.eukulele_db}', split: ${params.eukulele_db.split(',')}" + include { paramsSummaryLog; paramsSummaryMap } from 'plugin/nf-validation' def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs) @@ -469,11 +471,11 @@ workflow METATDENOVO { // SUBWORKFLOW: Eukulele // ch_eukulele_db = Channel.empty() - if( !params.skip_eukulele){ + if( ! params.skip_eukulele ) { // Create a channel for EUKulele either with a named database or not. The latter means a user-provided database in a directory. if ( params.eukulele_db ) { Channel - .fromList ( params.eukulele_db.split(',') ) + .of ( params.eukulele_db ) .map { [ it, file(params.eukulele_dbpath) ] } .set { ch_eukulele_db } } else { From b467024a44886b99110db5635d559fcc05d9988b Mon Sep 17 00:00:00 2001 From: Daniel Lundin Date: Tue, 2 Apr 2024 16:20:02 +0200 Subject: [PATCH 2/8] Gzip user specified assembly file if unzipped --- CHANGELOG.md | 1 + modules.json | 5 +++++ workflows/metatdenovo.nf | 17 +++++++++++++---- 3 files changed, 19 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 71efe6af..5ed8fa87 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## v1.0.1 - [2024-04-02] - [#N](path) - Fix mistake in how `--eukulele_db` parameter is handled. 
Remove possibility to use a list of dbs in the same run. +- [#N](path) - Gzip user provided assembly files to avoid overwriting by assuming they're already zipped. ## v1.0.0 - [date] diff --git a/modules.json b/modules.json index 7ad9351c..34e00d49 100644 --- a/modules.json +++ b/modules.json @@ -55,6 +55,11 @@ "git_sha": "9e71d8519dfbfc328c078bba14d4bd4c99e39a94", "installed_by": ["modules"] }, + "pigz/compress": { + "branch": "master", + "git_sha": "0eab94fc1e48703c1b0a8704bd665f554905c39d", + "installed_by": ["modules"] + }, "prodigal": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", diff --git a/workflows/metatdenovo.nf b/workflows/metatdenovo.nf index 12674dec..b4f6354a 100644 --- a/workflows/metatdenovo.nf +++ b/workflows/metatdenovo.nf @@ -123,6 +123,7 @@ include { CAT_FASTQ } from '../modules/nf-core/ include { FASTQC } from '../modules/nf-core/fastqc/main' include { MULTIQC } from '../modules/nf-core/multiqc/main' include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' +include { PIGZ_COMPRESS as PIGZ_ASSEMBLY } from '../modules/nf-core/pigz/compress/main' // // SUBWORKFLOWS: Installed directly from nf-core/modules @@ -269,9 +270,15 @@ workflow METATDENOVO { // MODULE: Run Megahit or RNAspades on all interleaved fastq files // if ( params.assembly ) { - Channel - .value ( [ [ id: 'user_assembly' ], file(params.assembly) ] ) - .set { ch_assembly_contigs } + // If the input assembly is not gzipped, do that since all downstream calls assume this + if ( ! params.assembly.endsWith('.gz') ) { + PIGZ_ASSEMBLY(Channel.fromPath(params.assembly).map { [ [ id:params.assembly ], it ] } ) + PIGZ_ASSEMBLY.out.archive.first().set { ch_assembly_contigs } + } else { + Channel + .value ( [ [ id: 'user_assembly' ], file(params.assembly) ] ) + .set { ch_assembly_contigs } + } } else if ( assembler == 'rnaspades' ) { // 1. 
Write a yaml file for Spades WRITESPADESYAML ( @@ -305,7 +312,9 @@ workflow METATDENOVO { .map { [ [ id: 'megahit' ], it ] } .set { ch_assembly_contigs } ch_versions = ch_versions.mix(MEGAHIT_INTERLEAVED.out.versions) - } else { error 'Assembler not specified!' } + } else { + error 'Assembler not specified!' + } // If the user asked for length filtering, perform that with SEQTK_SEQ (the actual length parameter is used in modules.config) if ( params.min_contig_length > 0 ) { From 064a130df406431bc0ed6b652bde29d1d980bc83 Mon Sep 17 00:00:00 2001 From: Daniel Lundin Date: Tue, 2 Apr 2024 16:22:04 +0200 Subject: [PATCH 3/8] Bump to version 1.0.1 --- assets/multiqc_config.yml | 4 ++-- nextflow.config | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 4c8e1f76..e0817b8b 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,7 +1,7 @@ report_comment: > - This report has been generated by the nf-core/metatdenovo + This report has been generated by the nf-core/metatdenovo analysis pipeline. For information about how to interpret these results, please see the - documentation. + documentation. 
report_section_order: "nf-core-metatdenovo-methods-description": order: -1000 diff --git a/nextflow.config b/nextflow.config index 266de3d6..8af2eac3 100644 --- a/nextflow.config +++ b/nextflow.config @@ -288,7 +288,7 @@ manifest { description = """Assembly and annotation of metatranscriptomic data, both prokaryotic and eukaryotic""" mainScript = 'main.nf' nextflowVersion = '!>=23.04.0' - version = '1.0.0' + version = '1.0.1' doi = '' } From d10b1172d8ddd77d2d0e834e4d15937c1e99c1b4 Mon Sep 17 00:00:00 2001 From: Daniel Lundin Date: Tue, 2 Apr 2024 16:27:55 +0200 Subject: [PATCH 4/8] Update CHANGELOG.md --- CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5ed8fa87..8c1aae43 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,8 +5,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## v1.0.1 - [2024-04-02] -- [#N](path) - Fix mistake in how `--eukulele_db` parameter is handled. Remove possibility to use a list of dbs in the same run. -- [#N](path) - Gzip user provided assembly files to avoid overwriting by assuming they're already zipped. +- [#277](https://github.com/nf-core/metatdenovo/pull/277) - Fix mistake in how `--eukulele_db` parameter is handled. Remove possibility to use a list of dbs in the same run. +- [#277](https://github.com/nf-core/metatdenovo/pull/277) - Gzip user-provided assembly files that are not already gzipped, to avoid them being overwritten because they were assumed to be zipped. 
## v1.0.0 - [date] From 7dd092828b00c7b141d787a3743a92129093c728 Mon Sep 17 00:00:00 2001 From: Daniel Lundin Date: Tue, 2 Apr 2024 20:34:46 +0200 Subject: [PATCH 5/8] Remove debug line --- workflows/metatdenovo.nf | 2 -- 1 file changed, 2 deletions(-) diff --git a/workflows/metatdenovo.nf b/workflows/metatdenovo.nf index b4f6354a..48d53c54 100644 --- a/workflows/metatdenovo.nf +++ b/workflows/metatdenovo.nf @@ -4,8 +4,6 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -System.out.println "param: '${params.eukulele_db}', split: ${params.eukulele_db.split(',')}" - include { paramsSummaryLog; paramsSummaryMap } from 'plugin/nf-validation' def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs) From 8ef699294acfc343b6623de993ce40d3d5c4f8a4 Mon Sep 17 00:00:00 2001 From: Daniel Lundin Date: Wed, 3 Apr 2024 11:01:00 +0200 Subject: [PATCH 6/8] Add pigz module --- modules/nf-core/pigz/compress/environment.yml | 9 ++++ modules/nf-core/pigz/compress/main.nf | 45 +++++++++++++++++ modules/nf-core/pigz/compress/meta.yml | 47 ++++++++++++++++++ .../nf-core/pigz/compress/tests/main.nf.test | 49 +++++++++++++++++++ .../pigz/compress/tests/main.nf.test.snap | 37 ++++++++++++++ modules/nf-core/pigz/compress/tests/tags.yml | 2 + 6 files changed, 189 insertions(+) create mode 100644 modules/nf-core/pigz/compress/environment.yml create mode 100644 modules/nf-core/pigz/compress/main.nf create mode 100644 modules/nf-core/pigz/compress/meta.yml create mode 100644 modules/nf-core/pigz/compress/tests/main.nf.test create mode 100644 modules/nf-core/pigz/compress/tests/main.nf.test.snap create mode 100644 modules/nf-core/pigz/compress/tests/tags.yml diff --git a/modules/nf-core/pigz/compress/environment.yml b/modules/nf-core/pigz/compress/environment.yml new file mode 100644 index 00000000..7551d187 --- /dev/null +++ b/modules/nf-core/pigz/compress/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: 
$schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +name: "pigz_compress" +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - "pigz=2.8" diff --git a/modules/nf-core/pigz/compress/main.nf b/modules/nf-core/pigz/compress/main.nf new file mode 100644 index 00000000..152e7006 --- /dev/null +++ b/modules/nf-core/pigz/compress/main.nf @@ -0,0 +1,45 @@ +process PIGZ_COMPRESS { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/pigz:2.8': + 'biocontainers/pigz:2.8' }" + + input: + tuple val(meta), path(raw_file) + + output: + tuple val(meta), path("$archive"), emit: archive + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + archive = raw_file.toString() + ".gz" + """ + # Note: needs --stdout for pigz to avoid the following issue: + # pigz: skipping: ${raw_file} is a symbolic link + pigz --processes $task.cpus --stdout --force ${args} ${raw_file} > ${archive} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pigz:\$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' ) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + archive = raw_file.toString() + ".gz" + """ + touch ${archive} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pigz:\$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/pigz/compress/meta.yml b/modules/nf-core/pigz/compress/meta.yml new file mode 100644 index 00000000..42efd735 --- /dev/null +++ b/modules/nf-core/pigz/compress/meta.yml @@ -0,0 +1,47 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "pigz_compress" +description: Compresses files with 
pigz. +keywords: + - compress + - gzip + - parallelized +tools: + - "pigz": + description: "Parallel implementation of the gzip algorithm." + homepage: "https://zlib.net/pigz/" + documentation: "https://zlib.net/pigz/pigz.pdf" + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + + - raw_file: + type: file + description: File to be compressed + pattern: "*.*" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + + - archive: + type: file + description: The compressed file + pattern: "*.gz" + + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@leoisl" +maintainers: + - "@leoisl" diff --git a/modules/nf-core/pigz/compress/tests/main.nf.test b/modules/nf-core/pigz/compress/tests/main.nf.test new file mode 100644 index 00000000..248d40fb --- /dev/null +++ b/modules/nf-core/pigz/compress/tests/main.nf.test @@ -0,0 +1,49 @@ +nextflow_process { + name "Test Process PIGZ_COMPRESS" + script "../main.nf" + process "PIGZ_COMPRESS" + + tag "modules" + tag "modules_nfcore" + tag "pigz" + tag "pigz/compress" + + test("sarscov2 - genome - fasta") { + when { + process { + """ + input[0] = [ + [ id:'test'], // meta map + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - genome - fasta - stub") { + options "-stub-run" + when { + process { + """ + input[0] = [ + [ id:'test'], // meta map + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.archive[0][1]).name).match() } + ) + } + } +} diff --git 
a/modules/nf-core/pigz/compress/tests/main.nf.test.snap b/modules/nf-core/pigz/compress/tests/main.nf.test.snap new file mode 100644 index 00000000..6e50456f --- /dev/null +++ b/modules/nf-core/pigz/compress/tests/main.nf.test.snap @@ -0,0 +1,37 @@ +{ + "sarscov2 - genome - fasta": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "genome.fasta.gz:md5,6e9fe4042a72f2345f644f239272b7e6" + ] + ], + "1": [ + "versions.yml:md5,ca30e9e1ffa1394ba7eefdac8cf3a3ad" + ], + "archive": [ + [ + { + "id": "test" + }, + "genome.fasta.gz:md5,6e9fe4042a72f2345f644f239272b7e6" + ] + ], + "versions": [ + "versions.yml:md5,ca30e9e1ffa1394ba7eefdac8cf3a3ad" + ] + } + ], + "timestamp": "2023-12-11T22:39:53.350546" + }, + "sarscov2 - genome - fasta - stub": { + "content": [ + "genome.fasta.gz" + ], + "timestamp": "2023-12-11T22:52:24.309192" + } +} \ No newline at end of file diff --git a/modules/nf-core/pigz/compress/tests/tags.yml b/modules/nf-core/pigz/compress/tests/tags.yml new file mode 100644 index 00000000..42c46bfa --- /dev/null +++ b/modules/nf-core/pigz/compress/tests/tags.yml @@ -0,0 +1,2 @@ +pigz/compress: + - "modules/nf-core/pigz/compress/**" From e9727e0bd299ff13bd6425a17544640d52296c02 Mon Sep 17 00:00:00 2001 From: Daniel Lundin Date: Wed, 3 Apr 2024 13:19:59 +0200 Subject: [PATCH 7/8] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 95c1709a..dec20584 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
-## v1.1.0 - [2024-02-15] +## v1.1.0 - [date] ### `Added` - [#271](<[https://github.com/nf-core/metatdenovo/issues/271](https://github.com/nf-core/metatdenovo/issues/271)>) - Added flavor to SPADES modules From 2e703e08846549ef7d82f882c6f41854a4bdfddc Mon Sep 17 00:00:00 2001 From: Daniel Lundin Date: Wed, 3 Apr 2024 15:26:20 +0200 Subject: [PATCH 8/8] Update ci.yml --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 1aac440d..72f784a9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -47,7 +47,7 @@ jobs: strategy: matrix: # Run remaining test profiles with minimum nextflow version - profile: [test, test_prokka, test_rnaspades, test_transdecoder] + profile: [test, test_prokka, test_spades, test_transdecoder] steps: - name: Check out pipeline code uses: actions/checkout@v2