From 6f38499f76edfbf2e0c8712a52e43c5ccab33a1a Mon Sep 17 00:00:00 2001 From: "Diego Alvarez S." Date: Mon, 28 Oct 2024 17:31:36 -0300 Subject: [PATCH 01/15] Testing param --- nextflow.config | 1 + nextflow_schema.json | 5 +++++ workflows/mag.nf | 20 ++++++++++++-------- 3 files changed, 18 insertions(+), 8 deletions(-) diff --git a/nextflow.config b/nextflow.config index 07325acf..ea0a87e5 100644 --- a/nextflow.config +++ b/nextflow.config @@ -121,6 +121,7 @@ params { refine_bins_dastool = false refine_bins_dastool_threshold = 0.5 postbinning_input = 'raw_bins_only' + include_unbins_in_postbinning = true // Bin QC skip_binqc = false diff --git a/nextflow_schema.json b/nextflow_schema.json index ceb3ac08..26342674 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -704,6 +704,11 @@ "type": "integer", "default": 3000, "description": "Minimum contig length for Tiara to use for domain classification. For accurate classification, should be longer than 3000 bp." + }, + "include_unbins_in_postbinning": { + "type": "boolean", + "description": "Include unbinned contigs in the post-binning output (.", + "default": true } } }, diff --git a/workflows/mag.nf b/workflows/mag.nf index 7afb4316..27262116 100644 --- a/workflows/mag.nf +++ b/workflows/mag.nf @@ -769,7 +769,11 @@ workflow MAG { ch_input_for_postbinning_bins_unbins = ch_binning_results_bins.mix(ch_binning_results_unbins) } - DEPTHS(ch_input_for_postbinning_bins_unbins, BINNING.out.metabat2depths, ch_short_reads) + def ch_input_for_postbinning = params.include_unbins_in_postbinning + ? 
ch_input_for_postbinning_bins_unbins + : ch_input_for_postbinning_bins + + DEPTHS(ch_input_for_postbinning, BINNING.out.metabat2depths, ch_short_reads) ch_input_for_binsummary = DEPTHS.out.depths_summary ch_versions = ch_versions.mix(DEPTHS.out.versions) @@ -777,7 +781,7 @@ workflow MAG { * Bin QC subworkflows: for checking bin completeness with either BUSCO, CHECKM, and/or GUNC */ - ch_input_bins_for_qc = ch_input_for_postbinning_bins_unbins.transpose() + ch_input_bins_for_qc = ch_input_for_postbinning.transpose() if (!params.skip_binqc && params.binqc_tool == 'busco') { /* @@ -821,7 +825,7 @@ workflow MAG { ch_versions = ch_versions.mix(GUNC_QC.out.versions) } else if (params.run_gunc) { - ch_input_bins_for_gunc = ch_input_for_postbinning_bins_unbins.filter { meta, bins -> + ch_input_bins_for_gunc = ch_input_for_postbinning.filter { meta, bins -> meta.domain != "eukarya" } GUNC_QC(ch_input_bins_for_qc, ch_gunc_db, []) @@ -830,7 +834,7 @@ workflow MAG { ch_quast_bins_summary = Channel.empty() if (!params.skip_quast) { - ch_input_for_quast_bins = ch_input_for_postbinning_bins_unbins + ch_input_for_quast_bins = ch_input_for_postbinning .groupTuple() .map { meta, bins -> def new_bins = bins.flatten() @@ -859,7 +863,7 @@ workflow MAG { ch_cat_db = CAT_DB_GENERATE.out.db } CAT( - ch_input_for_postbinning_bins_unbins, + ch_input_for_postbinning, ch_cat_db ) // Group all classification results for each sample in a single file @@ -890,7 +894,7 @@ workflow MAG { ch_gtdbtk_summary = Channel.empty() if (gtdb) { - ch_gtdb_bins = ch_input_for_postbinning_bins_unbins.filter { meta, bins -> + ch_gtdb_bins = ch_input_for_postbinning.filter { meta, bins -> meta.domain != "eukarya" } @@ -925,7 +929,7 @@ workflow MAG { */ if (!params.skip_prokka) { - ch_bins_for_prokka = ch_input_for_postbinning_bins_unbins + ch_bins_for_prokka = ch_input_for_postbinning .transpose() .map { meta, bin -> def meta_new = meta + [id: bin.getBaseName()] @@ -944,7 +948,7 @@ workflow MAG { } if 
(!params.skip_metaeuk && (params.metaeuk_db || params.metaeuk_mmseqs_db)) { - ch_bins_for_metaeuk = ch_input_for_postbinning_bins_unbins + ch_bins_for_metaeuk = ch_input_for_postbinning .transpose() .filter { meta, bin -> meta.domain in ["eukarya", "unclassified"] From dca7b71aa40d958a014e03b967ae62b7d5f45081 Mon Sep 17 00:00:00 2001 From: "Diego Alvarez S." Date: Mon, 28 Oct 2024 17:55:26 -0300 Subject: [PATCH 02/15] Complete schema description --- nextflow_schema.json | 2 +- workflows/mag.nf | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 26342674..f11868b6 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -707,7 +707,7 @@ }, "include_unbins_in_postbinning": { "type": "boolean", - "description": "Include unbinned contigs in the post-binning output (.", + "description": "Include unbinned contigs in the post-binning steps (bin qc, taxonomic classification and annotation).", "default": true } } diff --git a/workflows/mag.nf b/workflows/mag.nf index 27262116..9a02d1b7 100644 --- a/workflows/mag.nf +++ b/workflows/mag.nf @@ -769,7 +769,7 @@ workflow MAG { ch_input_for_postbinning_bins_unbins = ch_binning_results_bins.mix(ch_binning_results_unbins) } - def ch_input_for_postbinning = params.include_unbins_in_postbinning + ch_input_for_postbinning = params.include_unbins_in_postbinning ? ch_input_for_postbinning_bins_unbins : ch_input_for_postbinning_bins From dd7f135f9816c483c2bc3d0b23c704b85f554396 Mon Sep 17 00:00:00 2001 From: "Diego Alvarez S." 
Date: Mon, 28 Oct 2024 17:55:33 -0300 Subject: [PATCH 03/15] Fix channel --- workflows/mag.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/mag.nf b/workflows/mag.nf index 9a02d1b7..19ea6be2 100644 --- a/workflows/mag.nf +++ b/workflows/mag.nf @@ -828,7 +828,7 @@ workflow MAG { ch_input_bins_for_gunc = ch_input_for_postbinning.filter { meta, bins -> meta.domain != "eukarya" } - GUNC_QC(ch_input_bins_for_qc, ch_gunc_db, []) + GUNC_QC(ch_input_bins_for_gunc, ch_gunc_db, []) ch_versions = ch_versions.mix(GUNC_QC.out.versions) } From 705198e3d2437ca1759bde33cff4586cee8b9224 Mon Sep 17 00:00:00 2001 From: "Diego Alvarez S." Date: Mon, 28 Oct 2024 18:13:07 -0300 Subject: [PATCH 04/15] Invert parameter --- nextflow.config | 2 +- nextflow_schema.json | 6 +++--- workflows/mag.nf | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/nextflow.config b/nextflow.config index ea0a87e5..2bca0b93 100644 --- a/nextflow.config +++ b/nextflow.config @@ -121,7 +121,7 @@ params { refine_bins_dastool = false refine_bins_dastool_threshold = 0.5 postbinning_input = 'raw_bins_only' - include_unbins_in_postbinning = true + exclude_unbins_in_postbinning = false // Bin QC skip_binqc = false diff --git a/nextflow_schema.json b/nextflow_schema.json index f11868b6..233f042a 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -705,10 +705,10 @@ "default": 3000, "description": "Minimum contig length for Tiara to use for domain classification. For accurate classification, should be longer than 3000 bp." 
}, - "include_unbins_in_postbinning": { + "exclude_unbins_in_postbinning": { "type": "boolean", - "description": "Include unbinned contigs in the post-binning steps (bin qc, taxonomic classification and annotation).", - "default": true + "description": "Exclude unbinned contigs in the post-binning steps (bin qc, taxonomic classification and annotation).", + "default": false } } }, diff --git a/workflows/mag.nf b/workflows/mag.nf index 19ea6be2..6ae9bce7 100644 --- a/workflows/mag.nf +++ b/workflows/mag.nf @@ -769,9 +769,9 @@ workflow MAG { ch_input_for_postbinning_bins_unbins = ch_binning_results_bins.mix(ch_binning_results_unbins) } - ch_input_for_postbinning = params.include_unbins_in_postbinning - ? ch_input_for_postbinning_bins_unbins - : ch_input_for_postbinning_bins + ch_input_for_postbinning = params.exclude_unbins_in_postbinning + ? ch_input_for_postbinning_bins + : ch_input_for_postbinning_bins_unbins DEPTHS(ch_input_for_postbinning, BINNING.out.metabat2depths, ch_short_reads) ch_input_for_binsummary = DEPTHS.out.depths_summary From 0149ed214db563b40e12db84845a065c19bde56b Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Tue, 29 Oct 2024 14:02:44 +0000 Subject: [PATCH 05/15] Prepare for patch --- .nf-core.yml | 2 +- CHANGELOG.md | 2 ++ assets/multiqc_config.yml | 4 ++-- nextflow.config | 2 +- 4 files changed, 6 insertions(+), 4 deletions(-) diff --git a/.nf-core.yml b/.nf-core.yml index 558d7eee..5e2971cf 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -18,5 +18,5 @@ template: org: nf-core outdir: . 
skip_features: null - version: 3.3.0dev + version: 3.0.1 update: null diff --git a/CHANGELOG.md b/CHANGELOG.md index a902cd7b..0156701b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Fixed` +- [#707](https://github.com/nf-core/mag/pull/707) - Fix missing space resulting in malformed args for MEGAHIT (reported by @d4straub, fix by @jfy133) + ### `Dependencies` ### `Deprecated` diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index fe8d388f..434dfa7b 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,7 +1,7 @@ report_comment: > - This report has been generated by the nf-core/mag + This report has been generated by the nf-core/mag analysis pipeline. For information about how to interpret these results, please see the - documentation. + documentation. report_section_order: "nf-core-mag-methods-description": order: -1000 diff --git a/nextflow.config b/nextflow.config index 07325acf..4c5e67fa 100644 --- a/nextflow.config +++ b/nextflow.config @@ -375,7 +375,7 @@ manifest { description = """Assembly, binning and annotation of metagenomes""" mainScript = 'main.nf' nextflowVersion = '!>=24.04.2' - version = '3.3.0dev' + version = '3.2.1' doi = '10.1093/nargab/lqac007' } From 229197447c910cc19fe5ad411e454562b582ed07 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Tue, 29 Oct 2024 14:05:08 +0000 Subject: [PATCH 06/15] Update CHANGELOG --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0156701b..8296e383 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
-## dev [unreleased] +## 3.2.1 [2024-10-29] ### `Added` From dbe49fe704e3f0baaa5c60e198ebd8ba268d2b06 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Tue, 29 Oct 2024 15:05:54 +0100 Subject: [PATCH 07/15] Add spaces to MEGAHIT options to stop breaking args list --- conf/modules.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index 0fbea292..d6ee1d73 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -257,7 +257,7 @@ process { } withName: MEGAHIT { - ext.args = { params.megahit_options ? params.megahit_options + "-m ${task.memory.toBytes()}" : "-m ${task.memory.toBytes()}" } + ext.args = { params.megahit_options ? params.megahit_options + " -m ${task.memory.toBytes()}" : " -m ${task.memory.toBytes()}" } ext.prefix = { "MEGAHIT-${meta.id}" } publishDir = [path: { "${params.outdir}/Assembly/MEGAHIT" }, mode: params.publish_dir_mode, pattern: "*.{fa.gz,log}"] } From 4c43894c156d77d04bfd2f73c220ef795463a947 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Tue, 29 Oct 2024 15:06:40 +0100 Subject: [PATCH 08/15] Update .nf-core.yml --- .nf-core.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.nf-core.yml b/.nf-core.yml index 5e2971cf..f945d00c 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -18,5 +18,5 @@ template: org: nf-core outdir: . skip_features: null - version: 3.0.1 + version: 3.2.1 update: null From 52ccb1cdf4e4d4861a4521b51da3c6f6acec84ba Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Tue, 29 Oct 2024 15:53:58 +0100 Subject: [PATCH 09/15] Update modules.config --- conf/modules.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index d6ee1d73..507e475f 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -257,7 +257,7 @@ process { } withName: MEGAHIT { - ext.args = { params.megahit_options ? 
params.megahit_options + " -m ${task.memory.toBytes()}" : " -m ${task.memory.toBytes()}" } + ext.args = { params.megahit_options ? params.megahit_options + " -m ${task.memory.toBytes()}" : "-m ${task.memory.toBytes()}" } ext.prefix = { "MEGAHIT-${meta.id}" } publishDir = [path: { "${params.outdir}/Assembly/MEGAHIT" }, mode: params.publish_dir_mode, pattern: "*.{fa.gz,log}"] } From 9504788600dd56f5721050ae03ae32da2dcdb26e Mon Sep 17 00:00:00 2001 From: "Diego Alvarez S." Date: Wed, 30 Oct 2024 02:17:47 -0300 Subject: [PATCH 10/15] Apply suggestions from code review Co-authored-by: James A. Fellows Yates --- nextflow.config | 2 +- nextflow_schema.json | 5 +++-- workflows/mag.nf | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/nextflow.config b/nextflow.config index 2bca0b93..1ecabbdc 100644 --- a/nextflow.config +++ b/nextflow.config @@ -121,7 +121,7 @@ params { refine_bins_dastool = false refine_bins_dastool_threshold = 0.5 postbinning_input = 'raw_bins_only' - exclude_unbins_in_postbinning = false + exclude_unbins_from_postbinning = false // Bin QC skip_binqc = false diff --git a/nextflow_schema.json b/nextflow_schema.json index 233f042a..923306e5 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -705,9 +705,10 @@ "default": 3000, "description": "Minimum contig length for Tiara to use for domain classification. For accurate classification, should be longer than 3000 bp." 
}, - "exclude_unbins_in_postbinning": { + "exclude_unbins_from_postbinning": { "type": "boolean", - "description": "Exclude unbinned contigs in the post-binning steps (bin qc, taxonomic classification and annotation).", + "description": "Exclude unbinned contigs in the post-binning steps (bin QC, taxonomic classification, and annotation steps).", + "help": "If you're not interested in assemblies results that are not considered 'genome level', excluding unbinned contigs can greatly speed up downstream steps such as Prokka, that can be quite slow and spin up many tasks.", "default": false } } diff --git a/workflows/mag.nf b/workflows/mag.nf index 6ae9bce7..356c39fe 100644 --- a/workflows/mag.nf +++ b/workflows/mag.nf @@ -769,7 +769,7 @@ workflow MAG { ch_input_for_postbinning_bins_unbins = ch_binning_results_bins.mix(ch_binning_results_unbins) } - ch_input_for_postbinning = params.exclude_unbins_in_postbinning + ch_input_for_postbinning = params.exclude_unbins_from_postbinning ? ch_input_for_postbinning_bins : ch_input_for_postbinning_bins_unbins From af7300f290954bc8a3f82f52ff2576296fb095cc Mon Sep 17 00:00:00 2001 From: "Diego Alvarez S." Date: Wed, 30 Oct 2024 02:24:25 -0300 Subject: [PATCH 11/15] Update changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index a902cd7b..7e7c3b9f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,10 +6,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## dev [unreleased] ### `Added` +- [#708](https://github.com/nf-core/mag/pull/708) - Added `dev-exclude-unbinned` parameter to exclude unbinned contigs from post-binning processes (added by @dialvarezs) ### `Changed` ### `Fixed` +- [#708](https://github.com/nf-core/mag/pull/708) - Fixed channel passed as GUNC input (added by @dialvarezs) ### `Dependencies` From aa2520147105030d3a4072ba2e61cd705bcaaba1 Mon Sep 17 00:00:00 2001 From: "Diego Alvarez S." 
Date: Wed, 30 Oct 2024 02:25:37 -0300 Subject: [PATCH 12/15] Pre-commit --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7e7c3b9f..77b15c09 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,11 +6,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## dev [unreleased] ### `Added` + - [#708](https://github.com/nf-core/mag/pull/708) - Added `dev-exclude-unbinned` parameter to exclude unbinned contigs from post-binning processes (added by @dialvarezs) ### `Changed` ### `Fixed` + - [#708](https://github.com/nf-core/mag/pull/708) - Fixed channel passed as GUNC input (added by @dialvarezs) ### `Dependencies` From daa493f46ebcee8f25d1cb25307d59b9564b2aaf Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Wed, 30 Oct 2024 10:15:41 +0100 Subject: [PATCH 13/15] Apply suggestions from code review --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8296e383..f7b929e3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## 3.2.1 [2024-10-29] +## 3.2.1 [2024-10-30] ### `Added` From 8c46072e939b1db25c4c0ba3386753890150fca7 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Wed, 30 Oct 2024 10:31:26 +0100 Subject: [PATCH 14/15] Post patch bump --- .nf-core.yml | 2 +- CHANGELOG.md | 12 ++++++++++++ assets/multiqc_config.yml | 4 ++-- nextflow.config | 2 +- 4 files changed, 16 insertions(+), 4 deletions(-) diff --git a/.nf-core.yml b/.nf-core.yml index f945d00c..558d7eee 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -18,5 +18,5 @@ template: org: nf-core outdir: . 
skip_features: null - version: 3.2.1 + version: 3.3.0dev update: null diff --git a/CHANGELOG.md b/CHANGELOG.md index f7b929e3..1e6ad844 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,18 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## dev [unreleased] + +### `Added` + +### `Changed` + +### `Fixed` + +### `Dependencies` + +### `Deprecated` + ## 3.2.1 [2024-10-30] ### `Added` diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 434dfa7b..fe8d388f 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,7 +1,7 @@ report_comment: > - This report has been generated by the nf-core/mag + This report has been generated by the nf-core/mag analysis pipeline. For information about how to interpret these results, please see the - documentation. + documentation. report_section_order: "nf-core-mag-methods-description": order: -1000 diff --git a/nextflow.config b/nextflow.config index 4c5e67fa..07325acf 100644 --- a/nextflow.config +++ b/nextflow.config @@ -375,7 +375,7 @@ manifest { description = """Assembly, binning and annotation of metagenomes""" mainScript = 'main.nf' nextflowVersion = '!>=24.04.2' - version = '3.2.1' + version = '3.3.0dev' doi = '10.1093/nargab/lqac007' } From 1a00c14f3e2a6ead1f03f9ac73052f80d9f58e3d Mon Sep 17 00:00:00 2001 From: "James A. 
Fellows Yates" Date: Wed, 30 Oct 2024 10:48:55 +0100 Subject: [PATCH 15/15] Apply suggestions from code review --- CHANGELOG.md | 2 +- nextflow_schema.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 77b15c09..f46adf9c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` -- [#708](https://github.com/nf-core/mag/pull/708) - Added `dev-exclude-unbinned` parameter to exclude unbinned contigs from post-binning processes (added by @dialvarezs) +- [#708](https://github.com/nf-core/mag/pull/708) - Added `--exclude_unbins_from_postbinning` parameter to exclude unbinned contigs from post-binning processes, speeding up Prokka in some cases (added by @dialvarezs) ### `Changed` diff --git a/nextflow_schema.json b/nextflow_schema.json index 923306e5..0875606d 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -708,7 +708,7 @@ "exclude_unbins_from_postbinning": { "type": "boolean", "description": "Exclude unbinned contigs in the post-binning steps (bin QC, taxonomic classification, and annotation steps).", - "help": "If you're not interested in assemblies results that are not considered 'genome level', excluding unbinned contigs can greatly speed up downstream steps such as Prokka, that can be quite slow and spin up many tasks.", + "help": "If you're not interested in assembly results that are not considered 'genome level', excluding unbinned contigs can greatly speed up downstream steps such as Prokka, that can be quite slow and spin up many tasks.", "default": false } }