From 232ac581f91fa607124179386cf30c5fde5b5b63 Mon Sep 17 00:00:00 2001 From: d4straub Date: Mon, 15 Jul 2024 10:34:58 +0200 Subject: [PATCH 01/31] bump version to 2.11.0dev --- CHANGELOG.md | 12 ++++++++++++ assets/multiqc_config.yml | 4 ++-- nextflow.config | 2 +- 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b938f472..b9a45dae 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,18 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## nf-core/ampliseq version 2.11.0dev + +### `Added` + +### `Changed` + +### `Fixed` + +### `Dependencies` + +### `Removed` + ## nf-core/ampliseq version 2.10.0 - 2024-06-27 ### `Added` diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 0a63581f..ec828d82 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,7 +1,7 @@ report_comment: > - This report has been generated by the nf-core/ampliseq + This report has been generated by the nf-core/ampliseq analysis pipeline. For information about how to interpret these results, please see the - documentation. + documentation. 
report_section_order: "nf-core-ampliseq-methods-description": order: -1000 diff --git a/nextflow.config b/nextflow.config index 81124505..7e0a4025 100644 --- a/nextflow.config +++ b/nextflow.config @@ -356,7 +356,7 @@ manifest { description = """Amplicon sequencing analysis workflow using DADA2 and QIIME2""" mainScript = 'main.nf' nextflowVersion = '!>=23.04.0' - version = '2.10.0' + version = '2.11.0dev' doi = '10.5281/zenodo.1493841,10.3389/fmicb.2020.550420' } From 71cfa7905e10d52729ce5213aeb3f003e4544c6c Mon Sep 17 00:00:00 2001 From: d4straub Date: Mon, 15 Jul 2024 11:37:12 +0200 Subject: [PATCH 02/31] fix sample sheet tests with meta.id --- CHANGELOG.md | 2 ++ workflows/ampliseq.nf | 8 ++++---- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b9a45dae..8fca35e7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Fixed` +- [#761](https://github.com/nf-core/ampliseq/pull/761) - Some sample sheet checks were not applied due to changes in the metadata ["meta"] structure in version 2.9.0 + ### `Dependencies` ### `Removed` diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index fa221126..5105b545 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -268,13 +268,13 @@ workflow AMPLISEQ { ch_input_reads = Channel.empty() if ( params.input ) { // See the documentation https://nextflow-io.github.io/nf-validation/samplesheets/fromSamplesheet/ - ch_input_reads = Channel.fromSamplesheet("input") + ch_input_reads = Channel.fromSamplesheet("input") // meta: meta.sample, meta.run .map{ meta, readfw, readrv -> meta.single_end = single_end.toBoolean() def reads = single_end ? 
readfw : [readfw,readrv] - if ( !meta.single_end && !readrv ) { error("Entry `reverseReads` is missing in $params.input for $meta.id, either correct the samplesheet or use `--single_end`, `--pacbio`, or `--iontorrent`") } // make sure that reverse reads are present when single_end isnt specified - if ( !meta.single_end && ( readfw.getSimpleName() == meta.id || readrv.getSimpleName() == meta.id ) ) { error("Entry `sampleID` cannot be identical to simple name of `forwardReads` or `reverseReads`, please change `sampleID` in $params.input for sample $meta.id") } // sample name and any file name without extensions arent identical, because rename_raw_data_files.nf would forward 3 files (2 renamed +1 input) instead of 2 in that case - if ( meta.single_end && ( readfw.getSimpleName() == meta.id+"_1" || readfw.getSimpleName() == meta.id+"_2" ) ) { error("Entry `sampleID`+ `_1` or `_2` cannot be identical to simple name of `forwardReads`, please change `sampleID` in $params.input for sample $meta.id") } // sample name and file name without extensions arent identical, because rename_raw_data_files.nf would forward 2 files (1 renamed +1 input) instead of 1 in that case + if ( !meta.single_end && !readrv ) { error("Entry `reverseReads` is missing in $params.input for $meta.sample, either correct the samplesheet or use `--single_end`, `--pacbio`, or `--iontorrent`") } // make sure that reverse reads are present when single_end isnt specified + if ( !meta.single_end && ( readfw.getSimpleName() == meta.sample || readrv.getSimpleName() == meta.sample ) ) { error("Entry `sampleID` cannot be identical to simple name of `forwardReads` or `reverseReads`, please change `sampleID` in $params.input for sample $meta.sample") } // sample name and any file name without extensions arent identical, because rename_raw_data_files.nf would forward 3 files (2 renamed +1 input) instead of 2 in that case + if ( meta.single_end && ( readfw.getSimpleName() == meta.sample+"_1" || readfw.getSimpleName() 
== meta.sample+"_2" ) ) { error("Entry `sampleID`+ `_1` or `_2` cannot be identical to simple name of `forwardReads`, please change `sampleID` in $params.input for sample $meta.sample") } // sample name and file name without extensions arent identical, because rename_raw_data_files.nf would forward 2 files (1 renamed +1 input) instead of 1 in that case return [meta, reads] } } else if ( params.input_fasta ) { ch_input_fasta = Channel.fromPath(params.input_fasta, checkIfExists: true) From 08febedc56f8f8527f0c25cd2e1aaacd3382beb2 Mon Sep 17 00:00:00 2001 From: d4straub Date: Mon, 15 Jul 2024 13:46:29 +0200 Subject: [PATCH 03/31] update output docu: ASV post processing --- CHANGELOG.md | 2 ++ docs/output.md | 10 ++++++---- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b9a45dae..d36313df 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Changed` +- [#762](https://github.com/nf-core/ampliseq/pull/762) - Improved output documentation section "Optional ASV filtering" + ### `Fixed` ### `Dependencies` diff --git a/docs/output.md b/docs/output.md index 9c35b0a2..302af707 100644 --- a/docs/output.md +++ b/docs/output.md @@ -23,7 +23,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d - [Cutadapt](#cutadapt) - Primer trimming - [MultiQC](#multiqc) - Aggregate report describing results - [ASV inferrence with DADA2](#asv-inferrence-with-dada2) - Infer Amplicon Sequence Variants (ASVs) -- [Optional ASV filtering](#optional-asv-filtering) - Filter ASVs to optimize downstream analysis +- [Optional ASV post processing](#optional-asv-post-processing) - Filter ASVs to optimize downstream analysis - [VSEARCH cluster](#vsearch-cluster) - Centroid fasta file, filtered asv table, and stats - [Barrnap](#barrnap) - Predict ribosomal RNA sequences and optional filtering - [Length filter](#length-filter) - 
Optionally, ASV can be filtered by length thresholds @@ -163,7 +163,9 @@ For binned quality scores in NovaSeq data, monotonicity in the fitted error mode -### Optional ASV filtering +### Optional ASV post processing + +ASV post-processing takes place after DADA2's ASV computation (i.e. after chimera removal, for example table `ASV_tax.tsv`) but _before_ taxonomic classification. Post-processing will affect all downstream files. Clustering and filters are applied sequentially, in the same sequence as shown here. All filters are off by default and can be enabled by setting thresholds as detailed in the parameter documentation. #### VSEARCH cluster @@ -184,7 +186,7 @@ This directory will hold the centroid fasta file, the filtered asv count table ( Barrnap predicts the location of ribosomal RNA genes in genomes, here it can be used to discriminate rRNA sequences from potential contamination. It supports bacteria (5S,23S,16S), archaea (5S,5.8S,23S,16S), metazoan mitochondria (12S,16S) and eukaryotes (5S,5.8S,28S,18S). -Optionally, ASV sequences can be filtered for rRNA sequences identified by Barrnap with `--filter_ssu` that can take a list of abbreviations of the above supported categories (kingdoms), e.g. `bac,arc,mito,euk`. This filtering takes place after DADA2's ASV computation (i.e. after chimera removal) but _before_ taxonomic classification (also applies to above mentioned taxonomic classification with DADA2, i.e. files `ASV_tax.tsv` & `ASV_tax_species.tsv`). +Optionally, ASV sequences can be filtered for rRNA sequences identified by Barrnap with `--filter_ssu` that can take a list of abbreviations of the above supported categories (kingdoms), e.g. `bac,arc,mito,euk`.
Output files @@ -200,7 +202,7 @@ Optionally, ASV sequences can be filtered for rRNA sequences identified by Barrn #### Length filter -Optionally, a length filter can be used to reduce potential contamination after ASV computation. For example with 515f and 806r primers the majority of 16S rRNA amplicon sequences should have a length of 253 bp and amplicons vary significantely are likely spurious. +Optionally, a length filter can be used to reduce potential contamination after ASV computation. For example with 515f and 806r primers the majority of 16S rRNA amplicon sequences should have a length of 253 bp and amplicons that vary significantly are likely spurious. The minimum ASV length threshold can be set by `--min_len_asv` and the maximum length threshold with `--max_len_asv`. If no threshold is set, the filter (and output) is omitted. From 6558d3ee4028171d93e7d577bb15be15f73ec826 Mon Sep 17 00:00:00 2001 From: d4straub Date: Mon, 15 Jul 2024 14:09:39 +0200 Subject: [PATCH 04/31] update parameters docu: add description --- CHANGELOG.md | 2 +- nextflow_schema.json | 11 +++++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d36313df..37ddced3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Changed` -- [#762](https://github.com/nf-core/ampliseq/pull/762) - Improved output documentation section "Optional ASV filtering" +- [#762](https://github.com/nf-core/ampliseq/pull/762) - Improved output documentation section "Optional ASV filtering" and parameter documentation ### `Fixed` diff --git a/nextflow_schema.json b/nextflow_schema.json index 1bba874b..ceaf9366 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -150,6 +150,7 @@ "primer_removal": { "title": "Primer removal", "type": "object", + "description": "Spurious sequences sometimes lack primer sequences and primers introduce errors that can be removed in 
that step", "default": "", "properties": { "retain_untrimmed": { @@ -188,7 +189,7 @@ "read_trimming_and_quality_filtering": { "title": "Read trimming and quality filtering", "type": "object", - "description": "", + "description": "Read trimming and quality filtering is supposed to reduce spurious results and aid error correction", "default": "", "properties": { "trunclenf": { @@ -271,6 +272,7 @@ "asv_post_processing": { "title": "ASV post processing", "type": "object", + "description": "ASV post-processing takes place after ASV computation but before taxonomic assignment, it will affect all downstream processes", "default": "", "properties": { "vsearch_cluster": { @@ -575,6 +577,7 @@ "title": "ASV filtering", "type": "object", "default": "", + "description": "Filtering by taxonomy or abundance will affect all downstream analysis", "fa_icon": "fas fa-filter", "properties": { "exclude_taxa": { @@ -600,7 +603,7 @@ "downstream_analysis": { "title": "Downstream analysis", "type": "object", - "description": "", + "description": "Metadata is used here to visualize data either for quality control or publication ready figures", "default": "", "fa_icon": "fas fa-bacteria", "properties": { @@ -652,7 +655,7 @@ "differential_abundance_analysis": { "title": "Differential abundance analysis", "type": "object", - "description": "", + "description": "Differential abundance analysis relies on provided metadata", "default": "", "fa_icon": "fas fa-bacteria", "properties": { @@ -705,7 +708,7 @@ "pipeline_report": { "title": "Pipeline summary report", "type": "object", - "description": "", + "description": "Customization of the pipeline report", "default": "", "properties": { "report_template": { From fd106a20a95cae7e096a200e6158aa70971b65cd Mon Sep 17 00:00:00 2001 From: Daniel Straub <42973691+d4straub@users.noreply.github.com> Date: Mon, 15 Jul 2024 15:07:48 +0200 Subject: [PATCH 05/31] Apply suggestions from code review Co-authored-by: Till E. 
<64961761+tillenglert@users.noreply.github.com> --- workflows/ampliseq.nf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index 5105b545..f0073841 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -272,9 +272,9 @@ workflow AMPLISEQ { .map{ meta, readfw, readrv -> meta.single_end = single_end.toBoolean() def reads = single_end ? readfw : [readfw,readrv] - if ( !meta.single_end && !readrv ) { error("Entry `reverseReads` is missing in $params.input for $meta.sample, either correct the samplesheet or use `--single_end`, `--pacbio`, or `--iontorrent`") } // make sure that reverse reads are present when single_end isnt specified - if ( !meta.single_end && ( readfw.getSimpleName() == meta.sample || readrv.getSimpleName() == meta.sample ) ) { error("Entry `sampleID` cannot be identical to simple name of `forwardReads` or `reverseReads`, please change `sampleID` in $params.input for sample $meta.sample") } // sample name and any file name without extensions arent identical, because rename_raw_data_files.nf would forward 3 files (2 renamed +1 input) instead of 2 in that case - if ( meta.single_end && ( readfw.getSimpleName() == meta.sample+"_1" || readfw.getSimpleName() == meta.sample+"_2" ) ) { error("Entry `sampleID`+ `_1` or `_2` cannot be identical to simple name of `forwardReads`, please change `sampleID` in $params.input for sample $meta.sample") } // sample name and file name without extensions arent identical, because rename_raw_data_files.nf would forward 2 files (1 renamed +1 input) instead of 1 in that case + if ( !meta.single_end && !readrv ) { error("Entry `reverseReads` is missing in $params.input for $meta.sample, either correct the samplesheet or use `--single_end`, `--pacbio`, or `--iontorrent`") } // make sure that reverse reads are present when single_end isn't specified + if ( !meta.single_end && ( readfw.getSimpleName() == meta.sample || readrv.getSimpleName() == 
meta.sample ) ) { error("Entry `sampleID` cannot be identical to simple name of `forwardReads` or `reverseReads`, please change `sampleID` in $params.input for sample $meta.sample") } // sample name and any file name without extensions aren't identical, because rename_raw_data_files.nf would forward 3 files (2 renamed +1 input) instead of 2 in that case + if ( meta.single_end && ( readfw.getSimpleName() == meta.sample+"_1" || readfw.getSimpleName() == meta.sample+"_2" ) ) { error("Entry `sampleID`+ `_1` or `_2` cannot be identical to simple name of `forwardReads`, please change `sampleID` in $params.input for sample $meta.sample") } // sample name and file name without extensions aren't identical, because rename_raw_data_files.nf would forward 2 files (1 renamed +1 input) instead of 1 in that case return [meta, reads] } } else if ( params.input_fasta ) { ch_input_fasta = Channel.fromPath(params.input_fasta, checkIfExists: true) From 2c4dc67273d1b25f45fbaeaff9e44d1d106db032 Mon Sep 17 00:00:00 2001 From: Daniel Lundin Date: Tue, 23 Jul 2024 14:05:15 +0200 Subject: [PATCH 06/31] Add sbdi-gtdb R09-RS220 --- conf/ref_databases.config | 14 +++++++++++--- nextflow_schema.json | 7 ++++--- .../local/utils_nfcore_ampliseq_pipeline/main.nf | 12 +++++++++++- 3 files changed, 26 insertions(+), 7 deletions(-) diff --git a/conf/ref_databases.config b/conf/ref_databases.config index c2db0ed2..b33c903e 100644 --- a/conf/ref_databases.config +++ b/conf/ref_databases.config @@ -130,11 +130,19 @@ params { dbversion = "RDP 18/11.5 (https://zenodo.org/record/4310151/)" } 'sbdi-gtdb' { - title = "SBDI-GTDB - Sativa curated 16S GTDB database - Release R08-RS214-1" - file = [ "https://figshare.scilifelab.se/ndownloader/files/45818841", "https://figshare.scilifelab.se/ndownloader/files/45818850" ] + title = "SBDI-GTDB - Sativa curated 16S GTDB database - Release R09-RS220-1" + file = [ "https://figshare.scilifelab.se/ndownloader/files/47244061", 
"https://figshare.scilifelab.se/ndownloader/files/47244076" ] citation = "Lundin D, Andersson A. SBDI Sativa curated 16S GTDB database. FigShare. doi: 10.17044/scilifelab.14869077.v6" fmtscript = "taxref_reformat_sbdi-gtdb.sh" - dbversion = "SBDI-GTDB-R08-RS214-1 (https://figshare.scilifelab.se/articles/dataset/SBDI_Sativa_curated_16S_GTDB_database/14869077/6)" + dbversion = "SBDI-GTDB-R09-RS220-1 (https://figshare.scilifelab.se/articles/dataset/SBDI_Sativa_curated_16S_GTDB_database/14869077/7)" + taxlevels = "Domain,Kingdom,Phylum,Class,Order,Family,Genus,Species" + } + 'sbdi-gtdb=R09-RS220-1' { + title = "SBDI-GTDB - Sativa curated 16S GTDB database - Release R09-RS220-1" + file = [ "https://figshare.scilifelab.se/ndownloader/files/47244061", "https://figshare.scilifelab.se/ndownloader/files/47244076" ] + citation = "Lundin D, Andersson A. SBDI Sativa curated 16S GTDB database. FigShare. doi: 10.17044/scilifelab.14869077.v6" + fmtscript = "taxref_reformat_sbdi-gtdb.sh" + dbversion = "SBDI-GTDB-R09-RS220-1 (https://figshare.scilifelab.se/articles/dataset/SBDI_Sativa_curated_16S_GTDB_database/14869077/7)" taxlevels = "Domain,Kingdom,Phylum,Class,Order,Family,Genus,Species" } 'sbdi-gtdb=R08-RS214-1' { diff --git a/nextflow_schema.json b/nextflow_schema.json index ceaf9366..2b977c0a 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -372,10 +372,11 @@ "rdp", "rdp=18", "sbdi-gtdb", - "sbdi-gtdb=R06-RS202-1", - "sbdi-gtdb=R06-RS202-3", - "sbdi-gtdb=R07-RS207-1", + "sbdi-gtdb=R09-RS220-1", "sbdi-gtdb=R08-RS214-1", + "sbdi-gtdb=R07-RS207-1", + "sbdi-gtdb=R06-RS202-3", + "sbdi-gtdb=R06-RS202-1", "silva", "silva=132", "silva=138", diff --git a/subworkflows/local/utils_nfcore_ampliseq_pipeline/main.nf b/subworkflows/local/utils_nfcore_ampliseq_pipeline/main.nf index 321463ae..6a31f97f 100644 --- a/subworkflows/local/utils_nfcore_ampliseq_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_ampliseq_pipeline/main.nf @@ -237,7 +237,17 @@ def 
validateInputParameters() { error("Incompatible parameters: `--filter_ssu` cannot be used with `--skip_barrnap` because filtering for SSU's depends on barrnap.") } - String[] sbdi_compatible_databases = ["coidb","coidb=221216","gtdb","gtdb=R09-RS220","gtdb=R08-RS214","gtdb=R07-RS207","gtdb=R06-RS202","gtdb=R05-RS95","midori2-co1","midori2-co1=gb250","pr2","pr2=5.0.0","pr2=4.14.0","pr2=4.13.0","rdp","rdp=18","sbdi-gtdb","sbdi-gtdb=R08-RS214-1","sbdi-gtdb=R07-RS207-1","silva","silva=138","silva=132","unite-fungi","unite-fungi=9.0","unite-fungi=8.3","unite-fungi=8.2","unite-alleuk","unite-alleuk=9.0","unite-alleuk=8.3","unite-alleuk=8.2"] + String[] sbdi_compatible_databases = [ + "coidb","coidb=221216", + "gtdb","gtdb=R09-RS220","gtdb=R08-RS214","gtdb=R07-RS207","gtdb=R06-RS202","gtdb=R05-RS95", + "midori2-co1","midori2-co1=gb250", + "pr2","pr2=5.0.0","pr2=4.14.0","pr2=4.13.0", + "rdp","rdp=18", + "sbdi-gtdb","sbdi-gtdb=R09-RS220-1","sbdi-gtdb=R08-RS214-1","sbdi-gtdb=R07-RS207-1", + "silva","silva=138","silva=132", + "unite-fungi","unite-fungi=9.0","unite-fungi=8.3","unite-fungi=8.2", + "unite-alleuk","unite-alleuk=9.0","unite-alleuk=8.3","unite-alleuk=8.2" + ] if (params.sbdiexport){ if (params.sintax_ref_taxonomy ) { if (!Arrays.stream(sbdi_compatible_databases).anyMatch(entry -> params.sintax_ref_taxonomy.toString().equals(entry)) ) { From 0b9a70bb2adf08ca33a5c61b0925d120eeb5d762 Mon Sep 17 00:00:00 2001 From: Daniel Lundin Date: Tue, 23 Jul 2024 14:08:03 +0200 Subject: [PATCH 07/31] Updated CHANGELOG.md --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5fcf89b5..60879ee4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` +- [#765](https://github.com/nf-core/ampliseq/pull/765) - Added version R09-RS220 of curated GTDB 16S taxonomy: `sbdi-gtdb=R09-RS220-1` or `sbdi-gtdb` as parameter to `--dada_ref_taxonomy` + 
### `Changed` - [#762](https://github.com/nf-core/ampliseq/pull/762) - Improved output documentation section "Optional ASV filtering" and parameter documentation From e5e8f370422fcce7c48f3697d85cf94c1cdc5fac Mon Sep 17 00:00:00 2001 From: Daniel Lundin Date: Tue, 23 Jul 2024 19:09:03 +0200 Subject: [PATCH 08/31] Work on SINTAX/UNITE dbs --- ...tax.sh => taxref_reformat_sintax_fasta.sh} | 1 - bin/taxref_reformat_sintax_tar.sh | 13 ++++ conf/ref_databases.config | 74 ++++++++++++------- nextflow_schema.json | 16 ++-- 4 files changed, 70 insertions(+), 34 deletions(-) rename bin/{taxref_reformat_sintax.sh => taxref_reformat_sintax_fasta.sh} (99%) create mode 100755 bin/taxref_reformat_sintax_tar.sh diff --git a/bin/taxref_reformat_sintax.sh b/bin/taxref_reformat_sintax_fasta.sh similarity index 99% rename from bin/taxref_reformat_sintax.sh rename to bin/taxref_reformat_sintax_fasta.sh index b076d0af..30e63fca 100755 --- a/bin/taxref_reformat_sintax.sh +++ b/bin/taxref_reformat_sintax_fasta.sh @@ -5,4 +5,3 @@ # Just rename the preformatted file # Assumes only one (gzipped) file mv * sintaxdb.fa.gz - diff --git a/bin/taxref_reformat_sintax_tar.sh b/bin/taxref_reformat_sintax_tar.sh new file mode 100755 index 00000000..2d999cf6 --- /dev/null +++ b/bin/taxref_reformat_sintax_tar.sh @@ -0,0 +1,13 @@ +#!/bin/sh + +# Handles preformatted database files suitable for sintax + +# Just rename the preformatted file +# Assumes only one (gzipped) file + +# Extract the fasta file without _dev in its name +tar xzf *.tgz $(tar tfz *.tgz | grep -v '_dev') + +# Change the name and gzip +mv *.fasta sintaxdb.fa +gzip sintaxdb.fa diff --git a/conf/ref_databases.config b/conf/ref_databases.config index c2db0ed2..2a95bc75 100644 --- a/conf/ref_databases.config +++ b/conf/ref_databases.config @@ -130,11 +130,19 @@ params { dbversion = "RDP 18/11.5 (https://zenodo.org/record/4310151/)" } 'sbdi-gtdb' { - title = "SBDI-GTDB - Sativa curated 16S GTDB database - Release R08-RS214-1" - file = [ 
"https://figshare.scilifelab.se/ndownloader/files/45818841", "https://figshare.scilifelab.se/ndownloader/files/45818850" ] + title = "SBDI-GTDB - Sativa curated 16S GTDB database - Release R09-RS220-1" + file = [ "https://figshare.scilifelab.se/ndownloader/files/47244061", "https://figshare.scilifelab.se/ndownloader/files/47244076" ] citation = "Lundin D, Andersson A. SBDI Sativa curated 16S GTDB database. FigShare. doi: 10.17044/scilifelab.14869077.v6" fmtscript = "taxref_reformat_sbdi-gtdb.sh" - dbversion = "SBDI-GTDB-R08-RS214-1 (https://figshare.scilifelab.se/articles/dataset/SBDI_Sativa_curated_16S_GTDB_database/14869077/6)" + dbversion = "SBDI-GTDB-R09-RS220-1 (https://figshare.scilifelab.se/articles/dataset/SBDI_Sativa_curated_16S_GTDB_database/14869077/7)" + taxlevels = "Domain,Kingdom,Phylum,Class,Order,Family,Genus,Species" + } + 'sbdi-gtdb=R09-RS220-1' { + title = "SBDI-GTDB - Sativa curated 16S GTDB database - Release R09-RS220-1" + file = [ "https://figshare.scilifelab.se/ndownloader/files/47244061", "https://figshare.scilifelab.se/ndownloader/files/47244076" ] + citation = "Lundin D, Andersson A. SBDI Sativa curated 16S GTDB database. FigShare. doi: 10.17044/scilifelab.14869077.v6" + fmtscript = "taxref_reformat_sbdi-gtdb.sh" + dbversion = "SBDI-GTDB-R09-RS220-1 (https://figshare.scilifelab.se/articles/dataset/SBDI_Sativa_curated_16S_GTDB_database/14869077/7)" taxlevels = "Domain,Kingdom,Phylum,Class,Order,Family,Genus,Species" } 'sbdi-gtdb=R08-RS214-1' { @@ -192,7 +200,7 @@ params { } 'unite-fungi' { title = "UNITE general FASTA release for Fungi - Version 9.0" - file = [ "https://files.plutof.ut.ee/public/orig/1E/25/1E25CA4CC30A31C2E2B8CB2C89824C83D080A7F5A62E6263A0E95B37C6628067.tgz" ] + file = [ "https://s3.hpc.ut.ee/plutof-public/original/fa1038da-d18d-46b7-88a9-c21bcf38c43d.tgz" ] citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. 
Henrik; Kõljalg, Urmas (2022): UNITE general FASTA release for Fungi. Version 16.10.2022. UNITE Community. https://doi.org/10.15156/BIO/2483911" fmtscript = "taxref_reformat_unite.sh" dbversion = "UNITE-fungi v9.0 (https://doi.org/10.15156/BIO/2483911)" @@ -200,7 +208,7 @@ params { } 'unite-fungi=9.0' { title = "UNITE general FASTA release for Fungi - Version 9.0" - file = [ "https://files.plutof.ut.ee/public/orig/1E/25/1E25CA4CC30A31C2E2B8CB2C89824C83D080A7F5A62E6263A0E95B37C6628067.tgz" ] + file = [ "https://s3.hpc.ut.ee/plutof-public/original/fa1038da-d18d-46b7-88a9-c21bcf38c43d.tgz" ] citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2022): UNITE general FASTA release for Fungi. Version 16.10.2022. UNITE Community. https://doi.org/10.15156/BIO/2483911" fmtscript = "taxref_reformat_unite.sh" dbversion = "UNITE-fungi v9.0 (https://doi.org/10.15156/BIO/2483911)" @@ -356,7 +364,7 @@ params { title = "COIDB - CO1 Taxonomy Database - Release 221216" file = [ "https://figshare.scilifelab.se/ndownloader/files/38787078" ] citation = "Sundh J, Manoharan L, Iwaszkiewicz-Eggebrecht E, Miraldo A, Andersson A, Ronquist F. COI reference sequences from BOLD DB. doi: https://doi.org/10.17044/scilifelab.20514192.v2" - fmtscript = "taxref_reformat_sintax.sh" + fmtscript = "taxref_reformat_sintax_fasta.sh" dbversion = "COIDB 221216 (https://doi.org/10.17044/scilifelab.20514192.v2)" taxlevels = "Kingdom,Phylum,Class,Order,Family,Genus,Species,BOLD_bin" } @@ -364,64 +372,78 @@ params { title = "COIDB - CO1 Taxonomy Database - Release 221216" file = [ "https://figshare.scilifelab.se/ndownloader/files/38787078" ] citation = "Sundh J, Manoharan L, Iwaszkiewicz-Eggebrecht E, Miraldo A, Andersson A, Ronquist F. COI reference sequences from BOLD DB. 
doi: https://doi.org/10.17044/scilifelab.20514192.v2" - fmtscript = "taxref_reformat_sintax.sh" + fmtscript = "taxref_reformat_sintax_fasta.sh" dbversion = "COIDB 221216 (https://doi.org/10.17044/scilifelab.20514192.v2)" taxlevels = "Kingdom,Phylum,Class,Order,Family,Genus,Species,BOLD_bin" } 'unite-fungi' { - title = "UNITE USEARCH/UTAX release for Fungi - Version 9.0" - file = [ "https://files.plutof.ut.ee/public/orig/19/1B/191B0D889A6B7B05DF4C103B118ABB3E0CF8EDBEA5B3E3FAB3EAFE3B72D7F3C8.gz" ] - citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2022): UNITE USEARCH/UTAX release for Fungi. Version 16.10.2022. UNITE Community. https://doi.org/10.15156/BIO/2483923" - fmtscript = "taxref_reformat_sintax.sh" - dbversion = "UNITE-fungi v9.0 (https://doi.org/10.15156/BIO/2483923)" + title = "UNITE USEARCH/UTAX release for Fungi - Version 10.0" + file = [ "https://s3.hpc.ut.ee/plutof-public/original/d18aa648-3f4c-4f46-84d4-c8c5d48439ba.tgz" ] + citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2024): UNITE USEARCH/UTAX release for Fungi. UNITE Community. 10.15156/BIO/2959340" + fmtscript = "taxref_reformat_sintax_tar.sh" + dbversion = "UNITE-fungi v10.0 (https://doi.plutof.ut.ee/doi/10.15156/BIO/2959340)" + } + 'unite-fungi=10.0' { + title = "UNITE USEARCH/UTAX release for Fungi - Version 10.0" + file = [ "https://s3.hpc.ut.ee/plutof-public/original/d18aa648-3f4c-4f46-84d4-c8c5d48439ba.tgz" ] + citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2024): UNITE USEARCH/UTAX release for Fungi. UNITE Community. 
10.15156/BIO/2959340" + fmtscript = "taxref_reformat_sintax_tar.sh" + dbversion = "UNITE-fungi v10.0 (https://doi.plutof.ut.ee/doi/10.15156/BIO/2959340)" } 'unite-fungi=9.0' { title = "UNITE USEARCH/UTAX release for Fungi - Version 9.0" - file = [ "https://files.plutof.ut.ee/public/orig/19/1B/191B0D889A6B7B05DF4C103B118ABB3E0CF8EDBEA5B3E3FAB3EAFE3B72D7F3C8.gz" ] + file = [ "https://s3.hpc.ut.ee/plutof-public/original/fa1038da-d18d-46b7-88a9-c21bcf38c43d.tgz" ] citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2022): UNITE USEARCH/UTAX release for Fungi. Version 16.10.2022. UNITE Community. https://doi.org/10.15156/BIO/2483923" - fmtscript = "taxref_reformat_sintax.sh" + fmtscript = "taxref_reformat_sintax_tar.sh" dbversion = "UNITE-fungi v9.0 (https://doi.org/10.15156/BIO/2483923)" } 'unite-fungi=8.3' { title = "UNITE USEARCH/UTAX release for Fungi - Version 8.3" - file = [ "https://files.plutof.ut.ee/public/orig/82/CB/82CB44BBAAA7D3AEAC297B5689BDA2963E8D0666E01FE0B54096147AFAF85263.gz" ] + file = [ "https://s3.hpc.ut.ee/plutof-public/original/5f5085d1-ff89-4e9d-9c85-12165dcbf880.gz" ] citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2021): UNITE USEARCH/UTAX release for Fungi. UNITE Community. 10.15156/BIO/1280276" - fmtscript = "taxref_reformat_sintax.sh" + fmtscript = "taxref_reformat_sintax_fasta.sh" dbversion = "UNITE-fungi v8.3 (https://dx.doi.org/10.15156/BIO/1280276)" } 'unite-fungi=8.2' { title = "UNITE USEARCH/UTAX release for Fungi - Version 8.2" - file = [ "https://files.plutof.ut.ee/public/orig/E8/83/E883EB19E3EA7B64C1F652521301239831FAFE0BFF015C9E2B4786DC0976C0FC.gz" ] + file = [ "https://s3.hpc.ut.ee/plutof-public/original/a6c4d680-c99a-4252-ba67-748271c7d7f4.gz" ] citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. 
Henrik; Kõljalg, Urmas (2020): UNITE USEARCH/UTAX release for Fungi. UNITE Community. 10.15156/BIO/786375" - fmtscript = "taxref_reformat_sintax.sh" + fmtscript = "taxref_reformat_sintax_fasta.sh" dbversion = "UNITE-fungi v8.2 (https://doi.org/10.15156/BIO/786375)" } 'unite-alleuk' { - title = "UNITE USEARCH/UTAX release for eukaryotes - Version 9.0" - file = [ "https://files.plutof.ut.ee/public/orig/AB/8C/AB8C119FC82CF5AFAFCB93CA4FFFF2B42A03CF1275DE23F60B887392E8FDEA21.gz"] - citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2022): UNITE USEARCH/UTAX release for eukaryotes. Version 16.10.2022. UNITE Community. https://doi.org/10.15156/BIO/2483924" - fmtscript = "taxref_reformat_sintax.sh" - dbversion = "UNITE-alleuk v9.0 (https://doi.org/10.15156/BIO/2483924)" + title = "UNITE USEARCH/UTAX release for eukaryotes - Version 10.0" + file = [ "https://s3.hpc.ut.ee/plutof-public/original/6f19ddb6-1ac0-4834-a74c-b639688878a4.gz" ] + citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2024): UNITE USEARCH/UTAX release for eukaryotes. Version 04.04.2024. UNITE Community. https://doi.org/10.15156/BIO/2959341" + fmtscript = "taxref_reformat_sintax_fasta.sh" + dbversion = "UNITE-alleuk v10.0 (https://doi.org/10.15156/BIO/2959341)" + } + 'unite-alleuk=10.0' { + title = "UNITE USEARCH/UTAX release for eukaryotes - Version 10.0" + file = [ "https://s3.hpc.ut.ee/plutof-public/original/6f19ddb6-1ac0-4834-a74c-b639688878a4.gz" ] + citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2024): UNITE USEARCH/UTAX release for eukaryotes. Version 04.04.2024. UNITE Community. 
https://doi.org/10.15156/BIO/2959341" + fmtscript = "taxref_reformat_sintax_fasta.sh" + dbversion = "UNITE-alleuk v10.0 (https://doi.org/10.15156/BIO/2959341)" } 'unite-alleuk=9.0' { title = "UNITE USEARCH/UTAX release for eukaryotes - Version 9.0" file = [ "https://files.plutof.ut.ee/public/orig/AB/8C/AB8C119FC82CF5AFAFCB93CA4FFFF2B42A03CF1275DE23F60B887392E8FDEA21.gz"] citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2022): UNITE USEARCH/UTAX release for eukaryotes. Version 16.10.2022. UNITE Community. https://doi.org/10.15156/BIO/2483924" - fmtscript = "taxref_reformat_sintax.sh" + fmtscript = "taxref_reformat_sintax_tar.sh" dbversion = "UNITE-alleuk v9.0 (https://doi.org/10.15156/BIO/2483924)" } 'unite-alleuk=8.3' { title = "UNITE USEARCH/UTAX release for eukaryotes - Version 8.3" file = [ "https://files.plutof.ut.ee/public/orig/B9/35/B9351C91550A52713CB66DB7A1CEF35765310EBB23B6667AC93E714E9A9D020B.gz"] citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2021): UNITE USEARCH/UTAX release for eukaryotes. UNITE Community. 10.15156/BIO/1280317" - fmtscript = "taxref_reformat_sintax.sh" + fmtscript = "taxref_reformat_sintax_fasta.sh" dbversion = "UNITE-alleuk v8.3 (https://doi.org/10.15156/BIO/1280127)" } 'unite-alleuk=8.2' { title = "UNITE USEARCH/UTAX release for eukaryotes - Version 8.2" file = [ "https://files.plutof.ut.ee/public/orig/7B/B5/7BB51166C988E448392B213A72D4604E2ABB494E20E19E4392F7819FEBFCD036.gz"] citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2020): UNITE USEARCH/UTAX release for eukaryotes. UNITE Community. 
10.15156/BIO/786376" - fmtscript = "taxref_reformat_sintax.sh" + fmtscript = "taxref_reformat_sintax_fasta.sh" dbversion = "UNITE-alleuk v8.2 (https://dx.doi.org/10.15156/BIO/786376)" } } diff --git a/nextflow_schema.json b/nextflow_schema.json index ceaf9366..c3917d98 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -380,13 +380,13 @@ "silva=132", "silva=138", "unite-alleuk", - "unite-alleuk=8.2", - "unite-alleuk=8.3", "unite-alleuk=9.0", + "unite-alleuk=8.3", + "unite-alleuk=8.2", "unite-fungi", - "unite-fungi=8.2", - "unite-fungi=8.3", "unite-fungi=9.0", + "unite-fungi=8.3", + "unite-fungi=8.2", "zehr-nifh", "zehr-nifh=2.5.0" ] @@ -519,14 +519,16 @@ "enum": [ "coidb", "coidb=221216", + "unite-fungi", + "unite-fungi=10.0", "unite-fungi=9.0", "unite-fungi=8.3", "unite-fungi=8.2", - "unite-fungi", + "unite-alleuk", + "unite-alleuk=10.0", "unite-alleuk=9.0", "unite-alleuk=8.3", - "unite-alleuk=8.2", - "unite-alleuk" + "unite-alleuk=8.2" ] }, "addsh": { From a6419cea11a2d91326d2ddd92ab25f247c008f78 Mon Sep 17 00:00:00 2001 From: Daniel Lundin Date: Tue, 23 Jul 2024 19:59:01 +0200 Subject: [PATCH 09/31] SINTAX/UNITE fixed urls --- bin/taxref_reformat_sintax_tar.sh | 5 +++-- conf/ref_databases.config | 10 +++++----- .../local/utils_nfcore_ampliseq_pipeline/main.nf | 4 ++-- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/bin/taxref_reformat_sintax_tar.sh b/bin/taxref_reformat_sintax_tar.sh index 2d999cf6..8539084d 100755 --- a/bin/taxref_reformat_sintax_tar.sh +++ b/bin/taxref_reformat_sintax_tar.sh @@ -6,8 +6,9 @@ # Assumes only one (gzipped) file # Extract the fasta file without _dev in its name -tar xzf *.tgz $(tar tfz *.tgz | grep -v '_dev') +f=$(tar tfz *.tgz | grep fasta | grep -v '_dev') +tar xzf *.tgz $f # Change the name and gzip -mv *.fasta sintaxdb.fa +mv $f sintaxdb.fa gzip sintaxdb.fa diff --git a/conf/ref_databases.config b/conf/ref_databases.config index 2a95bc75..97ba3de0 100644 --- a/conf/ref_databases.config +++ 
b/conf/ref_databases.config @@ -427,21 +427,21 @@ params { } 'unite-alleuk=9.0' { title = "UNITE USEARCH/UTAX release for eukaryotes - Version 9.0" - file = [ "https://files.plutof.ut.ee/public/orig/AB/8C/AB8C119FC82CF5AFAFCB93CA4FFFF2B42A03CF1275DE23F60B887392E8FDEA21.gz"] + file = [ "https://s3.hpc.ut.ee/plutof-public/original/00853437-b8b3-4d94-bcd8-7b942fcd8aa2.gz" ] citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2022): UNITE USEARCH/UTAX release for eukaryotes. Version 16.10.2022. UNITE Community. https://doi.org/10.15156/BIO/2483924" - fmtscript = "taxref_reformat_sintax_tar.sh" + fmtscript = "taxref_reformat_sintax_fasta.sh" dbversion = "UNITE-alleuk v9.0 (https://doi.org/10.15156/BIO/2483924)" } 'unite-alleuk=8.3' { title = "UNITE USEARCH/UTAX release for eukaryotes - Version 8.3" - file = [ "https://files.plutof.ut.ee/public/orig/B9/35/B9351C91550A52713CB66DB7A1CEF35765310EBB23B6667AC93E714E9A9D020B.gz"] + file = [ "https://s3.hpc.ut.ee/plutof-public/original/6e72e839-acfc-4044-84b4-5152b74c1552.tgz" ] citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2021): UNITE USEARCH/UTAX release for eukaryotes. UNITE Community. 10.15156/BIO/1280317" - fmtscript = "taxref_reformat_sintax_fasta.sh" + fmtscript = "taxref_reformat_sintax_tar.sh" dbversion = "UNITE-alleuk v8.3 (https://doi.org/10.15156/BIO/1280127)" } 'unite-alleuk=8.2' { title = "UNITE USEARCH/UTAX release for eukaryotes - Version 8.2" - file = [ "https://files.plutof.ut.ee/public/orig/7B/B5/7BB51166C988E448392B213A72D4604E2ABB494E20E19E4392F7819FEBFCD036.gz"] + file = [ "https://s3.hpc.ut.ee/plutof-public/original/fd6411cf-2c68-42e5-ae94-13ffb933a5c5.gz" ] citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2020): UNITE USEARCH/UTAX release for eukaryotes. UNITE Community. 
10.15156/BIO/786376" fmtscript = "taxref_reformat_sintax_fasta.sh" dbversion = "UNITE-alleuk v8.2 (https://dx.doi.org/10.15156/BIO/786376)" diff --git a/subworkflows/local/utils_nfcore_ampliseq_pipeline/main.nf b/subworkflows/local/utils_nfcore_ampliseq_pipeline/main.nf index 6a31f97f..a1c190fc 100644 --- a/subworkflows/local/utils_nfcore_ampliseq_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_ampliseq_pipeline/main.nf @@ -245,8 +245,8 @@ def validateInputParameters() { "rdp","rdp=18", "sbdi-gtdb","sbdi-gtdb=R09-RS220-1","sbdi-gtdb=R08-RS214-1","sbdi-gtdb=R07-RS207-1", "silva","silva=138","silva=132", - "unite-fungi","unite-fungi=9.0","unite-fungi=8.3","unite-fungi=8.2", - "unite-alleuk","unite-alleuk=9.0","unite-alleuk=8.3","unite-alleuk=8.2" + "unite-fungi","unite-fungi=10.0","unite-fungi=9.0","unite-fungi=8.3","unite-fungi=8.2", + "unite-alleuk","unite-alleuk=10.0","unite-alleuk=9.0","unite-alleuk=8.3","unite-alleuk=8.2" ] if (params.sbdiexport){ if (params.sintax_ref_taxonomy ) { From be87d1b47625b0954a6e6ac47b1d0e161a25164d Mon Sep 17 00:00:00 2001 From: Daniel Lundin Date: Tue, 23 Jul 2024 21:03:39 +0200 Subject: [PATCH 10/31] Updated DADA UNITE urls --- conf/ref_databases.config | 38 +++++++++++------------ conf/test_its_dada_taxonomy.config | 50 ++++++++++++++++++++++++++++++ nextflow.config | 31 +++++++++--------- 3 files changed, 85 insertions(+), 34 deletions(-) create mode 100644 conf/test_its_dada_taxonomy.config diff --git a/conf/ref_databases.config b/conf/ref_databases.config index 97ba3de0..688d571c 100644 --- a/conf/ref_databases.config +++ b/conf/ref_databases.config @@ -201,63 +201,63 @@ params { 'unite-fungi' { title = "UNITE general FASTA release for Fungi - Version 9.0" file = [ "https://s3.hpc.ut.ee/plutof-public/original/fa1038da-d18d-46b7-88a9-c21bcf38c43d.tgz" ] - citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. 
Henrik; Kõljalg, Urmas (2022): UNITE general FASTA release for Fungi. Version 16.10.2022. UNITE Community. https://doi.org/10.15156/BIO/2483911" + citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2023): UNITE general FASTA release for Fungi. Version 18.07.2023. UNITE Community. https://doi.org/10.15156/BIO/2938067" fmtscript = "taxref_reformat_unite.sh" - dbversion = "UNITE-fungi v9.0 (https://doi.org/10.15156/BIO/2483911)" + dbversion = "UNITE-fungi v9.0 (https://doi.org/10.15156/BIO/2938067)" shfile = [ "https://figshare.scilifelab.se/ndownloader/files/40788767", "https://figshare.scilifelab.se/ndownloader/files/40788770"] } 'unite-fungi=9.0' { title = "UNITE general FASTA release for Fungi - Version 9.0" file = [ "https://s3.hpc.ut.ee/plutof-public/original/fa1038da-d18d-46b7-88a9-c21bcf38c43d.tgz" ] - citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2022): UNITE general FASTA release for Fungi. Version 16.10.2022. UNITE Community. https://doi.org/10.15156/BIO/2483911" + citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2023): UNITE general FASTA release for Fungi. Version 18.07.2023. UNITE Community. 
https://doi.org/10.15156/BIO/2938067" fmtscript = "taxref_reformat_unite.sh" - dbversion = "UNITE-fungi v9.0 (https://doi.org/10.15156/BIO/2483911)" + dbversion = "UNITE-fungi v9.0 (https://doi.org/10.15156/BIO/2938067)" shfile = [ "https://figshare.scilifelab.se/ndownloader/files/40788767", "https://figshare.scilifelab.se/ndownloader/files/40788770"] } 'unite-fungi=8.3' { title = "UNITE general FASTA release for Fungi - Version 8.3" - file = [ "https://files.plutof.ut.ee/public/orig/7B/23/7B235835FAF5C85D7B01E40FEF17F687914CB81A182554C5BD95E3168328E604.tgz" ] - citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2021): UNITE general FASTA release for Fungi. UNITE Community. 10.15156/BIO/1280049" + file = [ "https://s3.hpc.ut.ee/plutof-public/original/a0d487ac-1339-4bea-a0e9-8627f87a2f88.tgz" ] + citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2021): UNITE general FASTA release for Fungi. Version 10.05.2021. UNITE Community. https://doi.org/10.15156/BIO/1280049" fmtscript = "taxref_reformat_unite.sh" dbversion = "UNITE-fungi v8.3 (https://doi.org/10.15156/BIO/1280049)" shfile = [ "https://scilifelab.figshare.com/ndownloader/files/34497977", "https://scilifelab.figshare.com/ndownloader/files/34497980"] } 'unite-fungi=8.2' { title = "UNITE general FASTA release for Fungi - Version 8.2" - file = [ "https://files.plutof.ut.ee/public/orig/E7/28/E728E2CAB797C90A01CD271118F574B8B7D0DAEAB7E81193EB89A2AC769A0896.gz" ] - citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2020): UNITE general FASTA release for Fungi. UNITE Community. 
10.15156/BIO/786368" + file = [ "https://s3.hpc.ut.ee/plutof-public/original/6fdb458a-9299-41dc-8774-5152b867d882.gz" ] + citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2020): UNITE general FASTA release for Fungi. Version 04.02.2020. UNITE Community. https://doi.org/10.15156/BIO/786368" fmtscript = "taxref_reformat_unite.sh" - dbversion = "UNITE-fungi v8.2 (https://doi.org/10.15156/BIO/786368)" + dbversion = "UNITE-fungi v8.2 (https://doi.plutof.ut.ee/doi/10.15156/BIO/786368)" shfile = [ "https://scilifelab.figshare.com/ndownloader/files/34497971", "https://scilifelab.figshare.com/ndownloader/files/34497974"] } 'unite-alleuk' { title = "UNITE general FASTA release for eukaryotes - Version 9.0" - file = [ "https://files.plutof.ut.ee/public/orig/F2/62/F262D942DEB8CAB3AEB9F313F67B04050E364B72E6707F99755DDCB271C45A48.tgz" ] - citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2022): UNITE general FASTA release for eukaryotes. Version 16.10.2022. UNITE Community. https://doi.org/10.15156/BIO/2483913" + file = [ "https://s3.hpc.ut.ee/plutof-public/original/e318f5fd-1ef4-40fd-9e77-1b94d91b3858.tgz" ] + citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2023): UNITE general FASTA release for eukaryotes. Version 18.07.2023. UNITE Community. 
https://doi.org/10.15156/BIO/2938069" fmtscript = "taxref_reformat_unite.sh" - dbversion = "UNITE-alleuk v9.0 (https://doi.org/10.15156/BIO/2483913)" + dbversion = "UNITE-alleuk v9.0 (https://doi.org/10.15156/BIO/2938069)" shfile = [ "https://figshare.scilifelab.se/ndownloader/files/40788773", "https://figshare.scilifelab.se/ndownloader/files/40788776"] } 'unite-alleuk=9.0' { title = "UNITE general FASTA release for eukaryotes - Version 9.0" - file = [ "https://files.plutof.ut.ee/public/orig/F2/62/F262D942DEB8CAB3AEB9F313F67B04050E364B72E6707F99755DDCB271C45A48.tgz" ] - citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2022): UNITE general FASTA release for eukaryotes. Version 16.10.2022. UNITE Community. https://doi.org/10.15156/BIO/2483913" + file = [ "https://s3.hpc.ut.ee/plutof-public/original/e318f5fd-1ef4-40fd-9e77-1b94d91b3858.tgz" ] + citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2023): UNITE general FASTA release for eukaryotes. Version 18.07.2023. UNITE Community. https://doi.org/10.15156/BIO/2938069" fmtscript = "taxref_reformat_unite.sh" - dbversion = "UNITE-alleuk v9.0 (https://doi.org/10.15156/BIO/2483913)" + dbversion = "UNITE-alleuk v9.0 (https://doi.org/10.15156/BIO/2938069)" shfile = [ "https://figshare.scilifelab.se/ndownloader/files/40788773", "https://figshare.scilifelab.se/ndownloader/files/40788776"] } 'unite-alleuk=8.3' { title = "UNITE general FASTA release for eukaryotes - Version 8.3" - file = [ "https://files.plutof.ut.ee/public/orig/E5/F5/E5F5E426DEC78BA2F7EC530621DDBD3F10564A09CBC2A5C4D3B3CBE7E37C5E1A.tgz" ] - citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2021): UNITE general FASTA release for eukaryotes. UNITE Community. 
10.15156/BIO/1280127" + file = [ "https://s3.hpc.ut.ee/plutof-public/original/6e72e839-acfc-4044-84b4-5152b74c1552.tgz" ] + citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2021): UNITE general FASTA release for eukaryotes. Version 10.05.2021. UNITE Community. https://doi.org/10.15156/BIO/1280127" fmtscript = "taxref_reformat_unite.sh" dbversion = "UNITE-alleuk v8.3 (https://doi.org/10.15156/BIO/1280127)" shfile = [ "https://scilifelab.figshare.com/ndownloader/files/34994575", "https://scilifelab.figshare.com/ndownloader/files/34994578"] } 'unite-alleuk=8.2' { title = "UNITE general FASTA release for eukaryotes - Version 8.2" - file = [ "https://files.plutof.ut.ee/public/orig/F9/ED/F9EDE36E5209F469056675EBD672425BC06EACB7FE0C0D18F5A13E4CA632DCFA.gz" ] - citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2020): UNITE general FASTA release for eukaryotes. UNITE Community. 10.15156/BIO/786370" + file = [ "https://s3.hpc.ut.ee/plutof-public/original/dbd892b6-0446-4351-988e-922c4b73585e.gz" ] + citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2020): UNITE general FASTA release for eukaryotes. Version 04.02.2020. UNITE Community. 
https://doi.org/10.15156/BIO/786370" fmtscript = "taxref_reformat_unite.sh" dbversion = "UNITE-alleuk v8.2 (https://doi.org/10.15156/BIO/786370)" shfile = [ "https://scilifelab.figshare.com/ndownloader/files/34994569", "https://scilifelab.figshare.com/ndownloader/files/34994572"] diff --git a/conf/test_its_dada_taxonomy.config b/conf/test_its_dada_taxonomy.config new file mode 100644 index 00000000..60ef7a99 --- /dev/null +++ b/conf/test_its_dada_taxonomy.config @@ -0,0 +1,50 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. + + Use as follows: + nextflow run nf-core/ampliseq -profile test_sintax, --outdir + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'Test sintax profile' + config_profile_description = 'Minimal test dataset to check pipeline function when using sintax for taxonomy assignment' + + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '12.GB' + max_time = '6.h' + + // Input data + FW_primer = "CTTGGTCATTTAGAGGAAGTAA" + RV_primer = "TCCTGAGGGAAACTTCG" + input = params.pipelines_testdata_base_path + "ampliseq/samplesheets/Samplesheet_pacbio_ITS.tsv" + metadata = params.pipelines_testdata_base_path + "ampliseq/samplesheets/Metadata_pacbio_ITS.tsv" + pacbio = true + max_ee = 12 + cut_its = "its2" + + skip_dada_taxonomy = false + dada_ref_taxonomy = "unite-fungi" + + //this is to remove low abundance ASVs to reduce runtime of downstream processes + min_samples = 2 + min_frequency = 10 + + //produce average barplots + metadata_category_barplot = "var2,var3" + + //restrict ANCOM analysis to higher taxonomic levels + tax_agglom_max = 4 + ancom = true + + sbdiexport = true 
+ + qiime_adonis_formula = "var2" + + diversity_rarefaction_depth = 500 +} diff --git a/nextflow.config b/nextflow.config index 7e0a4025..32729b03 100644 --- a/nextflow.config +++ b/nextflow.config @@ -284,21 +284,22 @@ profiles { executor.cpus = 4 executor.memory = 8.GB } - test { includeConfig 'conf/test.config' } - test_single { includeConfig 'conf/test_single.config' } - test_multi { includeConfig 'conf/test_multi.config' } - test_doubleprimers { includeConfig 'conf/test_doubleprimers.config' } - test_pacbio_its { includeConfig 'conf/test_pacbio_its.config' } - test_iontorrent { includeConfig 'conf/test_iontorrent.config' } - test_fasta { includeConfig 'conf/test_fasta.config' } - test_failed { includeConfig 'conf/test_failed.config' } - test_full { includeConfig 'conf/test_full.config' } - test_reftaxcustom { includeConfig 'conf/test_reftaxcustom.config' } - test_qiimecustom { includeConfig 'conf/test_qiimecustom.config' } - test_novaseq { includeConfig 'conf/test_novaseq.config' } - test_pplace { includeConfig 'conf/test_pplace.config' } - test_sintax { includeConfig 'conf/test_sintax.config' } - test_multiregion { includeConfig 'conf/test_multiregion.config' } + test { includeConfig 'conf/test.config' } + test_single { includeConfig 'conf/test_single.config' } + test_multi { includeConfig 'conf/test_multi.config' } + test_doubleprimers { includeConfig 'conf/test_doubleprimers.config' } + test_pacbio_its { includeConfig 'conf/test_pacbio_its.config' } + test_iontorrent { includeConfig 'conf/test_iontorrent.config' } + test_fasta { includeConfig 'conf/test_fasta.config' } + test_failed { includeConfig 'conf/test_failed.config' } + test_full { includeConfig 'conf/test_full.config' } + test_reftaxcustom { includeConfig 'conf/test_reftaxcustom.config' } + test_qiimecustom { includeConfig 'conf/test_qiimecustom.config' } + test_novaseq { includeConfig 'conf/test_novaseq.config' } + test_pplace { includeConfig 'conf/test_pplace.config' } + test_sintax { 
includeConfig 'conf/test_sintax.config' } + test_its_dada_taxonomy { includeConfig 'conf/test_its_dada_taxonomy.config' } + test_multiregion { includeConfig 'conf/test_multiregion.config' } } // Set default registry for Apptainer, Docker, Podman and Singularity independent of -profile From 68d9b2980acfe685866a992a274ac262188d630f Mon Sep 17 00:00:00 2001 From: Daniel Lundin Date: Wed, 24 Jul 2024 08:56:20 +0200 Subject: [PATCH 11/31] New QIIME/UNITE urls --- conf/ref_databases.config | 42 ++++++++++++++++++++++++++++----------- nextflow_schema.json | 7 +++++-- 2 files changed, 35 insertions(+), 14 deletions(-) diff --git a/conf/ref_databases.config b/conf/ref_databases.config index 688d571c..e9037960 100644 --- a/conf/ref_databases.config +++ b/conf/ref_databases.config @@ -298,44 +298,62 @@ params { } //UNITE for QIIME2, see https://unite.ut.ee/repository.php 'unite-fungi' { - title = "UNITE QIIME release for Fungi - Version 8.3" - file = [ "https://files.plutof.ut.ee/public/orig/C5/54/C5547B97AAA979E45F79DC4C8C4B12113389343D7588716B5AD330F8BDB300C9.tgz" ] - citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2021): UNITE QIIME release for Fungi. Version 10.05.2021. UNITE Community. https://doi.org/10.15156/BIO/1264708" + title = "UNITE QIIME release for Fungi - Version 10.0" + file = [ "https://s3.hpc.ut.ee/plutof-public/original/db1d6ddb-a35d-48c5-8b1a-ad9dd3310c6d.tgz" ] + citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2024): UNITE QIIME release for Fungi. Version 04.04.2024. UNITE Community. 
https://doi.org/10.15156/BIO/2959336" + fmtscript = "taxref_reformat_qiime_unite.sh" + } + 'unite-fungi=10.0' { + title = "UNITE QIIME release for Fungi - Version 10.0" + file = [ "https://s3.hpc.ut.ee/plutof-public/original/db1d6ddb-a35d-48c5-8b1a-ad9dd3310c6d.tgz" ] + citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2024): UNITE QIIME release for Fungi. Version 04.04.2024. UNITE Community. https://doi.org/10.15156/BIO/2959336" + fmtscript = "taxref_reformat_qiime_unite.sh" + } + 'unite-fungi=9.0' { + title = "UNITE QIIME release for Fungi - Version 9.0" + file = [ "https://s3.hpc.ut.ee/plutof-public/original/3c22fbc5-ed9e-47a6-a85b-6c81268657e9.tgz" ] + citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2023): UNITE QIIME release for Fungi. Version 18.07.2023. UNITE Community. https://doi.org/10.15156/BIO/2938079" fmtscript = "taxref_reformat_qiime_unite.sh" } 'unite-fungi=8.3' { title = "UNITE QIIME release for Fungi - Version 8.3" - file = [ "https://files.plutof.ut.ee/public/orig/C5/54/C5547B97AAA979E45F79DC4C8C4B12113389343D7588716B5AD330F8BDB300C9.tgz" ] + file = [ "https://s3.hpc.ut.ee/plutof-public/original/3652451d-7567-4871-a7aa-3ba6c63aa60b.tgz" ] citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2021): UNITE QIIME release for Fungi. Version 10.05.2021. UNITE Community. 
https://doi.org/10.15156/BIO/1264708" fmtscript = "taxref_reformat_qiime_unite.sh" } 'unite-fungi=8.2' { title = "UNITE QIIME release for Fungi - Version 8.2" - file = [ "https://files.plutof.ut.ee/public/orig/98/AE/98AE96C6593FC9C52D1C46B96C2D9064291F4DBA625EF189FEC1CCAFCF4A1691.gz" ] + file = [ "https://s3.hpc.ut.ee/plutof-public/original/6b4e0baf-fc68-4e0e-8e4a-f5cf2d68bf98.gz" ] citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2020): UNITE QIIME release for Fungi. Version 04.02.2020. UNITE Community. https://doi.org/10.15156/BIO/786385" fmtscript = "taxref_reformat_qiime_unite.sh" } 'unite-alleuk' { - title = "UNITE QIIME release for eukaryotes - Version 9.0" - file = [ "https://files.plutof.ut.ee/public/orig/8F/FC/8FFCC8A730E50FEEF8CFFEEFEF02A22FBCF7E02B7FD31C6649754834D2CB0E6F.tgz" ] - citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2022): UNITE QIIME release for eukaryotes. Version 16.10.2022. UNITE Community. https://doi.org/10.15156/BIO/2483917" + title = "UNITE QIIME release for eukaryotes - Version 10.0" + file = [ "https://s3.hpc.ut.ee/plutof-public/original/711a503c-589d-4138-a76f-6a8b89460ecf.tgz" ] + citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2024): UNITE QIIME release for eukaryotes. Version 04.04.2024. UNITE Community. https://doi.org/10.15156/BIO/2959338" + fmtscript = "taxref_reformat_qiime_unite.sh" + } + 'unite-alleuk=10.0' { + title = "UNITE QIIME release for eukaryotes - Version 10.0" + file = [ "https://s3.hpc.ut.ee/plutof-public/original/711a503c-589d-4138-a76f-6a8b89460ecf.tgz" ] + citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2024): UNITE QIIME release for eukaryotes. Version 04.04.2024. UNITE Community. 
https://doi.org/10.15156/BIO/2959338" fmtscript = "taxref_reformat_qiime_unite.sh" } 'unite-alleuk=9.0' { title = "UNITE QIIME release for eukaryotes - Version 9.0" - file = [ "https://files.plutof.ut.ee/public/orig/8F/FC/8FFCC8A730E50FEEF8CFFEEFEF02A22FBCF7E02B7FD31C6649754834D2CB0E6F.tgz" ] - citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2022): UNITE QIIME release for eukaryotes. Version 16.10.2022. UNITE Community. https://doi.org/10.15156/BIO/2483917" + file = [ "https://s3.hpc.ut.ee/plutof-public/original/ecc2ed00-1219-42c7-a951-fbe92b8cb733.tgz" ] + citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2023): UNITE QIIME release for eukaryotes. Version 18.07.2023. UNITE Community. https://doi.org/10.15156/BIO/2938081" fmtscript = "taxref_reformat_qiime_unite.sh" } 'unite-alleuk=8.3' { title = "UNITE QIIME release for eukaryotes - Version 8.3" - file = [ "https://files.plutof.ut.ee/public/orig/48/29/4829D91F763E20F0F4376A60AA53FC9FBE6029A7D1BDC1B45347DD64EDE5D560.tgz" ] + file = [ "https://s3.hpc.ut.ee/plutof-public/original/93683063-613d-4190-877e-26a57a196fc7.tgz" ] citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2021): UNITE QIIME release for eukaryotes. Version 10.05.2021. UNITE Community. https://doi.org/10.15156/BIO/1264819" fmtscript = "taxref_reformat_qiime_unite.sh" } 'unite-alleuk=8.2' { title = "UNITE QIIME release for eukaryotes - Version 8.2" - file = [ "https://files.plutof.ut.ee/public/orig/6E/0E/6E0EDD5592003B47C70A1B384C3C784AA32B726AC861CD7E2BD22AEB0278675E.gz" ] + file = [ "https://s3.hpc.ut.ee/plutof-public/original/f53a2174-9fc5-4d9f-8230-59f8772c6c5c.gz" ] citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. 
Henrik; Kõljalg, Urmas (2020): UNITE QIIME release for eukaryotes. Version 04.02.2020. UNITE Community. https://doi.org/10.15156/BIO/786386" fmtscript = "taxref_reformat_qiime_unite.sh" } diff --git a/nextflow_schema.json b/nextflow_schema.json index 32717bce..35f925ba 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -457,13 +457,16 @@ "enum": [ "silva=138", "silva", + "unite-fungi", + "unite-fungi=10.0", + "unite-fungi=9.0", "unite-fungi=8.3", "unite-fungi=8.2", - "unite-fungi", + "unite-alleuk", + "unite-alleuk=10.0", "unite-alleuk=9.0", "unite-alleuk=8.3", "unite-alleuk=8.2", - "unite-alleuk", "greengenes85", "greengenes2", "greengenes2=2022.10" From 2aae22354f9f609e4d74a17b98e5bdee628bfcb1 Mon Sep 17 00:00:00 2001 From: Daniel Lundin Date: Wed, 24 Jul 2024 09:01:55 +0200 Subject: [PATCH 12/31] CHANGELOG.md --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 60879ee4..43eb5ab2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` - [#765](https://github.com/nf-core/ampliseq/pull/765) - Added version R09-RS220 of curated GTDB 16S taxonomy: `sbdi-gtdb=R09-RS220-1` or `sbdi-gtdb` as parameter to `--dada_ref_taxonomy` +- [#766](https://github.com/nf-core/ampliseq/pull/766) - Added version 10 of Unite as options for `--qiime_ref_taxonomy` and `--sintax_ref_taxonomy` ### `Changed` @@ -16,6 +17,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Fixed` - [#761](https://github.com/nf-core/ampliseq/pull/761) - Some sample sheet checks were not applied due to changes in the metadata ["meta"] structure in version 2.9.0 +- [#766](https://github.com/nf-core/ampliseq/pull/766) - Fixed broken urls for Unite databases (issue [#764](https://github.com/nf-core/ampliseq/issues/764)) ### `Dependencies` From 79297b6091291d7d23476c68561c392d67933424 Mon Sep 17 00:00:00 2001 From: 
Daniel Lundin Date: Wed, 24 Jul 2024 11:07:50 +0200 Subject: [PATCH 13/31] Fix duplicate filenames output from QIIME2_ANCOM_TAX --- modules/local/qiime2_ancom_tax.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/qiime2_ancom_tax.nf b/modules/local/qiime2_ancom_tax.nf index 1e3d320b..e49cfb3a 100644 --- a/modules/local/qiime2_ancom_tax.nf +++ b/modules/local/qiime2_ancom_tax.nf @@ -43,7 +43,7 @@ process QIIME2_ANCOM_TAX { --to-tsv if [ \$(grep -v '^#' -c ${table.baseName}-level-${taxlevel}.feature-table.tsv) -lt 2 ]; then - echo ${taxlevel} > ancom/\"WARNING Summing your data at taxonomic level ${taxlevel} produced less than two rows (taxa), ANCOM can't proceed -- did you specify a bad reference taxonomy?\".txt + echo ${taxlevel} > ancom/"${table.baseName}_WARNING_Summing_your_data_at_taxonomic_level_${taxlevel}_produced_less_than_two_rows_(taxa)_ANCOM_cant_proceed_--_did_you_specify_a_bad_reference_taxonomy.txt" else qiime composition add-pseudocount \\ --i-table lvl${taxlevel}-${table} \\ From d8a851f47783d9f524fe3ddb831a80b98e0c3cfb Mon Sep 17 00:00:00 2001 From: Daniel Lundin Date: Wed, 24 Jul 2024 11:23:09 +0200 Subject: [PATCH 14/31] Move table name --- modules/local/qiime2_ancom_tax.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/qiime2_ancom_tax.nf b/modules/local/qiime2_ancom_tax.nf index e49cfb3a..686a5bac 100644 --- a/modules/local/qiime2_ancom_tax.nf +++ b/modules/local/qiime2_ancom_tax.nf @@ -43,7 +43,7 @@ process QIIME2_ANCOM_TAX { --to-tsv if [ \$(grep -v '^#' -c ${table.baseName}-level-${taxlevel}.feature-table.tsv) -lt 2 ]; then - echo ${taxlevel} > ancom/"${table.baseName}_WARNING_Summing_your_data_at_taxonomic_level_${taxlevel}_produced_less_than_two_rows_(taxa)_ANCOM_cant_proceed_--_did_you_specify_a_bad_reference_taxonomy.txt" + echo ${taxlevel} > 
ancom/"WARNING_${table.baseName}_Summing_your_data_at_taxonomic_level_${taxlevel}_produced_less_than_two_rows_(taxa)_ANCOM_cant_proceed_--_did_you_specify_a_bad_reference_taxonomy.txt" else qiime composition add-pseudocount \\ --i-table lvl${taxlevel}-${table} \\ From 21f6df83709cea0c91a0205727ec9ca1e54b0382 Mon Sep 17 00:00:00 2001 From: Daniel Lundin Date: Wed, 24 Jul 2024 12:27:32 +0200 Subject: [PATCH 15/31] More CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 43eb5ab2..ff7e733a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Changed` - [#762](https://github.com/nf-core/ampliseq/pull/762) - Improved output documentation section "Optional ASV filtering" and parameter documentation +- [#766](https://github.com/nf-core/ampliseq/pull/766) - Modified warning filenames from `QIIME2_ANCOM` to avoid collisions ### `Fixed` From 9d11568c369533b65ade6895679d2fe703e5de61 Mon Sep 17 00:00:00 2001 From: Daniel Lundin Date: Wed, 24 Jul 2024 12:27:59 +0200 Subject: [PATCH 16/31] Fixed SINTAX/UNITE urls, tar -> fasta --- conf/ref_databases.config | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/conf/ref_databases.config b/conf/ref_databases.config index e9037960..73233b72 100644 --- a/conf/ref_databases.config +++ b/conf/ref_databases.config @@ -396,23 +396,23 @@ params { } 'unite-fungi' { title = "UNITE USEARCH/UTAX release for Fungi - Version 10.0" - file = [ "https://s3.hpc.ut.ee/plutof-public/original/d18aa648-3f4c-4f46-84d4-c8c5d48439ba.tgz" ] + file = [ "https://s3.hpc.ut.ee/plutof-public/original/b27cffec-1e7d-4584-93d3-12add9fa180b.gz" ] citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2024): UNITE USEARCH/UTAX release for Fungi. UNITE Community. 
10.15156/BIO/2959340" - fmtscript = "taxref_reformat_sintax_tar.sh" + fmtscript = "taxref_reformat_sintax_fasta.sh" dbversion = "UNITE-fungi v10.0 (https://doi.plutof.ut.ee/doi/10.15156/BIO/2959340)" } 'unite-fungi=10.0' { title = "UNITE USEARCH/UTAX release for Fungi - Version 10.0" - file = [ "https://s3.hpc.ut.ee/plutof-public/original/d18aa648-3f4c-4f46-84d4-c8c5d48439ba.tgz" ] + file = [ "https://s3.hpc.ut.ee/plutof-public/original/b27cffec-1e7d-4584-93d3-12add9fa180b.gz" ] citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2024): UNITE USEARCH/UTAX release for Fungi. UNITE Community. 10.15156/BIO/2959340" - fmtscript = "taxref_reformat_sintax_tar.sh" + fmtscript = "taxref_reformat_sintax_fasta.sh" dbversion = "UNITE-fungi v10.0 (https://doi.plutof.ut.ee/doi/10.15156/BIO/2959340)" } 'unite-fungi=9.0' { title = "UNITE USEARCH/UTAX release for Fungi - Version 9.0" - file = [ "https://s3.hpc.ut.ee/plutof-public/original/fa1038da-d18d-46b7-88a9-c21bcf38c43d.tgz" ] + file = [ "https://s3.hpc.ut.ee/plutof-public/original/096d35b9-1d41-42bc-a0a6-dc4f4f17cc79.gz" ] citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2022): UNITE USEARCH/UTAX release for Fungi. Version 16.10.2022. UNITE Community. 
https://doi.org/10.15156/BIO/2483923" - fmtscript = "taxref_reformat_sintax_tar.sh" + fmtscript = "taxref_reformat_sintax_fasta.sh" dbversion = "UNITE-fungi v9.0 (https://doi.org/10.15156/BIO/2483923)" } 'unite-fungi=8.3' { @@ -452,9 +452,9 @@ params { } 'unite-alleuk=8.3' { title = "UNITE USEARCH/UTAX release for eukaryotes - Version 8.3" - file = [ "https://s3.hpc.ut.ee/plutof-public/original/6e72e839-acfc-4044-84b4-5152b74c1552.tgz" ] + file = [ "https://s3.hpc.ut.ee/plutof-public/original/33ef3b65-5877-4bc9-8210-8e74f5ea63d7.gz" ] citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2021): UNITE USEARCH/UTAX release for eukaryotes. UNITE Community. 10.15156/BIO/1280317" - fmtscript = "taxref_reformat_sintax_tar.sh" + fmtscript = "taxref_reformat_sintax_fasta.sh" dbversion = "UNITE-alleuk v8.3 (https://doi.org/10.15156/BIO/1280127)" } 'unite-alleuk=8.2' { From 82a5665c9137da7da889dd5e3a6e5627538e8358 Mon Sep 17 00:00:00 2001 From: Daniel Lundin Date: Wed, 24 Jul 2024 12:56:55 +0200 Subject: [PATCH 17/31] Disable unite-fungi=9.0 -- seems broken at Unite --- conf/ref_databases.config | 3 +++ nextflow_schema.json | 1 - 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/conf/ref_databases.config b/conf/ref_databases.config index 73233b72..61944805 100644 --- a/conf/ref_databases.config +++ b/conf/ref_databases.config @@ -309,12 +309,15 @@ params { citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2024): UNITE QIIME release for Fungi. Version 04.04.2024. UNITE Community. https://doi.org/10.15156/BIO/2959336" fmtscript = "taxref_reformat_qiime_unite.sh" } + // This fails, and is therefore commented out. 
The error message is: "this classifier does not support confidence values" + /*** 'unite-fungi=9.0' { title = "UNITE QIIME release for Fungi - Version 9.0" file = [ "https://s3.hpc.ut.ee/plutof-public/original/3c22fbc5-ed9e-47a6-a85b-6c81268657e9.tgz" ] citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2023): UNITE QIIME release for Fungi. Version 18.07.2023. UNITE Community. https://doi.org/10.15156/BIO/2938079" fmtscript = "taxref_reformat_qiime_unite.sh" } + ***/ 'unite-fungi=8.3' { title = "UNITE QIIME release for Fungi - Version 8.3" file = [ "https://s3.hpc.ut.ee/plutof-public/original/3652451d-7567-4871-a7aa-3ba6c63aa60b.tgz" ] diff --git a/nextflow_schema.json b/nextflow_schema.json index 35f925ba..92f0fdc1 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -459,7 +459,6 @@ "silva", "unite-fungi", "unite-fungi=10.0", - "unite-fungi=9.0", "unite-fungi=8.3", "unite-fungi=8.2", "unite-alleuk", From 5fde4547365127c3d76705f571bc6383b0066e52 Mon Sep 17 00:00:00 2001 From: Daniel Lundin Date: Wed, 24 Jul 2024 13:13:59 +0200 Subject: [PATCH 18/31] Warning in bin/taxref_reformat_sintax_tar.sh --- bin/taxref_reformat_sintax_tar.sh | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/bin/taxref_reformat_sintax_tar.sh b/bin/taxref_reformat_sintax_tar.sh index 8539084d..f8a3e423 100755 --- a/bin/taxref_reformat_sintax_tar.sh +++ b/bin/taxref_reformat_sintax_tar.sh @@ -1,9 +1,8 @@ #!/bin/sh -# Handles preformatted database files suitable for sintax - -# Just rename the preformatted file -# Assumes only one (gzipped) file +# Handles preformatted database tar files suitable for sintax +# +# This turned out to be a MISTAKE and is NOT USED, but I'm keeping the file for a while anyway. 
# Extract the fasta file without _dev in its name f=$(tar tfz *.tgz | grep fasta | grep -v '_dev') From 9a8094d5cb144a310bcc981cfb2cc355241647d6 Mon Sep 17 00:00:00 2001 From: Daniel Lundin Date: Wed, 24 Jul 2024 15:45:15 +0200 Subject: [PATCH 19/31] Revert most of the filename changes in QIIME2_ANCOM_TAX --- modules/local/qiime2_ancom_tax.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/qiime2_ancom_tax.nf b/modules/local/qiime2_ancom_tax.nf index 686a5bac..cf488276 100644 --- a/modules/local/qiime2_ancom_tax.nf +++ b/modules/local/qiime2_ancom_tax.nf @@ -43,7 +43,7 @@ process QIIME2_ANCOM_TAX { --to-tsv if [ \$(grep -v '^#' -c ${table.baseName}-level-${taxlevel}.feature-table.tsv) -lt 2 ]; then - echo ${taxlevel} > ancom/"WARNING_${table.baseName}_Summing_your_data_at_taxonomic_level_${taxlevel}_produced_less_than_two_rows_(taxa)_ANCOM_cant_proceed_--_did_you_specify_a_bad_reference_taxonomy.txt" + echo ${taxlevel} > ancom/\"WARNING ${table.baseName} Summing your data at taxonomic level ${taxlevel} produced less than two rows (taxa), ANCOM can't proceed -- did you specify a bad reference taxonomy?\".txt else qiime composition add-pseudocount \\ --i-table lvl${taxlevel}-${table} \\ From 641fc4c52fedeb05cb2db2592fe572111b4071dd Mon Sep 17 00:00:00 2001 From: Daniel Lundin Date: Wed, 24 Jul 2024 16:31:51 +0200 Subject: [PATCH 20/31] Disable Unite databases for --qiime_ref_taxonomy --- CHANGELOG.md | 3 ++- conf/ref_databases.config | 9 +++++++-- nextflow_schema.json | 9 --------- 3 files changed, 9 insertions(+), 12 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ff7e733a..5f6f1aca 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,12 +8,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` - [#765](https://github.com/nf-core/ampliseq/pull/765) - Added version R09-RS220 of curated GTDB 16S taxonomy: `sbdi-gtdb=R09-RS220-1` or `sbdi-gtdb` as parameter to `--dada_ref_taxonomy` -- 
[#766](https://github.com/nf-core/ampliseq/pull/766) - Added version 10 of Unite as options for `--qiime_ref_taxonomy` and `--sintax_ref_taxonomy` +- [#766](https://github.com/nf-core/ampliseq/pull/766) - Added version 10 of Unite as options for `--sintax_ref_taxonomy` ### `Changed` - [#762](https://github.com/nf-core/ampliseq/pull/762) - Improved output documentation section "Optional ASV filtering" and parameter documentation - [#766](https://github.com/nf-core/ampliseq/pull/766) - Modified warning filenames from `QIIME2_ANCOM` to avoid collisions +- [#766](https://github.com/nf-core/ampliseq/pull/766) - Disabled Unite databases from the `--qiime_ref_taxonomy` because of divergent results compared to the other classifiers ### `Fixed` diff --git a/conf/ref_databases.config b/conf/ref_databases.config index 61944805..b91ea8ea 100644 --- a/conf/ref_databases.config +++ b/conf/ref_databases.config @@ -309,7 +309,12 @@ params { citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2024): UNITE QIIME release for Fungi. Version 04.04.2024. UNITE Community. https://doi.org/10.15156/BIO/2959336" fmtscript = "taxref_reformat_qiime_unite.sh" } - // This fails, and is therefore commented out. The error message is: "this classifier does not support confidence values" + + // We have disabled all Unite databases for the QIIME2 classifiers + // since we get results that differ considerably from the Sintax and + // DADA2 classifiers with the corresponding databases. + + // The unite-fungi=9.0 fails /*** 'unite-fungi=9.0' { title = "UNITE QIIME release for Fungi - Version 9.0" @@ -317,7 +322,6 @@ params { citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2023): UNITE QIIME release for Fungi. Version 18.07.2023. UNITE Community. 
https://doi.org/10.15156/BIO/2938079" fmtscript = "taxref_reformat_qiime_unite.sh" } - ***/ 'unite-fungi=8.3' { title = "UNITE QIIME release for Fungi - Version 8.3" file = [ "https://s3.hpc.ut.ee/plutof-public/original/3652451d-7567-4871-a7aa-3ba6c63aa60b.tgz" ] @@ -360,6 +364,7 @@ params { citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2020): UNITE QIIME release for eukaryotes. Version 04.02.2020. UNITE Community. https://doi.org/10.15156/BIO/786386" fmtscript = "taxref_reformat_qiime_unite.sh" } + ***/ 'greengenes85' { title = "Greengenes 16S - Version 13_8 - clustered at 85% similarity - for testing purposes only" file = [ "https://data.qiime2.org/2023.7/tutorials/training-feature-classifiers/85_otus.fasta", "https://data.qiime2.org/2023.7/tutorials/training-feature-classifiers/85_otu_taxonomy.txt" ] diff --git a/nextflow_schema.json b/nextflow_schema.json index 92f0fdc1..8df67e3c 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -457,15 +457,6 @@ "enum": [ "silva=138", "silva", - "unite-fungi", - "unite-fungi=10.0", - "unite-fungi=8.3", - "unite-fungi=8.2", - "unite-alleuk", - "unite-alleuk=10.0", - "unite-alleuk=9.0", - "unite-alleuk=8.3", - "unite-alleuk=8.2", "greengenes85", "greengenes2", "greengenes2=2022.10" From 1d697626c272c1ec99092c94242f4381ec110664 Mon Sep 17 00:00:00 2001 From: Daniel Lundin Date: Wed, 24 Jul 2024 19:10:45 +0200 Subject: [PATCH 21/31] prettier --- .devcontainer/devcontainer.json | 8 ++++---- nextflow_schema.json | 8 +------- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index b290e090..2a997736 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -10,11 +10,11 @@ "vscode": { // Set *default* container specific settings.json values on container create. 
"settings": { - "python.defaultInterpreterPath": "/opt/conda/bin/python" + "python.defaultInterpreterPath": "/opt/conda/bin/python", }, // Add the IDs of extensions you want installed when the container is created. - "extensions": ["ms-python.python", "ms-python.vscode-pylance", "nf-core.nf-core-extensionpack"] - } - } + "extensions": ["ms-python.python", "ms-python.vscode-pylance", "nf-core.nf-core-extensionpack"], + }, + }, } diff --git a/nextflow_schema.json b/nextflow_schema.json index 8df67e3c..ccef5f95 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -454,13 +454,7 @@ "type": "string", "help_text": "Choose any of the supported databases, and optionally also specify the version. Database and version are separated by an equal sign (`=`, e.g. `silva=138`) . This will download the desired database and initiate taxonomic classification with QIIME2 and the chosen database.\n\nIf both, `--dada_ref_taxonomy` and `--qiime_ref_taxonomy` are used, DADA2 classification will be used for downstream analysis.\n\nThe following databases are supported:\n- SILVA ribosomal RNA gene database project - 16S rRNA\n- UNITE - eukaryotic nuclear ribosomal ITS region - ITS\n- Greengenes (only testing!)\n\nGenerally, using `silva`, `unite-fungi`, or `unite-alleuk` will select the most recent supported version. For testing purposes, the tiny database `greengenes85` (dereplicated at 85% sequence similarity) is available. 
For details on what values are valid, please either use an invalid value such as `x` (causing the pipeline to send an error message with all valid values) or see `conf/ref_databases.config`.", "description": "Name of supported database, and optionally also version number", - "enum": [ - "silva=138", - "silva", - "greengenes85", - "greengenes2", - "greengenes2=2022.10" - ] + "enum": ["silva=138", "silva", "greengenes85", "greengenes2", "greengenes2=2022.10"] }, "qiime_ref_tax_custom": { "type": "string", From 8e9b28d0fde5e29a8f83351e552497cb86e80a15 Mon Sep 17 00:00:00 2001 From: nf-core-bot Date: Wed, 24 Jul 2024 17:24:23 +0000 Subject: [PATCH 22/31] [automated] Fix code linting --- .devcontainer/devcontainer.json | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 2a997736..b290e090 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -10,11 +10,11 @@ "vscode": { // Set *default* container specific settings.json values on container create. "settings": { - "python.defaultInterpreterPath": "/opt/conda/bin/python", + "python.defaultInterpreterPath": "/opt/conda/bin/python" }, // Add the IDs of extensions you want installed when the container is created. 
- "extensions": ["ms-python.python", "ms-python.vscode-pylance", "nf-core.nf-core-extensionpack"], - }, - }, + "extensions": ["ms-python.python", "ms-python.vscode-pylance", "nf-core.nf-core-extensionpack"] + } + } } From 058ccd5d554ebc1ac4c5988198df6baafd004504 Mon Sep 17 00:00:00 2001 From: d4straub Date: Thu, 25 Jul 2024 11:44:49 +0200 Subject: [PATCH 23/31] fix validation of --*_ref_taxonomy --- workflows/ampliseq.nf | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index f0073841..eec27d62 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -47,7 +47,7 @@ if (params.dada_ref_tax_custom) { val_dada_ref_taxonomy = "user" } else if (params.dada_ref_taxonomy && !params.skip_dada_taxonomy && !params.skip_taxonomy) { //standard ref taxonomy input from params.dada_ref_taxonomy & conf/ref_databases.config - ch_dada_ref_taxonomy = Channel.fromList(params.dada_ref_databases[params.dada_ref_taxonomy]["file"]).map { file(it) } + ch_dada_ref_taxonomy = params.dada_ref_databases.containsKey(params.dada_ref_taxonomy) ? Channel.fromList(params.dada_ref_databases[params.dada_ref_taxonomy]["file"]).map { file(it) } : Channel.empty() val_dada_ref_taxonomy = params.dada_ref_taxonomy.replace('=','_').replace('.','_') } else { ch_dada_ref_taxonomy = Channel.empty() @@ -67,7 +67,7 @@ if (params.qiime_ref_tax_custom) { } val_qiime_ref_taxonomy = "user" } else if (params.qiime_ref_taxonomy && !params.skip_taxonomy && !params.classifier) { - ch_qiime_ref_taxonomy = Channel.fromList(params.qiime_ref_databases[params.qiime_ref_taxonomy]["file"]).map { file(it) } + ch_qiime_ref_taxonomy = params.qiime_ref_databases.containsKey(params.qiime_ref_taxonomy) ? 
Channel.fromList(params.qiime_ref_databases[params.qiime_ref_taxonomy]["file"]).map { file(it) } : Channel.empty() val_qiime_ref_taxonomy = params.qiime_ref_taxonomy.replace('=','_').replace('.','_') } else { ch_qiime_ref_taxonomy = Channel.empty() @@ -75,7 +75,7 @@ if (params.qiime_ref_tax_custom) { } if (params.sintax_ref_taxonomy && !params.skip_taxonomy) { - ch_sintax_ref_taxonomy = Channel.fromList(params.sintax_ref_databases[params.sintax_ref_taxonomy]["file"]).map { file(it) } + ch_sintax_ref_taxonomy = params.sintax_ref_databases.containsKey(params.sintax_ref_taxonomy) ? Channel.fromList(params.sintax_ref_databases[params.sintax_ref_taxonomy]["file"]).map { file(it) } : Channel.empty() val_sintax_ref_taxonomy = params.sintax_ref_taxonomy.replace('=','_').replace('.','_') } else { ch_sintax_ref_taxonomy = Channel.empty() @@ -88,7 +88,7 @@ if (params.kraken2_ref_tax_custom) { val_kraken2_ref_taxonomy = "user" } else if (params.kraken2_ref_taxonomy && !params.skip_taxonomy) { //standard ref taxonomy input from params.dada_ref_taxonomy & conf/ref_databases.config - ch_kraken2_ref_taxonomy = Channel.fromList(params.kraken2_ref_databases[params.kraken2_ref_taxonomy]["file"]).map { file(it) } + ch_kraken2_ref_taxonomy = params.kraken2_ref_databases.containsKey(params.kraken2_ref_taxonomy) ? Channel.fromList(params.kraken2_ref_databases[params.kraken2_ref_taxonomy]["file"]).map { file(it) } : Channel.empty() val_kraken2_ref_taxonomy = params.kraken2_ref_taxonomy.replace('=','_').replace('.','_') } else { ch_kraken2_ref_taxonomy = Channel.empty() @@ -122,16 +122,16 @@ tax_agglom_max = params.tax_agglom_max //use custom taxlevels from --dada_assign_taxlevels or database specific taxlevels if specified in conf/ref_databases.config if ( params.dada_ref_taxonomy ) { taxlevels = params.dada_assign_taxlevels ? 
"${params.dada_assign_taxlevels}" : - params.dada_ref_databases[params.dada_ref_taxonomy]["taxlevels"] ?: "" + params.dada_ref_databases.containsKey(params.dada_ref_taxonomy) && params.dada_ref_databases[params.dada_ref_taxonomy]["taxlevels"] ?: "" } else { taxlevels = params.dada_assign_taxlevels ? "${params.dada_assign_taxlevels}" : "" } if ( params.sintax_ref_taxonomy ) { - sintax_taxlevels = params.sintax_ref_databases[params.sintax_ref_taxonomy]["taxlevels"] ?: "" + sintax_taxlevels = params.sintax_ref_databases.containsKey(params.sintax_ref_taxonomy) && params.sintax_ref_databases[params.sintax_ref_taxonomy]["taxlevels"] ?: "" } else { sintax_taxlevels = "" } if ( params.kraken2_ref_taxonomy ) { kraken2_taxlevels = params.kraken2_assign_taxlevels ? "${params.kraken2_assign_taxlevels}" : - params.kraken2_ref_databases[params.kraken2_ref_taxonomy]["taxlevels"] ?: "" + params.kraken2_ref_databases.containsKey(params.kraken2_ref_taxonomy) && params.kraken2_ref_databases[params.kraken2_ref_taxonomy]["taxlevels"] ?: "" } else { kraken2_taxlevels = params.kraken2_assign_taxlevels ? "${params.kraken2_assign_taxlevels}" : "" } //make sure that taxlevels adheres to requirements when mixed with addSpecies From 536bdb532f1fc25b3174e4a5ebde45fa40709c06 Mon Sep 17 00:00:00 2001 From: d4straub Date: Thu, 25 Jul 2024 11:45:13 +0200 Subject: [PATCH 24/31] remove unused code --- subworkflows/local/utils_nfcore_ampliseq_pipeline/main.nf | 8 -------- 1 file changed, 8 deletions(-) diff --git a/subworkflows/local/utils_nfcore_ampliseq_pipeline/main.nf b/subworkflows/local/utils_nfcore_ampliseq_pipeline/main.nf index a1c190fc..6180ebd2 100644 --- a/subworkflows/local/utils_nfcore_ampliseq_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_ampliseq_pipeline/main.nf @@ -162,14 +162,6 @@ def validateInputParameters() { error("Invalid command: `--trunclenr` is set, but `--trunclenf` is not. 
Either both parameters `--trunclenf` and `--trunclenr` must be set or none.") } - if (!["pooled", "independent", "pseudo"].contains(params.sample_inference)) { - def error_string = "Please set `--sample_inference` to one of the following:\n" + - "\t-\"independent\" (lowest sensitivity and lowest resources),\n" + - "\t-\"pseudo\" (balance between required resources and sensitivity),\n" + - "\t-\"pooled\" (highest sensitivity and resources)." - error(error_string) - } - if (params.double_primer && params.retain_untrimmed) { error("Incompatible parameters `--double_primer` and `--retain_untrimmed` cannot be set at the same time.") } From 088d7d4824f05f10c87c2bfa43704d110c668c09 Mon Sep 17 00:00:00 2001 From: d4straub Date: Thu, 25 Jul 2024 11:46:00 +0200 Subject: [PATCH 25/31] update missing UNITE in QIIME2 --- conf/ref_databases.config | 13 ++++++------- docs/usage.md | 4 ++-- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/conf/ref_databases.config b/conf/ref_databases.config index b91ea8ea..cd2edf43 100644 --- a/conf/ref_databases.config +++ b/conf/ref_databases.config @@ -296,6 +296,12 @@ params { license = "https://www.arb-silva.de/silva-license-information/" fmtscript = "taxref_reformat_qiime_silva138.sh" } + + // We have disabled all Unite databases for the QIIME2 classifiers + // since we get results that differ considerably from the Sintax and + // DADA2 classifiers with the corresponding databases. + + /*** //UNITE for QIIME2, see https://unite.ut.ee/repository.php 'unite-fungi' { title = "UNITE QIIME release for Fungi - Version 10.0" @@ -309,13 +315,6 @@ params { citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2024): UNITE QIIME release for Fungi. Version 04.04.2024. UNITE Community. 
https://doi.org/10.15156/BIO/2959336" fmtscript = "taxref_reformat_qiime_unite.sh" } - - // We have disabled all Unite databases for the QIIME2 classifiers - // since we get results that differ considerably from the Sintax and - // DADA2 classifiers with the corresponding databases. - - // The unite-fungi=9.0 fails - /*** 'unite-fungi=9.0' { title = "UNITE QIIME release for Fungi - Version 9.0" file = [ "https://s3.hpc.ut.ee/plutof-public/original/3c22fbc5-ed9e-47a6-a85b-6c81268657e9.tgz" ] diff --git a/docs/usage.md b/docs/usage.md index a7dad2b6..a53a925b 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -229,8 +229,8 @@ Pre-configured reference taxonomy databases are: | greengenes | - | - | + | (+)² | 16S rRNA | | greengenes2 | - | - | - | + | 16S rRNA | | pr2 | + | - | - | - | 18S rRNA | -| unite-fungi | + | + | - | + | eukaryotic nuclear ribosomal ITS region | -| unite-alleuk | + | + | - | + | eukaryotic nuclear ribosomal ITS region | +| unite-fungi | + | + | - | - | eukaryotic nuclear ribosomal ITS region | +| unite-alleuk | + | + | - | - | eukaryotic nuclear ribosomal ITS region | | coidb | + | + | - | - | eukaryotic Cytochrome Oxidase I (COI) | | midori2-co1 | + | - | - | - | eukaryotic Cytochrome Oxidase I (COI) | | phytoref | + | - | - | - | eukaryotic plastid 16S rRNA | From 3bd4afdc4979d7f4ef2de1b332c5dea6a6010d68 Mon Sep 17 00:00:00 2001 From: d4straub Date: Thu, 25 Jul 2024 11:46:37 +0200 Subject: [PATCH 26/31] update CHANGELOG --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5f6f1aca..d0b34107 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,12 +14,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#762](https://github.com/nf-core/ampliseq/pull/762) - Improved output documentation section "Optional ASV filtering" and parameter documentation - [#766](https://github.com/nf-core/ampliseq/pull/766) - Modified warning filenames from 
`QIIME2_ANCOM` to avoid collisions -- [#766](https://github.com/nf-core/ampliseq/pull/766) - Disabled Unite databases from the `--qiime_ref_taxonomy` because of divergent results compared to the other classifiers +- [#766](https://github.com/nf-core/ampliseq/pull/766),[#769](https://github.com/nf-core/ampliseq/pull/769) - Disabled Unite databases for `--qiime_ref_taxonomy` because of divergent results compared to the other classifiers ### `Fixed` - [#761](https://github.com/nf-core/ampliseq/pull/761) - Some sample sheet checks were not applied due to changes in the metadata ["meta"] structure in version 2.9.0 - [#766](https://github.com/nf-core/ampliseq/pull/766) - Fixed broken urls for Unite databases (issue [#764](https://github.com/nf-core/ampliseq/issues/764)) +- [#769](https://github.com/nf-core/ampliseq/pull/769) - Reference taxonomy database values were not properly validated in versions 2.9.0 and 2.10.0 ### `Dependencies` From 0a612fe972b8f65b6c95c303227bf68987b1d2fb Mon Sep 17 00:00:00 2001 From: d4straub Date: Thu, 25 Jul 2024 13:46:32 +0200 Subject: [PATCH 27/31] fix taxlevels --- workflows/ampliseq.nf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index eec27d62..8d5afed3 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -122,16 +122,16 @@ tax_agglom_max = params.tax_agglom_max //use custom taxlevels from --dada_assign_taxlevels or database specific taxlevels if specified in conf/ref_databases.config if ( params.dada_ref_taxonomy ) { taxlevels = params.dada_assign_taxlevels ? "${params.dada_assign_taxlevels}" : - params.dada_ref_databases.containsKey(params.dada_ref_taxonomy) && params.dada_ref_databases[params.dada_ref_taxonomy]["taxlevels"] ?
params.dada_ref_databases[params.dada_ref_taxonomy]["taxlevels"] : "" } else { taxlevels = params.dada_assign_taxlevels ? "${params.dada_assign_taxlevels}" : "" } if ( params.sintax_ref_taxonomy ) { - sintax_taxlevels = params.sintax_ref_databases.containsKey(params.sintax_ref_taxonomy) && params.sintax_ref_databases[params.sintax_ref_taxonomy]["taxlevels"] ?: "" + sintax_taxlevels = params.sintax_ref_databases.containsKey(params.sintax_ref_taxonomy) && params.sintax_ref_databases[params.sintax_ref_taxonomy]["taxlevels"] ? params.sintax_ref_databases[params.sintax_ref_taxonomy]["taxlevels"] : "" } else { sintax_taxlevels = "" } if ( params.kraken2_ref_taxonomy ) { kraken2_taxlevels = params.kraken2_assign_taxlevels ? "${params.kraken2_assign_taxlevels}" : - params.kraken2_ref_databases.containsKey(params.kraken2_ref_taxonomy) && params.kraken2_ref_databases[params.kraken2_ref_taxonomy]["taxlevels"] ?: "" + params.kraken2_ref_databases.containsKey(params.kraken2_ref_taxonomy) && params.kraken2_ref_databases[params.kraken2_ref_taxonomy]["taxlevels"] ? params.kraken2_ref_databases[params.kraken2_ref_taxonomy]["taxlevels"] : "" } else { kraken2_taxlevels = params.kraken2_assign_taxlevels ? "${params.kraken2_assign_taxlevels}" : "" } //make sure that taxlevels adheres to requirements when mixed with addSpecies From 98bf8bb325d1abc72b0d877cb559278542c1c139 Mon Sep 17 00:00:00 2001 From: d4straub Date: Thu, 25 Jul 2024 14:34:14 +0200 Subject: [PATCH 28/31] nf-core bump-version 2.11.0 --- assets/multiqc_config.yml | 4 ++-- nextflow.config | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index ec828d82..57691707 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,7 +1,7 @@ report_comment: > - This report has been generated by the nf-core/ampliseq + This report has been generated by the nf-core/ampliseq analysis pipeline. 
For information about how to interpret these results, please see the - documentation. + documentation. report_section_order: "nf-core-ampliseq-methods-description": order: -1000 diff --git a/nextflow.config b/nextflow.config index 32729b03..f81c0033 100644 --- a/nextflow.config +++ b/nextflow.config @@ -357,7 +357,7 @@ manifest { description = """Amplicon sequencing analysis workflow using DADA2 and QIIME2""" mainScript = 'main.nf' nextflowVersion = '!>=23.04.0' - version = '2.11.0dev' + version = '2.11.0' doi = '10.5281/zenodo.1493841,10.3389/fmicb.2020.550420' } From 6de1d0dabd3c98024bfc6042e99a089d5fc60143 Mon Sep 17 00:00:00 2001 From: d4straub Date: Thu, 25 Jul 2024 14:34:30 +0200 Subject: [PATCH 29/31] update CHANGELOG --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d0b34107..a40ff833 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
-## nf-core/ampliseq version 2.11.0dev +## nf-core/ampliseq version 2.11.0 - 2024-08-06 ### `Added` From a3e82844a5c3fdaba25643bb5fbcd43a54c6393a Mon Sep 17 00:00:00 2001 From: d4straub Date: Thu, 25 Jul 2024 14:34:53 +0200 Subject: [PATCH 30/31] fix typo in report --- assets/report_template.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index a1bc6b4b..76119739 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -1544,7 +1544,7 @@ for (folder in ancom) { any_ancombc <- !isFALSE(params$ancombc) || !isFALSE(params$ancombc_formula) ``` -```{r, eval = !isFALSE(params$any_ancombc), results='asis'} +```{r, eval = !isFALSE(any_ancombc), results='asis'} cat(paste0(" ## ANCOM-BC From 61e8bc76df53531049707dca86bb8fa13882551e Mon Sep 17 00:00:00 2001 From: Daniel Straub <42973691+d4straub@users.noreply.github.com> Date: Fri, 26 Jul 2024 11:38:08 +0200 Subject: [PATCH 31/31] Apply suggestions from code review Co-authored-by: Daniel Lundin --- CHANGELOG.md | 2 +- conf/test_its_dada_taxonomy.config | 2 +- docs/output.md | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a40ff833..f888dc21 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,7 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` - [#765](https://github.com/nf-core/ampliseq/pull/765) - Added version R09-RS220 of curated GTDB 16S taxonomy: `sbdi-gtdb=R09-RS220-1` or `sbdi-gtdb` as parameter to `--dada_ref_taxonomy` -- [#766](https://github.com/nf-core/ampliseq/pull/766) - Added version 10 of Unite as options for `--sintax_ref_taxonomy` +- [#766](https://github.com/nf-core/ampliseq/pull/766) - Added version 10 of Unite as parameter for `--sintax_ref_taxonomy`: `unite-fungi=10.0` and `unite-alleuk=10.0` ### `Changed` diff --git a/conf/test_its_dada_taxonomy.config b/conf/test_its_dada_taxonomy.config index 
60ef7a99..d704320b 100644 --- a/conf/test_its_dada_taxonomy.config +++ b/conf/test_its_dada_taxonomy.config @@ -12,7 +12,7 @@ params { config_profile_name = 'Test sintax profile' - config_profile_description = 'Minimal test dataset to check pipeline function when using sintax for taxonomy assignment' + config_profile_description = 'Minimal test dataset to check pipeline function for ITS data with the DADA2 taxonomy classifier' // Limit resources so that this can run on GitHub Actions max_cpus = 2 diff --git a/docs/output.md b/docs/output.md index 302af707..33278748 100644 --- a/docs/output.md +++ b/docs/output.md @@ -202,7 +202,7 @@ Optionally, ASV sequences can be filtered for rRNA sequences identified by Barrn #### Length filter -Optionally, a length filter can be used to reduce potential contamination after ASV computation. For example with 515f and 806r primers the majority of 16S rRNA amplicon sequences should have a length of 253 bp and amplicons that vary significantly are likely spurious. +Optionally, a length filter can be used to reduce potential contamination after ASV computation. For example with 515f and 806r primers the majority of 16S rRNA amplicon sequences should have a length of 253 bp and amplicons that differ significantly from this are likely spurious. The minimum ASV length threshold can be set by `--min_len_asv` and the maximum length threshold with `--max_len_asv`. If no threshold is set, the filter (and output) is omitted.