diff --git a/conf/modules.config b/conf/modules.config index 0fbea292..fa48b5cb 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -16,14 +16,14 @@ process { publishDir = [path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, enabled: false] withName: FASTQC_RAW { - ext.args = '--quiet' + ext.args = '--quiet' publishDir = [path: { "${params.outdir}/QC_shortreads/fastqc" }, mode: params.publish_dir_mode, pattern: "*.html"] ext.prefix = { "${meta.id}_run${meta.run}_raw" } - tag = { "${meta.id}_run${meta.run}_raw" } + tag = { "${meta.id}_run${meta.run}_raw" } } withName: FASTP { - ext.args = [ + ext.args = [ "-q ${params.fastp_qualified_quality}", "--cut_front", "--cut_tail", @@ -44,11 +44,11 @@ process { ] ] ext.prefix = { "${meta.id}_run${meta.run}_fastp" } - tag = { "${meta.id}_run${meta.run}" } + tag = { "${meta.id}_run${meta.run}" } } withName: ADAPTERREMOVAL_PE { - ext.args = [ + ext.args = [ "--minlength ${params.reads_minlength}", "--adapter1 ${params.adapterremoval_adapter1} --adapter2 ${params.adapterremoval_adapter2}", "--minquality ${params.adapterremoval_minquality} --trimns", @@ -68,11 +68,11 @@ process { ] ] ext.prefix = { "${meta.id}_run${meta.run}_ar2" } - tag = { "${meta.id}_run${meta.run}" } + tag = { "${meta.id}_run${meta.run}" } } withName: ADAPTERREMOVAL_SE { - ext.args = [ + ext.args = [ "--minlength ${params.reads_minlength}", "--adapter1 ${params.adapterremoval_adapter1}", "--minquality ${params.adapterremoval_minquality} --trimns", @@ -84,72 +84,72 @@ process { pattern: "*.{settings}" ] ext.prefix = { "${meta.id}_run${meta.run}_ar2" } - tag = { "${meta.id}_run${meta.run}" } + tag = { "${meta.id}_run${meta.run}" } } withName: BOWTIE2_PHIX_REMOVAL_ALIGN { ext.prefix = { "${meta.id}_run${meta.run}_phix_removed" } publishDir = [ [ - path: { "${params.outdir}/QC_shortreads/remove_phix" }, + 
path: { "${params.outdir}/QC_shortreads/remove_phix/${meta.id}/" }, mode: params.publish_dir_mode, pattern: "*.log" ], [ - path: { "${params.outdir}/QC_shortreads/remove_phix" }, + path: { "${params.outdir}/QC_shortreads/remove_phix/${meta.id}/" }, mode: params.publish_dir_mode, pattern: "*.unmapped*.fastq.gz", enabled: params.save_phixremoved_reads ] ] - tag = { "${meta.id}_run${meta.run}" } + tag = { "${meta.id}_run${meta.run}" } } withName: BOWTIE2_HOST_REMOVAL_ALIGN { - ext.args = params.host_removal_verysensitive ? "--very-sensitive" : "--sensitive" - ext.args2 = params.host_removal_save_ids ? "--host_removal_save_ids" : '' + ext.args = params.host_removal_verysensitive ? "--very-sensitive" : "--sensitive" + ext.args2 = params.host_removal_save_ids ? "--host_removal_save_ids" : '' ext.prefix = { "${meta.id}_run${meta.run}_host_removed" } publishDir = [ [ - path: { "${params.outdir}/QC_shortreads/remove_host" }, + path: { "${params.outdir}/QC_shortreads/remove_host/${meta.id}/" }, mode: params.publish_dir_mode, pattern: "*{.log,read_ids.txt}" ], [ - path: { "${params.outdir}/QC_shortreads/remove_host" }, + path: { "${params.outdir}/QC_shortreads/remove_host/${meta.id}/" }, mode: params.publish_dir_mode, pattern: "*.unmapped*.fastq.gz", enabled: params.save_hostremoved_reads ] ] - tag = { "${meta.id}_run${meta.run}" } + tag = { "${meta.id}_run${meta.run}" } } withName: FASTQC_TRIMMED { - ext.args = '--quiet' + ext.args = '--quiet' ext.prefix = { "${meta.id}_run${meta.run}_trimmed" } publishDir = [ path: { "${params.outdir}/QC_shortreads/fastqc" }, mode: params.publish_dir_mode, pattern: "*.html" ] - tag = { "${meta.id}_run${meta.run}" } + tag = { "${meta.id}_run${meta.run}" } } withName: BBMAP_BBNORM { - ext.args = [ + ext.args = [ params.bbnorm_target ? "target=${params.bbnorm_target}" : '', params.bbnorm_min ? 
"min=${params.bbnorm_min}" : '' ].join(' ').trim() publishDir = [ [ - path: { "${params.outdir}/bbmap/bbnorm/logs" }, + path: { "${params.outdir}/bbmap/bbnorm/${meta.id}/" }, enabled: params.save_bbnorm_reads, mode: params.publish_dir_mode, pattern: "*.log" ], [ - path: { "${params.outdir}/bbmap/bbnorm/" }, + path: { "${params.outdir}/bbmap/bbnorm/${meta.id}/" }, mode: 'copy', enabled: params.save_bbnorm_reads, mode: params.publish_dir_mode, @@ -179,7 +179,7 @@ process { } withName: FILTLONG { - ext.args = [ + ext.args = [ "--min_length ${params.longreads_min_length}", "--keep_percent ${params.longreads_keep_percent}", "--trim", @@ -201,7 +201,7 @@ process { withName: NANOPLOT_RAW { ext.prefix = 'raw' - ext.args = { + ext.args = { [ "-p raw_", "--title ${meta.id}_raw", @@ -216,7 +216,7 @@ process { } withName: NANOPLOT_FILTERED { - ext.args = { + ext.args = { [ "-p filtered_", "--title ${meta.id}_filtered", @@ -240,7 +240,7 @@ process { } withName: KRAKEN2 { - ext.args = '--quiet' + ext.args = '--quiet' publishDir = [ path: { "${params.outdir}/Taxonomy/kraken2/${meta.id}" }, mode: params.publish_dir_mode, @@ -257,7 +257,7 @@ process { } withName: MEGAHIT { - ext.args = { params.megahit_options ? params.megahit_options + "-m ${task.memory.toBytes()}" : "-m ${task.memory.toBytes()}" } + ext.args = { params.megahit_options ? params.megahit_options + "-m ${task.memory.toBytes()}" : "-m ${task.memory.toBytes()}" } ext.prefix = { "MEGAHIT-${meta.id}" } publishDir = [path: { "${params.outdir}/Assembly/MEGAHIT" }, mode: params.publish_dir_mode, pattern: "*.{fa.gz,log}"] } @@ -279,7 +279,7 @@ process { } withName: GENOMAD_ENDTOEND { - ext.args = [ + ext.args = [ "--cleanup", "--min-score ${params.genomad_min_score}", "--splits ${params.genomad_splits}" @@ -292,7 +292,7 @@ process { } withName: BOWTIE2_ASSEMBLY_ALIGN { - ext.args = params.bowtie2_mode ? params.bowtie2_mode : params.ancient_dna ? '--very-sensitive -N 1' : '' + ext.args = params.bowtie2_mode ? 
params.bowtie2_mode : params.ancient_dna ? '--very-sensitive -N 1' : '' ext.prefix = { "${meta.id}.assembly" } publishDir = [ [ @@ -326,7 +326,7 @@ process { } withName: BUSCO { - ext.args = [ + ext.args = [ params.busco_db ? '--offline' : '' ].join(' ').trim() publishDir = [ @@ -349,14 +349,14 @@ process { } withName: CHECKM_LINEAGEWF { - tag = { "${meta.assembler}-${meta.binner}-${meta.domain}-${meta.refinement}-${meta.id}" } + tag = { "${meta.assembler}-${meta.binner}-${meta.domain}-${meta.refinement}-${meta.id}" } ext.prefix = { "${meta.assembler}-${meta.binner}-${meta.domain}-${meta.refinement}-${meta.id}_wf" } publishDir = [path: { "${params.outdir}/GenomeBinning/QC/CheckM" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }] } withName: CHECKM_QA { ext.prefix = { "${meta.assembler}-${meta.binner}-${meta.domain}-${meta.refinement}-${meta.id}_qa" } - ext.args = "-o 2 --tab_table" + ext.args = "-o 2 --tab_table" publishDir = [ path: { "${params.outdir}/GenomeBinning/QC/CheckM" }, mode: params.publish_dir_mode, @@ -409,7 +409,7 @@ process { } withName: GTDBTK_CLASSIFYWF { - ext.args = [ + ext.args = [ "--extension fa", "--min_perc_aa ${params.gtdbtk_min_perc_aa}", "--min_af ${params.gtdbtk_min_af}", @@ -424,30 +424,30 @@ process { } withName: GTDBTK_SUMMARY { - ext.args = "--extension fa" + ext.args = "--extension fa" publishDir = [path: { "${params.outdir}/Taxonomy/GTDB-Tk" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }] } withName: PROKKA { - ext.args = "--metagenome" + ext.args = "--metagenome" publishDir = [path: { "${params.outdir}/Annotation/Prokka/${meta.assembler}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }] } withName: PRODIGAL { - ext.args = "-p meta" + ext.args = "-p meta" ext.prefix = { "${meta.assembler}-${meta.id}_prodigal" } publishDir = [path: { "${params.outdir}/Annotation/Prodigal/${meta.assembler}/${meta.id}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }] } withName: FREEBAYES { ext.prefix = { "${meta.assembler}-${meta.id}" } - ext.args = "-p ${params.freebayes_ploidy} -q ${params.freebayes_min_basequality} -F ${params.freebayes_minallelefreq}" + ext.args = "-p ${params.freebayes_ploidy} -q ${params.freebayes_min_basequality} -F ${params.freebayes_minallelefreq}" publishDir = [path: { "${params.outdir}/Ancient_DNA/variant_calling/freebayes" }, mode: params.publish_dir_mode, pattern: "*.vcf.gz"] } withName: BCFTOOLS_VIEW { ext.prefix = { "${meta.assembler}-${meta.id}.filtered" } - ext.args = "-v snps,mnps -i 'QUAL>=${params.bcftools_view_high_variant_quality} || (QUAL>=${params.bcftools_view_medium_variant_quality} && FORMAT/AO>=${params.bcftools_view_minimal_allelesupport})'" + ext.args = "-v snps,mnps -i 'QUAL>=${params.bcftools_view_high_variant_quality} || (QUAL>=${params.bcftools_view_medium_variant_quality} && FORMAT/AO>=${params.bcftools_view_minimal_allelesupport})'" publishDir = [path: { "${params.outdir}/Ancient_DNA/variant_calling/filtered" }, mode: params.publish_dir_mode, pattern: "*.vcf.gz"] } @@ -462,7 +462,7 @@ process { withName: BCFTOOLS_INDEX { ext.prefix = { "${meta.assembler}-${meta.id}" } - ext.args = "-t" + ext.args = "-t" publishDir = [ path: { "${params.outdir}/Ancient_DNA/variant_calling/index" }, mode: params.publish_dir_mode, @@ -480,7 +480,7 @@ process { withName: PYDAMAGE_FILTER { ext.prefix = { "${meta.assembler}-${meta.id}" } - ext.args = "-t ${params.pydamage_accuracy}" + ext.args = "-t ${params.pydamage_accuracy}" publishDir = [ path: { "${params.outdir}/Ancient_DNA/pydamage/filter/${meta.assembler}-${meta.id}/" }, mode: params.publish_dir_mode 
@@ -504,7 +504,7 @@ process { withName: METABAT2_METABAT2 { publishDir = [[path: { "${params.outdir}/GenomeBinning/MetaBAT2/bins/" }, mode: params.publish_dir_mode, pattern: '*[!lowDepth|tooShort|unbinned].fa.gz'], [path: { "${params.outdir}/GenomeBinning/MetaBAT2/discarded" }, mode: params.publish_dir_mode, pattern: '*tooShort.fa.gz'], [path: { "${params.outdir}/GenomeBinning/MetaBAT2/discarded" }, mode: params.publish_dir_mode, pattern: '*lowDepth.fa.gz']] ext.prefix = { "${meta.assembler}-MetaBAT2-${meta.id}" } - ext.args = [ + ext.args = [ params.min_contig_size < 1500 ? "-m 1500" : "-m ${params.min_contig_size}", "--unbinned", "--seed ${params.metabat_rng_seed}" @@ -583,7 +583,7 @@ process { ] ] ext.prefix = { "${meta.assembler}-DASTool-${meta.id}" } - ext.args = "--write_bins --write_unbinned --write_bin_evals --score_threshold ${params.refine_bins_dastool_threshold}" + ext.args = "--write_bins --write_unbinned --write_bin_evals --score_threshold ${params.refine_bins_dastool_threshold}" } withName: RENAME_POSTDASTOOL { @@ -607,12 +607,12 @@ process { mode: params.publish_dir_mode, pattern: "*.txt" ] - ext.args = { "--min_len ${params.tiara_min_length} --probabilities" } + ext.args = { "--min_len ${params.tiara_min_length} --probabilities" } ext.prefix = { "${meta.assembler}-${meta.id}.tiara" } } withName: TIARA_CLASSIFY { - ext.args = { "--join_prokaryotes --assembler ${meta.assembler}" } + ext.args = { "--join_prokaryotes --assembler ${meta.assembler}" } ext.prefix = { "${meta.assembler}-${meta.binner}-${meta.bin}-${meta.id}" } } @@ -627,13 +627,13 @@ process { } withName: METAEUK_EASYPREDICT { - ext.args = "" + ext.args = "" ext.prefix = { "${meta.id}" } publishDir = [path: { "${params.outdir}/Annotation/MetaEuk/${meta.assembler}/${meta.id}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }] } withName: MULTIQC { - ext.args = { params.multiqc_title ? 
"--title \"${params.multiqc_title}\"" : '' } + ext.args = { params.multiqc_title ? "--title \"${params.multiqc_title}\"" : '' } publishDir = [ path: { "${params.outdir}/multiqc" }, mode: params.publish_dir_mode, diff --git a/conf/test_hybrid.config b/conf/test_hybrid.config index cfb0991c..ae44a5d4 100644 --- a/conf/test_hybrid.config +++ b/conf/test_hybrid.config @@ -31,4 +31,8 @@ params { skip_gtdbtk = true gtdbtk_min_completeness = 0.01 skip_concoct = true + + // Generate downstream samplesheets + generate_downstream_samplesheets = true + generate_pipeline_samplesheets = null } diff --git a/docs/output.md b/docs/output.md index 4e43ffb6..91436480 100644 --- a/docs/output.md +++ b/docs/output.md @@ -26,6 +26,9 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d Note that when specifying the parameter `--coassemble_group`, for the corresponding output filenames/directories of the assembly or downsteam processes the group ID, or more precisely the term `group-[group_id]`, will be used instead of the sample ID. +The pipeline can also generate downstream pipeline input samplesheets. +These are stored in `/downstream_samplesheets`. + ## Quality control These steps trim away the adapter sequences present in input reads, trims away bad quality bases and sicard reads that are too short. @@ -720,6 +723,9 @@ Because of aDNA damage, _de novo_ assemblers sometimes struggle to call a correc +The pipeline can also generate input samplesheets for downstream pipelines. +These are stored in `/downstream_samplesheets`. + ### MultiQC
@@ -764,3 +770,24 @@ Summary tool-specific plots and tables of following tools are currently displaye
[Nextflow](https://www.nextflow.io/docs/latest/tracing.html) provides excellent functionality for generating various reports relevant to the running and execution of the pipeline. This will allow you to troubleshoot errors with the running of the pipeline, and also provide you with other information such as launch commands, run times and resource usage. + +### Downstream samplesheets + +The pipeline can also generate input files for the following downstream pipelines: + +- [nf-core/funcscan](https://nf-co.re/funcscan) +- [nf-core/taxprofiler](https://nf-co.re/taxprofiler) + +<details markdown="1">
+<summary>Output files</summary> + +- `downstream_samplesheets/` + - `taxprofiler.csv`: Partially filled out nf-core/taxprofiler `--input` csv with paths to preprocessed reads (adapter trimmed, host removed etc.) in `.fastq.gz` format. I.e., the direct input into MEGAHIT, SPAdes, SPAdesHybrid. + - `funcscan.csv`: Filled out nf-core/funcscan `--input` csv with absolute paths to the assembled contig FASTA files produced by nf-core/mag (i.e., the direct output from MEGAHIT, SPAdes, SPAdesHybrid - not bins). + +</details>
+ +:::warning +Any generated downstream samplesheet is provided as 'best effort' and are not guaranteed to work straight out of the box! +They may not be complete (e.g. some columns may need to be manually filled in). +::: diff --git a/nextflow.config b/nextflow.config index 026f67d8..8d3475dd 100644 --- a/nextflow.config +++ b/nextflow.config @@ -149,6 +149,10 @@ params { metaeuk_db = null save_mmseqs_db = false + // Generate downstream samplesheets + generate_downstream_samplesheets = false + generate_pipeline_samplesheets = null + // References //genome = null // we use --host_genome instead igenomes_base = 's3://ngi-igenomes/igenomes/' diff --git a/nextflow_schema.json b/nextflow_schema.json index ceb3ac08..b96a1b07 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -83,6 +83,26 @@ } } }, + "generate_samplesheet_options": { + "title": "Downstream pipeline samplesheet generation options", + "type": "object", + "fa_icon": "fas fa-align-justify", + "description": "Options for generating input samplesheets for complementary downstream pipelines.", + "properties": { + "generate_downstream_samplesheets": { + "type": "boolean", + "description": "Turn on generation of samplesheets for downstream pipelines.", + "fa_icon": "fas fa-toggle-on" + }, + "generate_pipeline_samplesheets": { + "type": "string", + "description": "Specify which pipeline to generate a samplesheet for.", + "help": "Note that the nf-core/funcscan samplesheet will only include paths to raw assemblies, not bins\n\nThe nf-core/taxprofiler samplesheet will include of paths the pre-processed reads that are used are used as input for _de novo_ assembly.", + "fa_icon": "fas fa-toolbox", + "pattern": "^(taxprofiler|funcscan)(?:,(taxprofiler|funcscan)){0,1}" + } + } + }, "institutional_config_options": { "title": "Institutional config options", "type": "object", @@ -891,6 +911,9 @@ }, { "$ref": "#/$defs/ancient_dna_assembly" + }, + { + "$ref": "#/$defs/generate_samplesheet_options" } ] } diff 
--git a/subworkflows/local/generate_downstream_samplesheets/main.nf b/subworkflows/local/generate_downstream_samplesheets/main.nf
new file mode 100644
index 00000000..21439cd2
--- /dev/null
+++ b/subworkflows/local/generate_downstream_samplesheets/main.nf
@@ -0,0 +1,116 @@
+//
+// Subworkflows writing input samplesheets for downstream pipelines from nf-core/mag outputs
+//
+
+//
+// Writes the nf-core/taxprofiler samplesheet (CSV) listing the pre-processed short reads.
+// Takes a channel of [ meta, fastq ] tuples; meta.id and meta.single_end are read.
+//
+workflow SAMPLESHEET_TAXPROFILER {
+    take:
+    ch_reads
+
+    main:
+    def format = 'csv'
+
+    // Sub-directory of --outdir the reads are expected in, chosen from the enabled pre-processing options
+    def fastq_rel_path = '/'
+    if (params.bbnorm) {
+        fastq_rel_path = "/bbmap/bbnorm/"
+    }
+    else if (!params.keep_phix) {
+        fastq_rel_path = "/QC_shortreads/remove_phix/"
+    }
+    else if (params.host_fasta) {
+        // Truthiness check so that an unset (null) --host_fasta does not select this directory
+        // NOTE(review): host removal requested through --host_genome is not covered here - confirm
+        fastq_rel_path = "/QC_shortreads/remove_host/"
+    }
+    else if (!params.skip_clipping && params.clip_tool == 'fastp') {
+        fastq_rel_path = "/QC_shortreads/fastp/"
+    }
+    else if (!params.skip_clipping && params.clip_tool == 'adapterremoval') {
+        fastq_rel_path = "/QC_shortreads/adapterremoval/"
+    }
+
+    ch_list_for_samplesheet = ch_reads
+        .map { meta, fastq ->
+            // Every file is referenced inside a per-sample sub-directory named after meta.id
+            def reads_dir = file(params.outdir).toString() + fastq_rel_path + meta.id + '/'
+            def sample = meta.id
+            def run_accession = meta.id
+            def instrument_platform = ""
+            def fastq_1 = reads_dir + (meta.single_end ? fastq.getName() : fastq[0].getName())
+            def fastq_2 = meta.single_end ? "" : reads_dir + fastq[1].getName()
+            def fasta = ""
+            // instrument_platform and fasta are written as empty columns
+            [sample: sample, run_accession: run_accession, instrument_platform: instrument_platform, fastq_1: fastq_1, fastq_2: fastq_2, fasta: fasta]
+        }
+
+    channelToSamplesheet(ch_list_for_samplesheet, "${params.outdir}/downstream_samplesheets/taxprofiler", format)
+}
+
+//
+// Writes the nf-core/funcscan samplesheet (CSV) listing the assembly FASTA files.
+// Takes a channel of [ meta, filename ] tuples; meta.assembler is read.
+//
+workflow SAMPLESHEET_FUNCSCAN {
+    take:
+    ch_assemblies
+
+    main:
+    def format = 'csv'
+
+    ch_list_for_samplesheet = ch_assemblies
+        .map { meta, filename ->
+            // Sample name: file name without extension; for *.gz the extension before .gz is dropped as well
+            def sample = filename.extension ==~ 'gz' ? filename.baseName.take(filename.baseName.lastIndexOf('.')) : filename.baseName
+            def fasta = file(params.outdir).toString() + '/Assembly/' + meta.assembler + '/' + filename.getName()
+            [sample: sample, fasta: fasta]
+        }
+
+    channelToSamplesheet(ch_list_for_samplesheet, "${params.outdir}/downstream_samplesheets/funcscan", format)
+}
+
+//
+// Runs the samplesheet subworkflow of every pipeline named in --generate_pipeline_samplesheets.
+//
+workflow GENERATE_DOWNSTREAM_SAMPLESHEETS {
+    take:
+    ch_reads
+    ch_assemblies
+
+    main:
+    // Comma-separated pipeline names; an unset parameter gives an empty list instead of failing on null
+    def downstreampipeline_names = (params.generate_pipeline_samplesheets ?: '').tokenize(',')
+
+    if (downstreampipeline_names.contains('taxprofiler')) {
+        SAMPLESHEET_TAXPROFILER(ch_reads)
+    }
+
+    if (downstreampipeline_names.contains('funcscan')) {
+        SAMPLESHEET_FUNCSCAN(ch_assemblies)
+    }
+}
+
+// Writes the maps emitted by a channel to one delimited text file: a header line built from the
+// keys of the first map, followed by one line of values per map.
+//   ch_list_for_samplesheet: channel emitting one map (column name -> value) per row
+//   path: output file path without extension
+//   format: 'csv', 'tsv' or 'txt'; selects the separator and is used as file extension
+def channelToSamplesheet(ch_list_for_samplesheet, path, format) {
+    def format_sep = [csv: ",", tsv: "\t", txt: "\t"][format]
+    if (format_sep == null) {
+        error("[nf-core/mag] Unsupported samplesheet format '${format}' (expected csv, tsv or txt)")
+    }
+
+    ch_list_for_samplesheet
+        .first()
+        .map { it.keySet().join(format_sep) }
+        .concat(ch_list_for_samplesheet.map { it.values().join(format_sep) })
+        .collectFile(
+            name: "${path}.${format}",
+            newLine: true,
+            sort: false
+        )
+}
diff --git a/subworkflows/local/utils_nfcore_mag_pipeline/main.nf b/subworkflows/local/utils_nfcore_mag_pipeline/main.nf
index 11f65460..452e9b3d 100644
--- a/subworkflows/local/utils_nfcore_mag_pipeline/main.nf
+++ b/subworkflows/local/utils_nfcore_mag_pipeline/main.nf
@@ -316,6 +316,40 @@ def validateInputParameters(hybrid) {
     if (params.save_mmseqs_db && !params.metaeuk_mmseqs_db) {
         error('[nf-core/mag] ERROR: Invalid parameter combination: --save_mmseqs_db supplied but no database has been requested for download with --metaeuk_mmseqs_db!')
     }
+
+    if (params.generate_downstream_samplesheets) {
+
+        if (!params.generate_pipeline_samplesheets) {
+            error('[nf-core/mag] If supplying `--generate_downstream_samplesheets`, you must also specify which pipeline to generate for with 
`--generate_pipeline_samplesheets`! Check input.') + } + + if (params.generate_pipeline_samplesheets.split(',').contains('taxprofiler') && params.save_clipped_reads && (!params.bbnorm && params.keep_phix && !params.host_fasta && params.skip_clipping)) { + error('[nf-core/mag] Supplied --generate_downstream_samplesheets and --save_clipped_reads is true, but also need at least one of the following: --bbnorm, or --host_fasta, and/or do not supply both --keep_phix and --skip_clipping') + } + + if (params.generate_pipeline_samplesheets.split(',').contains('taxprofiler') && params.bbnorm && !params.save_bbnorm_reads) { + error('[nf-core/mag] Supplied --generate_downstream_samplesheets but missing --save_bbnorm_reads (mandatory for reads.gz output when --bbnorm).') + } + else if (params.generate_pipeline_samplesheets.split(',').contains('taxprofiler') && !params.bbnorm && !params.keep_phix && !params.save_phixremoved_reads) { + error('[nf-core/mag] Supplied --generate_downstream_samplesheets but missing --save_phixremoved_reads (mandatory for reads.gz output when phix being removed [default behaviour]).') + } + else if (params.generate_pipeline_samplesheets.split(',').contains('taxprofiler') && !params.bbnorm && params.keep_phix && params.host_fasta && !params.save_hostremoved_reads) { + error('[nf-core/mag] Supplied --generate_downstream_samplesheets but missing --save_hostremoved_reads (mandatory for reads.gz output when host reads being removed).') + } + else if (params.generate_pipeline_samplesheets.split(',').contains('taxprofiler') && !params.bbnorm && params.keep_phix && !params.host_fasta && !params.skip_clipping && !params.save_clipped_reads) { + error('[nf-core/mag] Supplied --generate_downstream_samplesheets but missing --save_clipped_reads (mandatory for reads.gz output when running clipping).') + } + } + + // Validate generate samplesheet inputs + + + + + + + + } // diff --git a/workflows/mag.nf b/workflows/mag.nf index 7afb4316..19b5deec 100644 --- 
a/workflows/mag.nf +++ b/workflows/mag.nf @@ -3,6 +3,7 @@ IMPORT MODULES / SUBWORKFLOWS / FUNCTIONS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ + include { MULTIQC } from '../modules/nf-core/multiqc/main' include { paramsSummaryMap } from 'plugin/nf-schema' include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' @@ -12,6 +13,7 @@ include { methodsDescriptionText } from '../subwo // // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules // + include { BINNING_PREPARATION } from '../subworkflows/local/binning_preparation' include { BINNING } from '../subworkflows/local/binning' include { BINNING_REFINEMENT } from '../subworkflows/local/binning_refinement' @@ -24,38 +26,40 @@ include { ANCIENT_DNA_ASSEMBLY_VALIDATION } from '../subwo include { DOMAIN_CLASSIFICATION } from '../subworkflows/local/domain_classification' include { DEPTHS } from '../subworkflows/local/depths' include { LONGREAD_PREPROCESSING } from '../subworkflows/local/longread_preprocessing' +include { GENERATE_DOWNSTREAM_SAMPLESHEETS } from '../subworkflows/local/generate_downstream_samplesheets/main.nf' // // MODULE: Installed directly from nf-core/modules // -include { ARIA2 as ARIA2_UNTAR } from '../modules/nf-core/aria2/main' -include { FASTQC as FASTQC_RAW } from '../modules/nf-core/fastqc/main' -include { FASTQC as FASTQC_TRIMMED } from '../modules/nf-core/fastqc/main' -include { SEQTK_MERGEPE } from '../modules/nf-core/seqtk/mergepe/main' -include { BBMAP_BBNORM } from '../modules/nf-core/bbmap/bbnorm/main' -include { FASTP } from '../modules/nf-core/fastp/main' -include { ADAPTERREMOVAL as ADAPTERREMOVAL_PE } from '../modules/nf-core/adapterremoval/main' -include { ADAPTERREMOVAL as ADAPTERREMOVAL_SE } from '../modules/nf-core/adapterremoval/main' -include { UNTAR as CENTRIFUGEDB_UNTAR } from '../modules/nf-core/untar/main' -include { CENTRIFUGE_CENTRIFUGE } from 
'../modules/nf-core/centrifuge/centrifuge/main' -include { CENTRIFUGE_KREPORT } from '../modules/nf-core/centrifuge/kreport/main' -include { KRONA_KRONADB } from '../modules/nf-core/krona/kronadb/main' -include { KRONA_KTIMPORTTAXONOMY } from '../modules/nf-core/krona/ktimporttaxonomy/main' +include { ARIA2 as ARIA2_UNTAR } from '../modules/nf-core/aria2/main' +include { FASTQC as FASTQC_RAW } from '../modules/nf-core/fastqc/main' +include { FASTQC as FASTQC_TRIMMED } from '../modules/nf-core/fastqc/main' +include { SEQTK_MERGEPE } from '../modules/nf-core/seqtk/mergepe/main' +include { BBMAP_BBNORM } from '../modules/nf-core/bbmap/bbnorm/main' +include { FASTP } from '../modules/nf-core/fastp/main' +include { ADAPTERREMOVAL as ADAPTERREMOVAL_PE } from '../modules/nf-core/adapterremoval/main' +include { ADAPTERREMOVAL as ADAPTERREMOVAL_SE } from '../modules/nf-core/adapterremoval/main' +include { UNTAR as CENTRIFUGEDB_UNTAR } from '../modules/nf-core/untar/main' +include { CENTRIFUGE_CENTRIFUGE } from '../modules/nf-core/centrifuge/centrifuge/main' +include { CENTRIFUGE_KREPORT } from '../modules/nf-core/centrifuge/kreport/main' +include { KRONA_KRONADB } from '../modules/nf-core/krona/kronadb/main' +include { KRONA_KTIMPORTTAXONOMY } from '../modules/nf-core/krona/ktimporttaxonomy/main' include { KRAKENTOOLS_KREPORT2KRONA as KREPORT2KRONA_CENTRIFUGE } from '../modules/nf-core/krakentools/kreport2krona/main' -include { CAT_FASTQ } from '../modules/nf-core/cat/fastq/main' -include { MEGAHIT } from '../modules/nf-core/megahit/main' -include { SPADES as METASPADES } from '../modules/nf-core/spades/main' -include { SPADES as METASPADESHYBRID } from '../modules/nf-core/spades/main' -include { GUNZIP as GUNZIP_ASSEMBLIES } from '../modules/nf-core/gunzip' -include { GUNZIP as GUNZIP_ASSEMBLYINPUT } from '../modules/nf-core/gunzip' -include { PRODIGAL } from '../modules/nf-core/prodigal/main' -include { PROKKA } from '../modules/nf-core/prokka/main' -include { 
MMSEQS_DATABASES } from '../modules/nf-core/mmseqs/databases/main' -include { METAEUK_EASYPREDICT } from '../modules/nf-core/metaeuk/easypredict/main' +include { CAT_FASTQ } from '../modules/nf-core/cat/fastq/main' +include { MEGAHIT } from '../modules/nf-core/megahit/main' +include { SPADES as METASPADES } from '../modules/nf-core/spades/main' +include { SPADES as METASPADESHYBRID } from '../modules/nf-core/spades/main' +include { GUNZIP as GUNZIP_ASSEMBLIES } from '../modules/nf-core/gunzip' +include { GUNZIP as GUNZIP_ASSEMBLYINPUT } from '../modules/nf-core/gunzip' +include { PRODIGAL } from '../modules/nf-core/prodigal/main' +include { PROKKA } from '../modules/nf-core/prokka/main' +include { MMSEQS_DATABASES } from '../modules/nf-core/mmseqs/databases/main' +include { METAEUK_EASYPREDICT } from '../modules/nf-core/metaeuk/easypredict/main' // // MODULE: Local to the pipeline // + include { BOWTIE2_REMOVAL_BUILD as BOWTIE2_HOST_REMOVAL_BUILD } from '../modules/local/bowtie2_removal_build' include { BOWTIE2_REMOVAL_ALIGN as BOWTIE2_HOST_REMOVAL_ALIGN } from '../modules/local/bowtie2_removal_align' include { BOWTIE2_REMOVAL_BUILD as BOWTIE2_PHIX_REMOVAL_BUILD } from '../modules/local/bowtie2_removal_build' @@ -539,9 +543,11 @@ workflow MAG { ch_long_reads_spades = Channel.empty() } + // Assembly - ch_assembled_contigs = Channel.empty() + ch_assembled_contigs = Channel.empty() + ch_assembled_contigs_gz = Channel.empty() if (!params.single_end && !params.skip_spades) { METASPADES(ch_short_reads_spades.map { meta, reads -> [meta, reads, [], []] }, [], []) @@ -549,7 +555,10 @@ workflow MAG { def meta_new = meta + [assembler: 'SPAdes'] [meta_new, assembly] } + .tap { ch_spades_assemblies_gz } + ch_assembled_contigs = ch_assembled_contigs.mix(ch_spades_assemblies) + ch_assembled_contigs_gz = ch_assembled_contigs_gz.mix(ch_spades_assemblies_gz) ch_versions = ch_versions.mix(METASPADES.out.versions.first()) } @@ -566,7 +575,10 @@ workflow MAG { def meta_new = meta + 
[assembler: "SPAdesHybrid"] [meta_new, assembly] } + .tap { ch_spadeshybrid_assemblies_gz } + ch_assembled_contigs = ch_assembled_contigs.mix(ch_spadeshybrid_assemblies) + ch_assembled_contigs_gz = ch_assembled_contigs_gz.mix(ch_spadeshybrid_assemblies_gz) ch_versions = ch_versions.mix(METASPADESHYBRID.out.versions.first()) } @@ -576,12 +588,13 @@ workflow MAG { def meta_new = meta + [assembler: 'MEGAHIT'] [meta_new, assembly] } + .tap { ch_megahit_assemblies_gz } ch_assembled_contigs = ch_assembled_contigs.mix(ch_megahit_assemblies) + ch_assembled_contigs_gz = ch_assembled_contigs_gz.mix(ch_megahit_assemblies_gz) ch_versions = ch_versions.mix(MEGAHIT.out.versions.first()) } - GUNZIP_ASSEMBLIES(ch_assembled_contigs) ch_versions = ch_versions.mix(GUNZIP_ASSEMBLIES.out.versions) @@ -791,6 +804,7 @@ workflow MAG { ch_busco_summary = BUSCO_QC.out.summary ch_versions = ch_versions.mix(BUSCO_QC.out.versions.first()) // process information if BUSCO analysis failed for individual bins due to no matching genes + BUSCO_QC.out.failed_bin.splitCsv(sep: '\t').map { bin, error -> if (!bin.contains(".unbinned.")) { busco_failed_bins[bin] = error @@ -959,6 +973,13 @@ workflow MAG { } } + // + // Samplesheet generation + // + if (params.generate_downstream_samplesheets) { + GENERATE_DOWNSTREAM_SAMPLESHEETS(ch_short_reads_assembly, ch_assembled_contigs_gz) + } + // // Collate and save software versions //