Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for generating taxprofiler/funcscan input samplesheets for preprocessed FASTQs/FASTAs #688

Draft
wants to merge 19 commits into
base: dev
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 45 additions & 45 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,14 @@ process {
publishDir = [path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, enabled: false]

withName: FASTQC_RAW {
ext.args = '--quiet'
ext.args = '--quiet'
publishDir = [path: { "${params.outdir}/QC_shortreads/fastqc" }, mode: params.publish_dir_mode, pattern: "*.html"]
ext.prefix = { "${meta.id}_run${meta.run}_raw" }
tag = { "${meta.id}_run${meta.run}_raw" }
tag = { "${meta.id}_run${meta.run}_raw" }
}

withName: FASTP {
ext.args = [
ext.args = [
"-q ${params.fastp_qualified_quality}",
"--cut_front",
"--cut_tail",
Expand All @@ -44,11 +44,11 @@ process {
]
]
ext.prefix = { "${meta.id}_run${meta.run}_fastp" }
tag = { "${meta.id}_run${meta.run}" }
tag = { "${meta.id}_run${meta.run}" }
}

withName: ADAPTERREMOVAL_PE {
ext.args = [
ext.args = [
"--minlength ${params.reads_minlength}",
"--adapter1 ${params.adapterremoval_adapter1} --adapter2 ${params.adapterremoval_adapter2}",
"--minquality ${params.adapterremoval_minquality} --trimns",
Expand All @@ -68,11 +68,11 @@ process {
]
]
ext.prefix = { "${meta.id}_run${meta.run}_ar2" }
tag = { "${meta.id}_run${meta.run}" }
tag = { "${meta.id}_run${meta.run}" }
}

withName: ADAPTERREMOVAL_SE {
ext.args = [
ext.args = [
"--minlength ${params.reads_minlength}",
"--adapter1 ${params.adapterremoval_adapter1}",
"--minquality ${params.adapterremoval_minquality} --trimns",
Expand All @@ -84,72 +84,72 @@ process {
pattern: "*.{settings}"
]
ext.prefix = { "${meta.id}_run${meta.run}_ar2" }
tag = { "${meta.id}_run${meta.run}" }
tag = { "${meta.id}_run${meta.run}" }
}

withName: BOWTIE2_PHIX_REMOVAL_ALIGN {
ext.prefix = { "${meta.id}_run${meta.run}_phix_removed" }
publishDir = [
[
path: { "${params.outdir}/QC_shortreads/remove_phix" },
path: { "${params.outdir}/QC_shortreads/remove_phix/${meta.id}/" },
mode: params.publish_dir_mode,
pattern: "*.log"
],
[
path: { "${params.outdir}/QC_shortreads/remove_phix" },
path: { "${params.outdir}/QC_shortreads/remove_phix/${meta.id}/" },
mode: params.publish_dir_mode,
pattern: "*.unmapped*.fastq.gz",
enabled: params.save_phixremoved_reads
]
]
tag = { "${meta.id}_run${meta.run}" }
tag = { "${meta.id}_run${meta.run}" }
}

withName: BOWTIE2_HOST_REMOVAL_ALIGN {
ext.args = params.host_removal_verysensitive ? "--very-sensitive" : "--sensitive"
ext.args2 = params.host_removal_save_ids ? "--host_removal_save_ids" : ''
ext.args = params.host_removal_verysensitive ? "--very-sensitive" : "--sensitive"
ext.args2 = params.host_removal_save_ids ? "--host_removal_save_ids" : ''
ext.prefix = { "${meta.id}_run${meta.run}_host_removed" }
publishDir = [
[
path: { "${params.outdir}/QC_shortreads/remove_host" },
path: { "${params.outdir}/QC_shortreads/remove_host/${meta.id}/" },
mode: params.publish_dir_mode,
pattern: "*{.log,read_ids.txt}"
],
[
path: { "${params.outdir}/QC_shortreads/remove_host" },
path: { "${params.outdir}/QC_shortreads/remove_host/${meta.id}/" },
mode: params.publish_dir_mode,
pattern: "*.unmapped*.fastq.gz",
enabled: params.save_hostremoved_reads
]
]
tag = { "${meta.id}_run${meta.run}" }
tag = { "${meta.id}_run${meta.run}" }
}

withName: FASTQC_TRIMMED {
ext.args = '--quiet'
ext.args = '--quiet'
ext.prefix = { "${meta.id}_run${meta.run}_trimmed" }
publishDir = [
path: { "${params.outdir}/QC_shortreads/fastqc" },
mode: params.publish_dir_mode,
pattern: "*.html"
]
tag = { "${meta.id}_run${meta.run}" }
tag = { "${meta.id}_run${meta.run}" }
}

withName: BBMAP_BBNORM {
ext.args = [
ext.args = [
params.bbnorm_target ? "target=${params.bbnorm_target}" : '',
params.bbnorm_min ? "min=${params.bbnorm_min}" : ''
].join(' ').trim()
publishDir = [
[
path: { "${params.outdir}/bbmap/bbnorm/logs" },
path: { "${params.outdir}/bbmap/bbnorm/${meta.id}/" },
enabled: params.save_bbnorm_reads,
mode: params.publish_dir_mode,
pattern: "*.log"
],
[
path: { "${params.outdir}/bbmap/bbnorm/" },
path: { "${params.outdir}/bbmap/bbnorm/${meta.id}/" },
mode: 'copy',
enabled: params.save_bbnorm_reads,
mode: params.publish_dir_mode,
Expand Down Expand Up @@ -179,7 +179,7 @@ process {
}

withName: FILTLONG {
ext.args = [
ext.args = [
"--min_length ${params.longreads_min_length}",
"--keep_percent ${params.longreads_keep_percent}",
"--trim",
Expand All @@ -201,7 +201,7 @@ process {

withName: NANOPLOT_RAW {
ext.prefix = 'raw'
ext.args = {
ext.args = {
[
"-p raw_",
"--title ${meta.id}_raw",
Expand All @@ -216,7 +216,7 @@ process {
}

withName: NANOPLOT_FILTERED {
ext.args = {
ext.args = {
[
"-p filtered_",
"--title ${meta.id}_filtered",
Expand All @@ -240,7 +240,7 @@ process {
}

withName: KRAKEN2 {
ext.args = '--quiet'
ext.args = '--quiet'
publishDir = [
path: { "${params.outdir}/Taxonomy/kraken2/${meta.id}" },
mode: params.publish_dir_mode,
Expand All @@ -257,7 +257,7 @@ process {
}

withName: MEGAHIT {
ext.args = { params.megahit_options ? params.megahit_options + "-m ${task.memory.toBytes()}" : "-m ${task.memory.toBytes()}" }
ext.args = { params.megahit_options ? params.megahit_options + "-m ${task.memory.toBytes()}" : "-m ${task.memory.toBytes()}" }
ext.prefix = { "MEGAHIT-${meta.id}" }
publishDir = [path: { "${params.outdir}/Assembly/MEGAHIT" }, mode: params.publish_dir_mode, pattern: "*.{fa.gz,log}"]
}
Expand All @@ -279,7 +279,7 @@ process {
}

withName: GENOMAD_ENDTOEND {
ext.args = [
ext.args = [
"--cleanup",
"--min-score ${params.genomad_min_score}",
"--splits ${params.genomad_splits}"
Expand All @@ -292,7 +292,7 @@ process {
}

withName: BOWTIE2_ASSEMBLY_ALIGN {
ext.args = params.bowtie2_mode ? params.bowtie2_mode : params.ancient_dna ? '--very-sensitive -N 1' : ''
ext.args = params.bowtie2_mode ? params.bowtie2_mode : params.ancient_dna ? '--very-sensitive -N 1' : ''
ext.prefix = { "${meta.id}.assembly" }
publishDir = [
[
Expand Down Expand Up @@ -326,7 +326,7 @@ process {
}

withName: BUSCO {
ext.args = [
ext.args = [
params.busco_db ? '--offline' : ''
].join(' ').trim()
publishDir = [
Expand All @@ -349,14 +349,14 @@ process {
}

withName: CHECKM_LINEAGEWF {
tag = { "${meta.assembler}-${meta.binner}-${meta.domain}-${meta.refinement}-${meta.id}" }
tag = { "${meta.assembler}-${meta.binner}-${meta.domain}-${meta.refinement}-${meta.id}" }
ext.prefix = { "${meta.assembler}-${meta.binner}-${meta.domain}-${meta.refinement}-${meta.id}_wf" }
publishDir = [path: { "${params.outdir}/GenomeBinning/QC/CheckM" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}

withName: CHECKM_QA {
ext.prefix = { "${meta.assembler}-${meta.binner}-${meta.domain}-${meta.refinement}-${meta.id}_qa" }
ext.args = "-o 2 --tab_table"
ext.args = "-o 2 --tab_table"
publishDir = [
path: { "${params.outdir}/GenomeBinning/QC/CheckM" },
mode: params.publish_dir_mode,
Expand Down Expand Up @@ -409,7 +409,7 @@ process {
}

withName: GTDBTK_CLASSIFYWF {
ext.args = [
ext.args = [
"--extension fa",
"--min_perc_aa ${params.gtdbtk_min_perc_aa}",
"--min_af ${params.gtdbtk_min_af}",
Expand All @@ -424,30 +424,30 @@ process {
}

withName: GTDBTK_SUMMARY {
ext.args = "--extension fa"
ext.args = "--extension fa"
publishDir = [path: { "${params.outdir}/Taxonomy/GTDB-Tk" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}

withName: PROKKA {
ext.args = "--metagenome"
ext.args = "--metagenome"
publishDir = [path: { "${params.outdir}/Annotation/Prokka/${meta.assembler}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}

withName: PRODIGAL {
ext.args = "-p meta"
ext.args = "-p meta"
ext.prefix = { "${meta.assembler}-${meta.id}_prodigal" }
publishDir = [path: { "${params.outdir}/Annotation/Prodigal/${meta.assembler}/${meta.id}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}

withName: FREEBAYES {
ext.prefix = { "${meta.assembler}-${meta.id}" }
ext.args = "-p ${params.freebayes_ploidy} -q ${params.freebayes_min_basequality} -F ${params.freebayes_minallelefreq}"
ext.args = "-p ${params.freebayes_ploidy} -q ${params.freebayes_min_basequality} -F ${params.freebayes_minallelefreq}"
publishDir = [path: { "${params.outdir}/Ancient_DNA/variant_calling/freebayes" }, mode: params.publish_dir_mode, pattern: "*.vcf.gz"]
}

withName: BCFTOOLS_VIEW {
ext.prefix = { "${meta.assembler}-${meta.id}.filtered" }
ext.args = "-v snps,mnps -i 'QUAL>=${params.bcftools_view_high_variant_quality} || (QUAL>=${params.bcftools_view_medium_variant_quality} && FORMAT/AO>=${params.bcftools_view_minimal_allelesupport})'"
ext.args = "-v snps,mnps -i 'QUAL>=${params.bcftools_view_high_variant_quality} || (QUAL>=${params.bcftools_view_medium_variant_quality} && FORMAT/AO>=${params.bcftools_view_minimal_allelesupport})'"
publishDir = [path: { "${params.outdir}/Ancient_DNA/variant_calling/filtered" }, mode: params.publish_dir_mode, pattern: "*.vcf.gz"]
}

Expand All @@ -462,7 +462,7 @@ process {

withName: BCFTOOLS_INDEX {
ext.prefix = { "${meta.assembler}-${meta.id}" }
ext.args = "-t"
ext.args = "-t"
publishDir = [
path: { "${params.outdir}/Ancient_DNA/variant_calling/index" },
mode: params.publish_dir_mode,
Expand All @@ -480,7 +480,7 @@ process {

withName: PYDAMAGE_FILTER {
ext.prefix = { "${meta.assembler}-${meta.id}" }
ext.args = "-t ${params.pydamage_accuracy}"
ext.args = "-t ${params.pydamage_accuracy}"
publishDir = [
path: { "${params.outdir}/Ancient_DNA/pydamage/filter/${meta.assembler}-${meta.id}/" },
mode: params.publish_dir_mode
Expand All @@ -504,7 +504,7 @@ process {
withName: METABAT2_METABAT2 {
publishDir = [[path: { "${params.outdir}/GenomeBinning/MetaBAT2/bins/" }, mode: params.publish_dir_mode, pattern: '*[!lowDepth|tooShort|unbinned].fa.gz'], [path: { "${params.outdir}/GenomeBinning/MetaBAT2/discarded" }, mode: params.publish_dir_mode, pattern: '*tooShort.fa.gz'], [path: { "${params.outdir}/GenomeBinning/MetaBAT2/discarded" }, mode: params.publish_dir_mode, pattern: '*lowDepth.fa.gz']]
ext.prefix = { "${meta.assembler}-MetaBAT2-${meta.id}" }
ext.args = [
ext.args = [
params.min_contig_size < 1500 ? "-m 1500" : "-m ${params.min_contig_size}",
"--unbinned",
"--seed ${params.metabat_rng_seed}"
Expand Down Expand Up @@ -583,7 +583,7 @@ process {
]
]
ext.prefix = { "${meta.assembler}-DASTool-${meta.id}" }
ext.args = "--write_bins --write_unbinned --write_bin_evals --score_threshold ${params.refine_bins_dastool_threshold}"
ext.args = "--write_bins --write_unbinned --write_bin_evals --score_threshold ${params.refine_bins_dastool_threshold}"
}

withName: RENAME_POSTDASTOOL {
Expand All @@ -607,12 +607,12 @@ process {
mode: params.publish_dir_mode,
pattern: "*.txt"
]
ext.args = { "--min_len ${params.tiara_min_length} --probabilities" }
ext.args = { "--min_len ${params.tiara_min_length} --probabilities" }
ext.prefix = { "${meta.assembler}-${meta.id}.tiara" }
}

withName: TIARA_CLASSIFY {
ext.args = { "--join_prokaryotes --assembler ${meta.assembler}" }
ext.args = { "--join_prokaryotes --assembler ${meta.assembler}" }
ext.prefix = { "${meta.assembler}-${meta.binner}-${meta.bin}-${meta.id}" }
}

Expand All @@ -627,13 +627,13 @@ process {
}

withName: METAEUK_EASYPREDICT {
ext.args = ""
ext.args = ""
ext.prefix = { "${meta.id}" }
publishDir = [path: { "${params.outdir}/Annotation/MetaEuk/${meta.assembler}/${meta.id}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}

withName: MULTIQC {
ext.args = { params.multiqc_title ? "--title \"${params.multiqc_title}\"" : '' }
ext.args = { params.multiqc_title ? "--title \"${params.multiqc_title}\"" : '' }
publishDir = [
path: { "${params.outdir}/multiqc" },
mode: params.publish_dir_mode,
Expand Down
4 changes: 4 additions & 0 deletions conf/test_hybrid.config
Original file line number Diff line number Diff line change
Expand Up @@ -31,4 +31,8 @@ params {
skip_gtdbtk = true
gtdbtk_min_completeness = 0.01
skip_concoct = true

// Generate downstream samplesheets
generate_downstream_samplesheets = true
generate_pipeline_samplesheets = null
}
27 changes: 27 additions & 0 deletions docs/output.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d

Note that when specifying the parameter `--coassemble_group`, the group ID (or more precisely, the term `group-[group_id]`) will be used instead of the sample ID in the corresponding output filenames/directories of the assembly or downstream processes.

The pipeline can also generate downstream pipeline input samplesheets.
These are stored in `<outdir>/downstream_samplesheets`.

## Quality control

These steps trim away the adapter sequences present in input reads, trim away bad quality bases and discard reads that are too short.
Expand Down Expand Up @@ -720,6 +723,9 @@ Because of aDNA damage, _de novo_ assemblers sometimes struggle to call a correc

</details>

The pipeline can also generate input samplesheets for downstream pipelines.
These are stored in `<outdir>/downstream_samplesheets`.

### MultiQC

<details markdown="1">
Expand Down Expand Up @@ -764,3 +770,24 @@ Summary tool-specific plots and tables of following tools are currently displaye
</details>

[Nextflow](https://www.nextflow.io/docs/latest/tracing.html) provides excellent functionality for generating various reports relevant to the running and execution of the pipeline. This will allow you to troubleshoot errors with the running of the pipeline, and also provide you with other information such as launch commands, run times and resource usage.

### Downstream samplesheets

The pipeline can also generate input files for the following downstream pipelines:

- [nf-core/funcscan](https://nf-co.re/funcscan)
- [nf-core/taxprofiler](https://nf-co.re/taxprofiler)

<details markdown="1">
<summary>Output files</summary>

- `downstream_samplesheets/`
- `taxprofiler.csv`: Partially filled out nf-core/taxprofiler `--input` csv with paths to preprocessed reads (adapter trimmed, host removed etc.) in `.fastq.gz` format, i.e., the direct input into MEGAHIT, SPAdes, SPAdesHybrid.
- `funcscan.csv`: Filled out nf-core/funcscan `--input` csv with absolute paths to the assembled contig FASTA files produced by nf-core/mag (i.e., the direct output from MEGAHIT, SPAdes, SPAdesHybrid - not bins).

</details>

:::warning
Any generated downstream samplesheet is provided as 'best effort' and is not guaranteed to work straight out of the box!
They may not be complete (e.g. some columns may need to be manually filled in).
:::
4 changes: 4 additions & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,10 @@ params {
metaeuk_db = null
save_mmseqs_db = false

// Generate downstream samplesheets
generate_downstream_samplesheets = false
generate_pipeline_samplesheets = null

// References
//genome = null // we use --host_genome instead
igenomes_base = 's3://ngi-igenomes/igenomes/'
Expand Down
Loading
Loading