diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 47df952f..76760152 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -116,4 +116,3 @@ To get started: Devcontainer specs: - [DevContainer config](.devcontainer/devcontainer.json) -- [Dockerfile](.devcontainer/Dockerfile) diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml index c6865b29..1c725314 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yml +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -42,7 +42,7 @@ body: attributes: label: System information description: | - * Nextflow version _(eg. 22.10.1)_ + * Nextflow version _(eg. 23.04.0)_ * Hardware _(eg. HPC, Desktop, Cloud)_ * Executor _(eg. slurm, local, awsbatch)_ * Container engine: _(e.g. Docker, Singularity, Conda, Podman, Shifter, Charliecloud, or Apptainer)_ diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml index d777aefd..929ad504 100644 --- a/.github/workflows/awsfulltest.yml +++ b/.github/workflows/awsfulltest.yml @@ -14,7 +14,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Launch workflow via tower - uses: seqeralabs/action-tower-launch@v1 + uses: seqeralabs/action-tower-launch@v2 # TODO nf-core: You can customise AWS full pipeline tests as required # Add full size test data (but still relatively small datasets for few samples) # on the `test_full.config` test runs with only one set of parameters @@ -22,13 +22,18 @@ jobs: workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + revision: ${{ github.sha }} workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/raredisease/work-${{ github.sha }} parameters: | { + "hook_url": "${{ secrets.MEGATESTS_ALERTS_SLACK_HOOK_URL }}", "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/raredisease/results-${{ github.sha }}" } - profiles: test_full,aws_tower + profiles: test_full + - uses: actions/upload-artifact@v3 with: name: Tower debug log file - path: tower_action_*.log + path: | + tower_action_*.log + tower_action_*.json diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml index 61ea561e..9a6e057a 100644 --- a/.github/workflows/awstest.yml +++ b/.github/workflows/awstest.yml @@ -12,18 +12,22 @@ jobs: steps: # Launch workflow using Tower CLI tool action - name: Launch workflow via tower - uses: seqeralabs/action-tower-launch@v1 + uses: seqeralabs/action-tower-launch@v2 with: workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + revision: ${{ github.sha }} workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/raredisease/work-${{ github.sha }} parameters: | { "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/raredisease/results-test-${{ github.sha }}" } - profiles: test,aws_tower + profiles: test + - uses: actions/upload-artifact@v3 with: name: Tower debug log file - path: tower_action_*.log + path: | + tower_action_*.log + tower_action_*.json diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f7883268..8f7a9eae 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -24,7 +24,7 @@ jobs: strategy: matrix: NXF_VER: - - "22.10.1" + - "23.04.0" - "latest-everything" parameters: - "-profile test,docker" diff --git a/.gitpod.yml b/.gitpod.yml index 85d95ecc..25488dcc 100644 --- a/.gitpod.yml +++ b/.gitpod.yml @@ -1,4 +1,9 @@ image: nfcore/gitpod:latest +tasks: + - name: Update Nextflow and setup pre-commit + command: | + 
pre-commit install --install-hooks + nextflow self-update vscode: extensions: # based on nf-core.nf-core-extensionpack diff --git a/CHANGELOG.md b/CHANGELOG.md index eabbc201..9c92d787 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,24 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## v1.0.0 - [2023-06-01] +## v1.1.0 - Abu [2023-07-21] + +### `Added` + +- Add GATK's cnv calling pipeline [#362](https://github.com/nf-core/raredisease/pull/362) +- GATK's ShiftFasta to generate all the files required for mitochondrial analysis [#354](https://github.com/nf-core/raredisease/pull/354) +- Feature to calculate CADD scores for indels [#325](https://github.com/nf-core/raredisease/pull/325) +- HmtNote to annotate mitochondria [#355](https://github.com/nf-core/raredisease/pull/355) +- MT del script to detect mitochondrial deletions [#349](https://github.com/nf-core/raredisease/pull/349) +- eKLIPse to identify large mitochondrial deletions [#365](https://github.com/nf-core/raredisease/pull/365) +- UPD+Chromograph to identify and visualize UPD sites and regions in the chromosomes [#364](https://github.com/nf-core/raredisease/pull/364) and [#366](https://github.com/nf-core/raredisease/pull/366) +- Added check for presence of case id for each sample in samplesheet [#357](https://github.com/nf-core/raredisease/pull/357) + +### Fixed + +- Avoiding publishing uncompressed VCF-file from `HMTNOTE_ANNOTATE`. (The corresponding compressed VCF-file still gets published.) [#368](https://github.com/nf-core/raredisease/pull/368) + +## v1.0.0 - Aladdin [2023-06-01] Initial release of nf-core/raredisease, created with the [nf-core](https://nf-co.re/) template. diff --git a/CITATIONS.md b/CITATIONS.md index 80e78ee8..c6074138 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -10,7 +10,7 @@ ## Pipeline tools -- [BCFtools](https://academic.oup.com/gigascience/article/10/2/giab008/6137722) +- [BCFtools](https://academic.oup.com/gigascience/article/10/2/giab008/6137722) & [SAMtools](https://academic.oup.com/bioinformatics/article/25/16/2078/204688) > Danecek P, Bonfield JK, Liddle J, et al. Twelve years of SAMtools and BCFtools. GigaScience. 2021;10(2):giab008. doi:10.1093/gigascience/giab008 @@ -22,10 +22,20 @@ > Vasimuddin Md, Misra S, Li H, Aluru S. Efficient Architecture-Aware Acceleration of BWA-MEM for Multicore Systems. In: 2019 IEEE International Parallel and Distributed Processing Symposium (IPDPS). IEEE; 2019:314-324. doi:10.1109/IPDPS.2019.00041 +- [CADD1](https://genomemedicine.biomedcentral.com/articles/10.1186/s13073-021-00835-9), [2](https://academic.oup.com/nar/article/47/D1/D886/5146191) + + > Rentzsch P, Schubach M, Shendure J, Kircher M. CADD-Splice—improving genome-wide variant effect prediction using deep learning-derived splice scores. Genome Med. 2021;13(1):31. doi:10.1186/s13073-021-00835-9 + + > Rentzsch P, Witten D, Cooper GM, Shendure J, Kircher M. CADD: predicting the deleteriousness of variants throughout the human genome. Nucleic Acids Research. 2019;47(D1):D886-D894. doi:10.1093/nar/gky1016 + - [DeepVariant](https://www.nature.com/articles/nbt.4235) > Poplin R, Chang PC, Alexander D, et al. A universal SNP and small-indel variant caller using deep neural networks. Nat Biotechnol. 2018;36(10):983-987. doi:10.1038/nbt.4235 +- [eKLIPse](https://www.nature.com/articles/s41436-018-0350-8) + + > Goudenège D, Bris C, Hoffmann V, et al. 
eKLIPse: a sensitive tool for the detection and quantification of mitochondrial DNA deletions from next-generation sequencing data. Genet Med 21, 1407–1416 (2019). doi:10.1038/s41436-018-0350-8 + - [Ensembl VEP](https://genomebiology.biomedcentral.com/articles/10.1186/s13059-016-0974-4) > McLaren W, Gil L, Hunt SE, et al. The Ensembl Variant Effect Predictor. Genome Biol. 2016;17(1):122. doi:10.1186/s13059-016-0974-4 @@ -36,6 +46,8 @@ - [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) + > Andrews, S. (2010). FastQC: A Quality Control Tool for High Throughput Sequence Data [Online]. Available online https://www.bioinformatics.babraham.ac.uk/projects/fastqc/. + - [GATK](https://genome.cshlp.org/content/20/9/1297) > McKenna A, Hanna M, Banks E, et al. The Genome Analysis Toolkit: A MapReduce framework for analyzing next-generation DNA sequencing data. Genome Res. 2010;20(9):1297-1303. doi:10.1101/gr.107524.110 @@ -68,9 +80,9 @@ > Pedersen BS, Quinlan AR. Mosdepth: quick coverage calculation for genomes and exomes. Hancock J, ed. Bioinformatics. 2018;34(5):867-868. doi:10.1093/bioinformatics/btx699 -- [MultiQC](https://academic.oup.com/bioinformatics/article/32/19/3047/2196507) +- [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/) - > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016;32(19):3047-3048. doi:10.1093/bioinformatics/btw354 + > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924. - [Peddy]() @@ -84,10 +96,6 @@ - [rhocall](https://github.com/dnil/rhocall) -- [SAMtools](https://academic.oup.com/bioinformatics/article/25/16/2078/204688) - - > Li H, Handsaker B, Wysoker A, et al. The Sequence Alignment/Map format and SAMtools. Bioinformatics. 2009;25(16):2078-2079. doi:10.1093/bioinformatics/btp352 - - [Sentieon DNAscope](https://www.biorxiv.org/content/10.1101/2022.05.20.492556v1.abstract) > Freed D, Pan R, Chen H, Li Z, Hu J, Aldana R. DNAscope: High Accuracy Small Variant Calling Using Machine Learning. Bioinformatics; 2022. doi:10.1101/2022.05.20.492556 @@ -140,5 +148,8 @@ - [Docker](https://dl.acm.org/doi/10.5555/2600239.2600241) + > Merkel, D. (2014). Docker: lightweight linux containers for consistent development and deployment. Linux Journal, 2014(239), 2. doi: 10.5555/2600239.2600241. + - [Singularity](https://pubmed.ncbi.nlm.nih.gov/28494014/) + > Kurtzer GM, Sochat V, Bauer MW. Singularity: Scientific containers for mobility of compute. PLoS One. 2017 May 11;12(5):e0177459. doi: 10.1371/journal.pone.0177459. eCollection 2017. PubMed PMID: 28494014; PubMed Central PMCID: PMC5426675. 
diff --git a/README.md b/README.md index 99fd603f..672230e6 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ [![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/raredisease/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.7995798-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.7995798) -[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A522.10.1-23aa62.svg)](https://www.nextflow.io/) +[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A523.04.0-23aa62.svg)](https://www.nextflow.io/) [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) [![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) [![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) @@ -54,12 +54,17 @@ On release, automated continuous integration tests run the pipeline on a full-si - [Manta](https://github.com/Illumina/manta) - [TIDDIT's sv](https://github.com/SciLifeLab/TIDDIT) +- Copy number variant calling: + - [GATK GermlineCNVCaller](https://github.com/broadinstitute/gatk) **5. Annotation - SNV:** - [bcftools roh](https://samtools.github.io/bcftools/bcftools.html#roh) - [vcfanno](https://github.com/brentp/vcfanno) +- [CADD](https://cadd.gs.washington.edu/) - [VEP](https://www.ensembl.org/info/docs/tools/vep/index.html) +- [UPD](https://github.com/bjhall/upd) +- [Chromograph](https://github.com/Clinical-Genomics/chromograph) **6. Annotation - SV:** @@ -69,9 +74,12 @@ On release, automated continuous integration tests run the pipeline on a full-si **7. Mitochondrial analysis:** - [Alignment and variant calling - GATK Mitochondrial short variant discovery pipeline ](https://gatk.broadinstitute.org/hc/en-us/articles/4403870837275-Mitochondrial-short-variant-discovery-SNVs-Indels-) +- [eKLIPse](https://github.com/dooguypapua/eKLIPse/tree/master) - Annotation: - [HaploGrep2](https://github.com/seppinho/haplogrep-cmd) + - [Hmtnote](https://github.com/robertopreste/HmtNote) - [vcfanno](https://github.com/brentp/vcfanno) + - [CADD](https://cadd.gs.washington.edu/) - [VEP](https://www.ensembl.org/info/docs/tools/vep/index.html) **8. Variant calling - repeat expansions:** @@ -124,7 +132,7 @@ nextflow run nf-core/raredisease \ > provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; > see [docs](https://nf-co.re/usage/configuration#custom-configuration-files). -For more details, please refer to the [usage documentation](https://nf-co.re/raredisease/usage) and the [parameter documentation](https://nf-co.re/raredisease/parameters). +For more details and further functionality, please refer to the [usage documentation](https://nf-co.re/raredisease/usage) and the [parameter documentation](https://nf-co.re/raredisease/parameters). ## Pipeline output @@ -135,7 +143,7 @@ For more details about the output files and reports, please refer to the nf-core/raredisease was written in a collaboration between the Clinical Genomics nodes in Sweden, with major contributions from [Ramprasad Neethiraj](https://github.com/ramprasadn), [Anders Jemt](https://github.com/jemten), [Lucia Pena Perez](https://github.com/Lucpen), and [Mei Wu](https://github.com/projectoriented) at Clinical Genomics Stockholm. 
-Additional contributors were [Sima Rahimi](https://github.com/sima-r), [Gwenna Breton](https://github.com/Gwennid) and [Emma Västerviga](https://github.com/EmmaCAndersson) (Clinical Genomics Gothenburg); [Lauri Mesilaakso](https://github.com/ljmesi) (Clinical Genomics Linköping); [Subazini Thankaswamy Kosalai](https://github.com/sysbiocoder) (Clinical Genomics Örebro); [Annick Renevey](https://github.com/rannick) and [Peter Pruisscher](https://github.com/peterpru) (Clinical Genomics Stockholm); [Ryan Kennedy](https://github.com/ryanjameskennedy) (Clinical Genomics Lund); and [Lucas Taniguti](https://github.com/lmtani). +Additional contributors were [Sima Rahimi](https://github.com/sima-r), [Gwenna Breton](https://github.com/Gwennid) and [Emma Västerviga](https://github.com/EmmaCAndersson) (Clinical Genomics Gothenburg); [Halfdan Rydbeck](https://github.com/hrydbeck) and [Lauri Mesilaakso](https://github.com/ljmesi) (Clinical Genomics Linköping); [Subazini Thankaswamy Kosalai](https://github.com/sysbiocoder) (Clinical Genomics Örebro); [Annick Renevey](https://github.com/rannick) and [Peter Pruisscher](https://github.com/peterpru) (Clinical Genomics Stockholm); [Ryan Kennedy](https://github.com/ryanjameskennedy) (Clinical Genomics Lund); [Anders Sune Pedersen](https://github.com/asp8200) (Danish National Genome Center) and [Lucas Taniguti](https://github.com/lmtani). We thank the nf-core community for their extensive assistance in the development of this pipeline. @@ -147,8 +155,6 @@ For further information or help, don't hesitate to get in touch on the [Slack `# ## Citations - - If you use nf-core/raredisease for your analysis, please cite it using the following doi: [10.5281/zenodo.7995798](https://doi.org/10.5281/zenodo.7995798) An extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file. diff --git a/assets/cadd_to_vcf_header_-1.0-.txt b/assets/cadd_to_vcf_header_-1.0-.txt new file mode 100644 index 00000000..8deee482 --- /dev/null +++ b/assets/cadd_to_vcf_header_-1.0-.txt @@ -0,0 +1 @@ +##INFO= diff --git a/assets/methods_description_template.yml b/assets/methods_description_template.yml index d6ebfa6c..4b5a4272 100644 --- a/assets/methods_description_template.yml +++ b/assets/methods_description_template.yml @@ -7,13 +7,17 @@ plot_type: "html" ## You inject any metadata in the Nextflow '${workflow}' object data: |

   <h4>Methods</h4>
-  <p>Data was processed using nf-core/raredisease v${workflow.manifest.version} ${doi_text} of the nf-core collection of workflows (<a href="https://doi.org/10.1038/s41587-020-0439-x">Ewels et al., 2020</a>).</p>
+  <p>Data was processed using nf-core/raredisease v${workflow.manifest.version} ${doi_text} of the nf-core collection of workflows (<a href="https://doi.org/10.1038/s41587-020-0439-x">Ewels et al., 2020</a>), utilising reproducible software environments from the Bioconda (<a href="https://doi.org/10.1038/s41592-018-0046-7">Grüning et al., 2018</a>) and Biocontainers (<a href="https://doi.org/10.1093/bioinformatics/btx192">da Veiga Leprevost et al., 2017</a>) projects.</p>
   <p>The pipeline was executed with Nextflow v${workflow.nextflow.version} (<a href="https://doi.org/10.1038/nbt.3820">Di Tommaso et al., 2017</a>) with the following command:</p>
   <pre><code>${workflow.commandLine}</code></pre>
+  <p>${tool_citations}</p>
   <h4>References</h4>
   <h4>Notes:</h4>
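For orientation (an illustrative sketch, not part of the upstream change): the `${...}` placeholders in this template are resolved against Nextflow's `workflow` metadata object when the methods section is rendered into the MultiQC report, and `tool_citations` is assumed to arrive as a pre-assembled string. One way such a template can be evaluated in Groovy, with hypothetical values:

```groovy
import groovy.text.SimpleTemplateEngine

// Hypothetical stand-ins for the metadata Nextflow provides at run time.
def binding = [
    workflow: [
        manifest   : [version: '1.1.0'],
        nextflow   : [version: '23.04.0'],
        commandLine: 'nextflow run nf-core/raredisease -profile docker --input samplesheet.csv --outdir results',
    ],
    doi_text      : '(doi: 10.5281/zenodo.7995798)',
    tool_citations: 'Tool references were generated for the tools used in this run.',
]

// Single quotes keep the ${...} markers literal until the engine fills them in.
def template = 'Data was processed using nf-core/raredisease v${workflow.manifest.version} ' +
               '${doi_text}, executed with Nextflow v${workflow.nextflow.version}: ${workflow.commandLine}'

println new SimpleTemplateEngine().createTemplate(template).make(binding)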
diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index c2b26325..8388320b 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -3,9 +3,9 @@ custom_logo_url: https://github.com/nf-core/raredisease/ custom_logo_title: "nf-core/raredisease" report_comment: > - This report has been generated by the nf-core/raredisease + This report has been generated by the nf-core/raredisease analysis pipeline. For information about how to interpret these results, please see the - documentation. + documentation. report_section_order: "nf-core-raredisease-methods-description": order: -1000 diff --git a/assets/nf-core-raredisease_logo_light.png b/assets/nf-core-raredisease_logo_light.png index 0b66df05..3e1c6ec6 100644 Binary files a/assets/nf-core-raredisease_logo_light.png and b/assets/nf-core-raredisease_logo_light.png differ diff --git a/assets/slackreport.json b/assets/slackreport.json index 043d02f2..73c55ea7 100644 --- a/assets/slackreport.json +++ b/assets/slackreport.json @@ -3,7 +3,7 @@ { "fallback": "Plain-text summary of the attachment.", "color": "<% if (success) { %>good<% } else { %>danger<%} %>", - "author_name": "sanger-tol/readmapping v${version} - ${runName}", + "author_name": "nf-core/raredisease v${version} - ${runName}", "author_icon": "https://www.nextflow.io/docs/latest/_static/favicon.ico", "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors<% } %>", "fields": [ diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py index ac46acd3..90486cb6 100755 --- a/bin/check_samplesheet.py +++ b/bin/check_samplesheet.py @@ -38,6 +38,7 @@ def __init__( first_col="fastq_1", second_col="fastq_2", single_col="single_end", + case_id_col="case_id", **kwargs, ): """ @@ -53,6 +54,8 @@ def __init__( single_col (str): The name of the new column that will be inserted and records whether the sample contains single- or paired-end sequencing reads (default "single_end"). + case_id_col (str): The name of the column that contains the case_id + (default "case_id"). """ super().__init__(**kwargs) @@ -60,6 +63,7 @@ def __init__( self._first_col = first_col self._second_col = second_col self._single_col = single_col + self._case_id_col = case_id_col self._seen = set() self.modified = [] @@ -76,6 +80,7 @@ def validate_and_transform(self, row): self._validate_first(row) self._validate_second(row) self._validate_pair(row) + self._validate_case_id(row) self._seen.add((row[self._sample_col], row[self._first_col])) self.modified.append(row) @@ -108,6 +113,13 @@ def _validate_pair(self, row): else: row[self._single_col] = True + def _validate_case_id(self, row): + """Assert that the case id exists and convert spaces to underscores.""" + if len(row[self._case_id_col]) <= 0: + raise AssertionError("Case ID input is required.") + # Sanitize id slightly. 
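+        # Note: only literal spaces are normalised here; other whitespace is left as-is.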
+ row[self._case_id_col] = row[self._case_id_col].replace(" ", "_") + def _validate_fastq_format(self, filename): """Assert that a given filename has one of the expected FASTQ extensions.""" if not any(filename.endswith(extension) for extension in self.VALID_FORMATS): diff --git a/conf/modules/align.config b/conf/modules/align.config index be3fee28..249dc81e 100644 --- a/conf/modules/align.config +++ b/conf/modules/align.config @@ -17,4 +17,15 @@ process{ enabled: false ] } + + withName: '.*ALIGN:SAMTOOLS_VIEW' { + ext.args = { '--output-fmt cram --write-index' } + ext.when = params.save_mapped_as_cram + publishDir = [ + path: { "${params.outdir}/alignment" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + + } } diff --git a/conf/modules/align_and_call_MT.config b/conf/modules/align_and_call_MT.config index c4915c11..1a2993f5 100644 --- a/conf/modules/align_and_call_MT.config +++ b/conf/modules/align_and_call_MT.config @@ -16,18 +16,15 @@ // process { - withName: '.*ANALYSE_MT:ALIGN_AND_CALL_MT:.*' { - ext.when = { params.mt_fasta_shift && params.mt_intervals && !(params.analysis_type == "wes") } - } withName: '.*ANALYSE_MT:ALIGN_AND_CALL_MT:BWAMEM2_MEM_MT' { - ext.when = { params.mt_fasta_shift && params.mt_intervals && !(params.analysis_type == "wes") && params.aligner == "bwamem2" } + ext.when = { !params.skip_mt_analysis && !(params.analysis_type == "wes") && params.aligner == "bwamem2" } ext.args = { "-M -K 100000000 -R ${meta.read_group}" } } withName: '.*ANALYSE_MT:ALIGN_AND_CALL_MT:SENTIEON_BWAMEM_MT' { ext.args = { "-M -K 10000000 -R ${meta.read_group}" } - ext.when = { params.mt_fasta_shift && params.mt_intervals && !(params.analysis_type == "wes") && params.aligner == "sentieon" } + ext.when = { !params.skip_mt_analysis && !(params.analysis_type == "wes") && params.aligner == "sentieon" } ext.prefix = { "${meta.id}.sorted" } } @@ -55,6 +52,15 @@ process { ext.prefix = { "${meta.id}_sorted" } } + withName: '.*ANALYSE_MT:ALIGN_AND_CALL_MT:MT_DELETION' { + ext.args = '-s --insert-size 16000' + publishDir = [ + path: { "${params.outdir}/mt_sv" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + withName: '.*ANALYSE_MT:ALIGN_AND_CALL_MT:GATK4_MUTECT2_MT' { ext.args = '--mitochondria-mode TRUE' } @@ -69,18 +75,15 @@ process { // process { - withName: '.*ANALYSE_MT:ALIGN_AND_CALL_MT_SHIFT:.*' { - ext.when = { params.mt_fasta_shift && params.mt_intervals_shift && !(params.analysis_type == "wes") } - } withName: '.*ANALYSE_MT:ALIGN_AND_CALL_MT_SHIFT:BWAMEM2_MEM_MT' { - ext.when = { params.mt_fasta_shift && params.mt_intervals_shift && !(params.analysis_type == "wes") && params.aligner == "bwamem2" } + ext.when = { !params.skip_mt_analysis && !(params.analysis_type == "wes") && params.aligner == "bwamem2" } ext.args = { "-M -K 100000000 -R ${meta.read_group}" } } withName: '.*ANALYSE_MT:ALIGN_AND_CALL_MT_SHIFT:SENTIEON_BWAMEM_MT' { ext.args = { "-M -K 10000000 -R ${meta.read_group}" } - ext.when = { params.mt_fasta_shift && params.mt_intervals_shift && !(params.analysis_type == "wes") && params.aligner == "sentieon" } + ext.when = { !params.skip_mt_analysis && !(params.analysis_type == "wes") && params.aligner == "sentieon" } ext.prefix = { "${meta.id}.sorted" } } diff --git a/conf/modules/align_bwamem2.config b/conf/modules/align_bwamem2.config index f4f062e6..7d478222 100644 --- a/conf/modules/align_bwamem2.config +++ b/conf/modules/align_bwamem2.config @@ -35,6 +35,7 @@ process { withName: '.*ALIGN:ALIGN_BWAMEM2:MARKDUPLICATES' { ext.prefix = { "${meta.id}_sorted_md" } publishDir = [ + enabled: !params.save_mapped_as_cram, path: { "${params.outdir}/alignment" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } @@ -43,6 +44,7 @@ process { withName: '.*ALIGN:ALIGN_BWAMEM2:SAMTOOLS_INDEX_MARKDUP' { publishDir = [ + enabled: !params.save_mapped_as_cram, path: { "${params.outdir}/alignment" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } diff --git a/conf/modules/align_sentieon.config b/conf/modules/align_sentieon.config index 42073097..a882a017 100644 --- a/conf/modules/align_sentieon.config +++ b/conf/modules/align_sentieon.config @@ -41,6 +41,7 @@ process { ext.args = { $params.rmdup ? "--rmdup" : '' } ext.prefix = { "${meta.id}_dedup" } publishDir = [ + enabled: !params.save_mapped_as_cram, path: { "${params.outdir}/alignment" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } diff --git a/conf/modules/analyse_MT.config b/conf/modules/analyse_MT.config index 85568e3e..4ee1b693 100644 --- a/conf/modules/analyse_MT.config +++ b/conf/modules/analyse_MT.config @@ -17,6 +17,7 @@ process { withName: '.*ANALYSE_MT:.*' { + ext.when = { !params.skip_mt_analysis && !(params.analysis_type == "wes") } publishDir = [ enabled: false ] @@ -25,7 +26,6 @@ process { process { withName: '.*ANALYSE_MT:PICARD_LIFTOVERVCF' { - ext.when = { params.mt_fasta_shift && params.mt_intervals_shift && !(params.analysis_type == "wes") } ext.prefix = { "${meta.id}_liftover" } } } diff --git a/conf/modules/annotate_cadd.config b/conf/modules/annotate_cadd.config new file mode 100644 index 00000000..2535f7f9 --- /dev/null +++ b/conf/modules/annotate_cadd.config @@ -0,0 +1,41 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = Conditional clause +---------------------------------------------------------------------------------------- +*/ + +// +// CADD annotation +// + +process { + withName: '.*:ANNOTATE_CADD.*' { + ext.when = { (params.cadd_resources != null) && ( !(workflow.profile.tokenize(',').intersect(['test', 'test_one_sample']).size() >= 1) || workflow.stubRun) } + } + + withName: '.*:ANNOTATE_CADD:BCFTOOLS_VIEW' { + ext.args = { "--output-type z --types indels" } + ext.prefix = { "${vcf.simpleName}_indels" } + } + + withName: '.*:ANNOTATE_CADD:CADD' { + ext.args = { "-g ${params.genome}" } + ext.prefix = { "${vcf.simpleName}_cadd" } + } + + withName: '.*:ANNOTATE_CADD:TABIX_CADD' { + ext.args = { "--force --sequence 1 --begin 2 --end 2" } + } + + withName: '.*:ANNOTATE_CADD:BCFTOOLS_ANNOTATE' { + ext.args = { "--columns Chrom,Pos,Ref,Alt,-,CADD --output-type z" } + ext.prefix = { "${input.simpleName}_ann" } + } +} diff --git a/conf/modules/annotate_snvs.config b/conf/modules/annotate_snvs.config index 7df621a8..093e94a1 100644 --- a/conf/modules/annotate_snvs.config +++ b/conf/modules/annotate_snvs.config @@ -28,7 +28,7 @@ process { } withName: '.*ANNOTATE_SNVS:BCFTOOLS_ROH' { - ext.args = { "--samples ${meta.id} --skip-indels " } + ext.args = { "--samples ${meta.probands.join(",")} --skip-indels " } ext.prefix = { "${meta.id}_roh" } } @@ -41,6 +41,40 @@ process { ext.prefix = { "${meta.id}_rohann_vcfanno" } } + withName: '.*ANNOTATE_SNVS:UPD_SITES' { + ext.prefix = { "${meta.id}_rohann_vcfanno_upd_sites" } + ext.args = {"--af-tag GNOMADAF --proband ${meta.upd_child} --mother ${meta.mother} --father ${meta.father} sites"} + } + + withName: '.*ANNOTATE_SNVS:UPD_REGIONS' { + ext.prefix = { "${meta.id}_rohann_vcfanno_upd_regions" } + ext.args = {"--af-tag GNOMADAF --proband ${meta.upd_child} --mother ${meta.mother} --father ${meta.father} regions --min-size 5 --min-sites 1"} + ext.when = { !(workflow.profile.tokenize(',').intersect(['test', 'test_one_sample']).size() >= 1) || workflow.stubRun } + } + + withName: '.*ANNOTATE_SNVS:CHROMOGRAPH_SITES' { + ext.prefix = { "${meta7.id}_rohann_vcfanno_upd_sites_chromograph" } + 
ext.args = { "--euploid" } + tag = {"${meta7.id}"} + publishDir = [ + path: { "${params.outdir}/annotate_snv" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*ANNOTATE_SNVS:CHROMOGRAPH_REGIONS' { + ext.prefix = { "${meta6.id}_rohann_vcfanno_upd_regions_chromograph" } + ext.args = { '--euploid' } + ext.when = { !(workflow.profile.tokenize(',').intersect(['test', 'test_one_sample']).size() >= 1) || workflow.stubRun } + tag = {"${meta6.id}"} + publishDir = [ + path: { "${params.outdir}/annotate_snv" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: '.*ANNOTATE_SNVS:BCFTOOLS_VIEW' { ext.prefix = { "${meta.id}_rohann_vcfanno_filter" } ext.args = { '--output-type z --exclude "INFO/GNOMADAF > 0.70 | INFO/GNOMADAF_popmax > 0.70" ' } @@ -48,7 +82,6 @@ process { withName: '.*ANNOTATE_SNVS:GATK4_SELECTVARIANTS' { ext.prefix = { "${meta.id}_${intervals.simpleName}" } - ext.when = { !(params.analysis_type == "wes") } } withName: '.*ANNOTATE_SNVS:ENSEMBLVEP_SNV' { @@ -68,28 +101,11 @@ process { '--merged --polyphen p --protein --offline --regulatory --sift p --symbol --tsl', '--uniprot --vcf' ].join(' ') - publishDir = [ - enabled: params.analysis_type.equals('wes'), - path: { "${params.outdir}/annotate_snv" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - - withName: '.*ANNOTATE_SNVS:TABIX_VEP' { - publishDir = [ - enabled: params.analysis_type.equals('wes'), - path: { "${params.outdir}/annotate_snv" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] } withName: '.*ANNOTATE_SNVS:BCFTOOLS_CONCAT' { ext.prefix = { "${meta.id}_rohann_vcfanno_filter_vep" } - ext.when = { !(params.analysis_type == "wes") } publishDir = [ - enabled: !params.analysis_type.equals('wes'), path: { "${params.outdir}/annotate_snv" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } @@ -97,9 +113,7 @@ process { } withName: '.*ANNOTATE_SNVS:TABIX_BCFTOOLS_CONCAT' { - ext.when = { !(params.analysis_type == "wes") } publishDir = [ - enabled: !params.analysis_type.equals('wes'), path: { "${params.outdir}/annotate_snv" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } diff --git a/conf/modules/call_sv_germlinecnvcaller.config b/conf/modules/call_sv_germlinecnvcaller.config new file mode 100644 index 00000000..bf37b9d9 --- /dev/null +++ b/conf/modules/call_sv_germlinecnvcaller.config @@ -0,0 +1,39 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. 
+ ext.when = Conditional clause +---------------------------------------------------------------------------------------- +*/ + +// +// gcnvcaller calling options +// + +process { + + withName: ".*CALL_STRUCTURAL_VARIANTS:CALL_SV_GERMLINECNVCALLER.*" { + publishDir = [ + enabled: false + ] + ext.when = !params.skip_cnv_calling + } + + withName: ".*CALL_STRUCTURAL_VARIANTS:CALL_SV_GERMLINECNVCALLER:GATK4_COLLECTREADCOUNTS" { + ext.args = "--format TSV --interval-merging-rule OVERLAPPING_ONLY" + } + + withName: ".*CALL_STRUCTURAL_VARIANTS:CALL_SV_GERMLINECNVCALLER:GATK4_DETERMINEGERMLINECONTIGPLOIDY" { + ext.prefix = { "${meta.id}_ploidy" } + } + + withName: ".*CALL_STRUCTURAL_VARIANTS:CALL_SV_GERMLINECNVCALLER:GATK4_GERMLINECNVCALLER" { + ext.args = "--run-mode CASE" + ext.prefix = { "${meta.id}_${model.simpleName}" } + } +} diff --git a/conf/modules/convert_mt_bam_to_fastq.config b/conf/modules/convert_mt_bam_to_fastq.config index 5cba99b5..9a683b6e 100644 --- a/conf/modules/convert_mt_bam_to_fastq.config +++ b/conf/modules/convert_mt_bam_to_fastq.config @@ -16,9 +16,6 @@ // process { - withName: '.*ANALYSE_MT:CONVERT_MT_BAM_TO_FASTQ:.*' { - ext.when = { params.mt_fasta_shift && !(params.analysis_type == "wes") } - } withName: '.*ANALYSE_MT:CONVERT_MT_BAM_TO_FASTQ:GATK4_PRINTREADS_MT' { beforeScript = {"mkdir ./tmp"} diff --git a/conf/modules/merge_annotate_MT.config b/conf/modules/merge_annotate_MT.config index 367fbe7c..027c6805 100644 --- a/conf/modules/merge_annotate_MT.config +++ b/conf/modules/merge_annotate_MT.config @@ -16,9 +16,6 @@ // process { - withName: '.*ANALYSE_MT:MERGE_ANNOTATE_MT:.*' { - ext.when = { params.mt_fasta_shift && params.mt_intervals && !(params.analysis_type == "wes") } - } withName: '.*ANALYSE_MT:MERGE_ANNOTATE_MT:GATK4_MERGEVCFS_LIFT_UNLIFT_MT' { ext.prefix = { "${meta.id}_merged" } @@ -74,10 +71,7 @@ process { ext.prefix = { "${meta.id}_vep_vcfanno_hmtnote_mt" } ext.args = '--offline' publishDir = [ - path: { "${params.outdir}/annotate_mt" }, - mode: params.publish_dir_mode, - pattern: "*{vcf}", - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + enabled: false ] } diff --git a/conf/modules/prepare_references.config b/conf/modules/prepare_references.config index 8504a34c..7d40508c 100644 --- a/conf/modules/prepare_references.config +++ b/conf/modules/prepare_references.config @@ -29,16 +29,16 @@ process { ext.when = {!params.bwamem2 && params.aligner == "bwamem2"} } - withName: '.*PREPARE_REFERENCES:BWAMEM2_INDEX_SHIFT_MT' { - ext.when = {!params.mt_bwamem2_index_shift && params.mt_fasta_shift && !(params.analysis_type == "wes") && params.aligner == "bwamem2"} + withName: '.*PREPARE_REFERENCES:BWAMEM2_INDEX_MT_SHIFT' { + ext.when = { !params.skip_mt_analysis && !(params.analysis_type == "wes") && params.aligner == "bwamem2"} } withName: '.*PREPARE_REFERENCES:SENTIEON_BWAINDEX_GENOME' { ext.when = {!params.bwa && params.aligner == "sentieon"} } - withName: '.*PREPARE_REFERENCES:SENTIEON_BWAINDEX_SHIFT_MT' { - ext.when = {!params.mt_bwa_index_shift && params.mt_fasta_shift && !(params.analysis_type == "wes") && params.aligner == "sentieon"} + withName: '.*PREPARE_REFERENCES:SENTIEON_BWAINDEX_MT_SHIFT' { + ext.when = { !params.skip_mt_analysis && !(params.analysis_type == "wes") && params.aligner == "sentieon"} } withName: '.*PREPARE_REFERENCES:BWA_INDEX_GENOME' { @@ -49,16 +49,25 @@ process { ext.when = {!params.fai} } - withName: '.*PREPARE_REFERENCES:SAMTOOLS_FAIDX_SHIFT_MT' { - ext.when = {!params.mt_fai_shift && params.mt_fasta_shift && !(params.analysis_type == "wes")} + withName: '.*PREPARE_REFERENCES:SAMTOOLS_EXTRACT_MT' { + ext.args = { " ${params.mito_name} -o ${meta.id}_mt.fa" } + ext.when = {!params.mt_fasta && !params.skip_mt_analysis} + } + + withName: '.*PREPARE_REFERENCES:SAMTOOLS_FAIDX_MT_SHIFT' { + ext.when = { !params.skip_mt_analysis && !(params.analysis_type == "wes")} } withName: '.*PREPARE_REFERENCES:GATK_SD' { ext.when = {!params.sequence_dictionary} } - withName: '.*PREPARE_REFERENCES:GATK_SD_SHIFT_MT' { - ext.when = {!params.mt_sequence_dictionary_shift && params.mt_fasta_shift && !(params.analysis_type == "wes")} + withName: '.*PREPARE_REFERENCES:GATK_SHIFTFASTA' { + ext.args = { "--interval-file-name ${meta.id}_mt" } + } + + withName: '.*PREPARE_REFERENCES:GATK_SD_MT_SHIFT' { + ext.when = { !params.skip_mt_analysis && !(params.analysis_type == "wes")} } withName: '.*PREPARE_REFERENCES:GET_CHROM_SIZES' { @@ -108,4 +117,15 @@ process { enabled: false ] } + + withName: '.*PREPARE_REFERENCES:GATK_PREPROCESS_WGS' { + ext.args = { "--padding 0 --interval-merging-rule OVERLAPPING_ONLY --exclude-intervals ${params.mito_name}" } + ext.when = { params.analysis_type.equals("wgs") && !params.readcount_intervals } + } + + withName: '.*PREPARE_REFERENCES:GATK_PREPROCESS_WES' { + ext.args = { "--bin-length 0 --interval-merging-rule OVERLAPPING_ONLY --exclude-intervals ${params.mito_name}" } + ext.when = { params.analysis_type.equals("wes") && !params.readcount_intervals } + } + } diff --git a/conf/modules/qc_bam.config b/conf/modules/qc_bam.config index 0f5f411f..080f988f 100644 --- a/conf/modules/qc_bam.config +++ b/conf/modules/qc_bam.config @@ -29,6 +29,7 @@ process { } withName: '.*QC_BAM:PICARD_COLLECTHSMETRICS' { + ext.when = { params.target_bed } ext.prefix = { "${meta.id}_hsmetrics" } } diff --git a/conf/modules/scatter_genome.config b/conf/modules/scatter_genome.config index f8ecf792..b2fe363a 100644 --- a/conf/modules/scatter_genome.config +++ b/conf/modules/scatter_genome.config @@ -17,12 +17,12 @@ process { withName: '.*SCATTER_GENOME:BUILD_BED' { - ext.when = { !params.skip_snv_annotation 
&& !(params.analysis_type == "wes")} + ext.when = { !params.skip_snv_annotation } } withName: '.*SCATTER_GENOME:GATK4_SPLITINTERVALS' { ext.args = { "--subdivision-mode BALANCING_WITHOUT_INTERVAL_SUBDIVISION --scatter-count 22" } - ext.when = { !params.skip_snv_annotation && !(params.analysis_type == "wes")} + ext.when = { !params.skip_snv_annotation } ext.prefix = { "${meta.id}_genome_intervals" } publishDir = [ enabled: params.save_reference, diff --git a/conf/test.config b/conf/test.config index 805f84d6..8065aa8a 100644 --- a/conf/test.config +++ b/conf/test.config @@ -23,6 +23,9 @@ params { igenomes_ignore = true mito_name = 'MT' + // analysis params + skip_cnv_calling = true + // Input data input = 'https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/testdata/samplesheet_trio.csv' @@ -34,10 +37,6 @@ params { intervals_y = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/targetY.interval_list" known_dbsnp = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/dbsnp_-138-.vcf.gz" ml_model = "https://s3.amazonaws.com/sentieon-release/other/SentieonDNAscopeModel1.0.model" - mt_fasta_shift = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/mt_shift8000.fa" - mt_intervals = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/mt.intervals" - mt_intervals_shift = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/mt_shift8000.intervals" - mt_backchain_shift = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/mt_shift8000.back_chain" reduced_penetrance = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/reduced_penetrance.tsv" score_config_snv = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/rank_model_snv.ini" score_config_sv = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/rank_model_sv.ini" diff --git a/conf/test_full.config b/conf/test_full.config index 808d2c53..dcf02d95 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -10,8 +10,6 @@ ---------------------------------------------------------------------------------------- */ -cleanup = true - params { config_profile_name = 'Full test profile' config_profile_description = 'Full test dataset to check pipeline function' diff --git a/conf/test_one_sample.config b/conf/test_one_sample.config index e4f73a33..7ffe7ba7 100644 --- a/conf/test_one_sample.config +++ b/conf/test_one_sample.config @@ -23,6 +23,9 @@ params { igenomes_ignore = true mito_name = 'MT' + // analysis params + skip_cnv_calling = true + // Input data input = 'https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/testdata/samplesheet_single.csv' @@ -34,10 +37,6 @@ params { intervals_y = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/targetY.interval_list" known_dbsnp = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/dbsnp_-138-.vcf.gz" ml_model = "https://s3.amazonaws.com/sentieon-release/other/SentieonDNAscopeModel1.0.model" - mt_fasta_shift = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/mt_shift8000.fa" - mt_intervals = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/mt.intervals" - mt_intervals_shift = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/mt_shift8000.intervals" - mt_backchain_shift = 
"https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/mt_shift8000.back_chain" reduced_penetrance = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/reduced_penetrance.tsv" score_config_snv = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/rank_model_snv.ini" score_config_sv = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/rank_model_sv.ini" diff --git a/docs/output.md b/docs/output.md index bafda861..6d07a3bd 100644 --- a/docs/output.md +++ b/docs/output.md @@ -33,6 +33,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d - [Variant calling - SV](#variant-calling---sv) - [Manta](#manta) - [TIDDIT sv](#tiddit-sv) + - [GATK GermlineCNVCaller - CNV calling](#gatk-germlinecnvcaller---cnv-calling) - [SVDB merge](#svdb-merge) - [Variant calling - repeat expansions](#variant-calling---repeat-expansions) - [Expansion Hunter](#expansion-hunter) @@ -40,16 +41,22 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d - [Annotation - SNV](#annotation---snv) - [bcftools roh](#bcftools-roh) - [vcfanno](#vcfanno) + - [CADD](#cadd) - [VEP](#vep) + - [UPD](#upd) + - [Chromograph](#chromograph) - [Annotation - SV](#annotation---sv) - [SVDB query](#svdb-query) - [VEP](#vep-1) - [Mitochondrial analysis](#mitochondrial-analysis) - [Alignment and variant calling](#alignment-and-variant-calling) + - [MT deletion script](#mt-deletion-script) - [Annotation:](#annotation-) - [HaploGrep2](#haplogrep2) - [vcfanno](#vcfanno-1) + - [CADD](#cadd-1) - [VEP](#vep-2) + - [HmtNote](#hmtnote) - [Rank variants and filtering](#rank-variants-and-filtering) - [GENMOD](#genmod) - [Pipeline information](#pipeline-information) @@ -70,27 +77,27 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d ##### Picard's MarkDuplicates -[Picard MarkDuplicates](https://broadinstitute.github.io/picard/command-line-overview.html#MarkDuplicates) is used for marking PCR duplicates that can occur during library amplification. This is essential as the presence of such duplicates results in false inflated coverages, which in turn can lead to overly-confident genotyping calls during variant calling. Only reads aligned by Bwa-mem2 are processed by this tool. +[Picard MarkDuplicates](https://broadinstitute.github.io/picard/command-line-overview.html#MarkDuplicates) is used for marking PCR duplicates that can occur during library amplification. This is essential as the presence of such duplicates results in false inflated coverages, which in turn can lead to overly-confident genotyping calls during variant calling. Only reads aligned by Bwa-mem2 are processed by this tool. By default, alignment files are published in bam format. If you would like to store cram files instead, set `--save_mapped_as_cram` to true.
 <details markdown="1">
 <summary>Output files from Alignment</summary>

 - `{outputdir}/alignment/`
-  - `*.bam`: Bam file containing report containing quality metrics.
-  - `*.bai`: Zip archive containing the FastQC report, tab-delimited data file and plot images.
+  - `*.bam|*.cram`: Alignment file in bam/cram format.
+  - `*.bai|*.crai`: Index of the corresponding bam/cram file.
   - `*.txt`: Text file containing the dedup metrics.

 </details>
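A quick usage sketch (not part of the diff; the parameter name is introduced by this PR, and its default is assumed to be off): requesting CRAM output for a run via the pipeline parameters could look like this:

```nextflow
params {
    // Publish CRAM/CRAI from the alignment step instead of BAM/BAI
    // (assumed default: false, i.e. BAM output).
    save_mapped_as_cram = true
}
```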
##### Sentieon Dedup -[Sentieon Dedup](https://support.sentieon.com/manual/DNAseq_usage/dnaseq/#remove-or-mark-duplicates) is the algorithm used by Sentieon's driver to remove duplicate reads. Only reads aligned by Sentieon's implementation of bwa are processed by this algorithm. +[Sentieon Dedup](https://support.sentieon.com/manual/DNAseq_usage/dnaseq/#remove-or-mark-duplicates) is the algorithm used by Sentieon's driver to remove duplicate reads. Only reads aligned by Sentieon's implementation of bwa are processed by this algorithm. By default, alignment files are published in bam format. If you would like to store cram files instead, set `--save_mapped_as_cram` to true.
 <details markdown="1">
 <summary>Output files from Alignment</summary>

 - `{outputdir}/alignment/`
-  - `*.bam`: Bam file containing report containing quality metrics.
-  - `*.bai`: Zip archive containing the FastQC report, tab-delimited data file and plot images.
+  - `*.bam|*.cram`: Alignment file in bam/cram format.
+  - `*.bai|*.crai`: Index of the corresponding bam/cram file.
   - `*.txt`: Text file containing the dedup metrics.

 </details>
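Relatedly (a sketch; `bwamem2` as the default is an assumption), which dedup route runs is controlled by the existing `aligner` parameter that the module `ext.when` clauses elsewhere in this PR key off:

```nextflow
params {
    // 'sentieon' routes alignment and dedup through Sentieon's driver;
    // 'bwamem2' (assumed default) uses bwa-mem2 plus Picard MarkDuplicates.
    aligner = 'sentieon'
}
```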
@@ -241,15 +248,19 @@ The pipeline performs variant calling using [Sentieon DNAscope](https://support. #### Manta -[Manta](https://github.com/Illumina/manta) calls structural variants (SVs) and indels from mapped paired-end sequencing reads. It combines paired and split-read evidence during SV discovery and scoring to improve accuracy, but does not require split-reads or successful breakpoint assemblies to report a variant in cases where there is strong evidence otherwise. Output vcf files are treated as intermediates and are not placed in the output folder by default. +[Manta](https://github.com/Illumina/manta) calls structural variants (SVs) and indels from mapped paired-end sequencing reads. It combines paired and split-read evidence during SV discovery and scoring to improve accuracy, but does not require split-reads or successful breakpoint assemblies to report a variant in cases where there is strong evidence otherwise. Output vcf files are treated as intermediates and are not placed in the output folder. #### TIDDIT sv -[TIDDIT's sv](https://github.com/SciLifeLab/TIDDIT) is used to identify chromosomal rearrangements using sequencing data. TIDDIT identifies intra and inter-chromosomal translocations, deletions, tandem-duplications and inversions, using supplementary alignments as well as discordant pairs. TIDDIT searches for discordant reads and split reads (supplementary alignments). Output vcf files are treated as intermediates and are not placed in the output folder by default. +[TIDDIT's sv](https://github.com/SciLifeLab/TIDDIT) is used to identify chromosomal rearrangements using sequencing data. TIDDIT identifies intra and inter-chromosomal translocations, deletions, tandem-duplications and inversions, using supplementary alignments as well as discordant pairs. TIDDIT searches for discordant reads and split reads (supplementary alignments). Output vcf files are treated as intermediates and are not placed in the output folder. + +#### GATK GermlineCNVCaller - CNV calling + +[GATK GermlineCNVCaller](https://github.com/broadinstitute/gatk) is used to identify copy number variants in germline samples given their read counts and a model describing a sample's ploidy. Output vcf files are treated as intermediates and are not placed in the output folder. #### SVDB merge -[SVDB merge](https://github.com/J35P312/SVDB#merge) is used to merge the variant calls from both Manta and TIDDIT. Output files are published in the output folder. +[SVDB merge](https://github.com/J35P312/SVDB#merge) is used to merge the variant calls from GATK's GermlineCNVCaller (only if skip_cnv_calling is set to false), Manta, and TIDDIT. Output files are published in the output folder.
Output files @@ -292,11 +303,17 @@ The pipeline performs variant calling using [Sentieon DNAscope](https://support. #### bcftools roh -[bcftools roh](https://samtools.github.io/bcftools/bcftools.html#roh) is a program for detecting runs of homo/autozygosity.from only bi-allelic sites. The output files are not published in the output folder by default, and is passed to vcfanno for further annotation. +[bcftools roh](https://samtools.github.io/bcftools/bcftools.html#roh) is a program for detecting runs of homo/autozygosity.from only bi-allelic sites. The output files are not published in the output folder, and is passed to vcfanno for further annotation. #### vcfanno -[vcfanno](https://github.com/brentp/vcfanno) allows you to quickly annotate your VCF with any number of INFO fields from any number of VCFs. It uses a simple conf file to allow the user to specify the source annotation files and fields and how they will be added to the info of the query VCF. Values are pulled by name from the INFO field with special-cases of ID and FILTER to pull from those VCF columns. The output files are not published in the output folder by default, and is passed to vep for further annotation. +[vcfanno](https://github.com/brentp/vcfanno) allows you to quickly annotate your VCF with any number of INFO fields from any number of VCFs. It uses a simple configuration file to allow the user to specify the source annotation files and fields and how they will be added to the info of the query VCF. Values are pulled by name from the INFO field with special-cases of ID and FILTER to pull from those VCF columns. The output files are not published in the output folder, and is passed to CADD and/or VEP for further annotation. + +We recommend using vcfanno to annotate SNVs with precomputed CADD scores (files can be downloaded from [here](https://cadd.gs.washington.edu/download)). + +#### CADD + +[CADD](https://cadd.gs.washington.edu/) is a tool for scoring the deleteriousness of single nucleotide variants as well as insertion/deletions variants in the human genome. In nf-core/raredisease, SNVs can be annotated with precomputed CADD scores using vcfanno. However, for small indels they will be calculated on the fly by CADD. The output files are not published in the output folder, and is passed to VEP for further annotation. #### VEP @@ -322,11 +339,29 @@ Based on VEP annotations, custom scripts used by the pipeline further annotate e
+#### UPD + +[UPD](https://github.com/bjhall/upd) calls regions of uniparental disomy from germline exome/wgs trios. Output from UPD is passed to chromograph for making plots. + +#### Chromograph + +[Chromograph](https://github.com/Clinical-Genomics/chromograph) is a python package to create PNG images from genetics data such as BED and WIG files. + +
+<summary>Output files</summary>
+
+- `annotate_snv/*sites_chromograph`
+  - `_rohann_vcfanno_upd_sites_.png`: file containing a plot showing upd sites across chromosomes.
+- `annotate_snv/*regions_chromograph`
+  - `_rohann_vcfanno_upd_regions_.png`: file containing a plot showing upd regions across chromosomes.
+
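As a configuration aside (the process selector name comes from this PR's `annotate_snvs.config`; overriding it is optional): Chromograph's command-line flags are forwarded through module `ext.args`, which can be adjusted from a custom config supplied with `-c` — the mechanism nf-core reserves for module arguments:

```nextflow
process {
    withName: '.*ANNOTATE_SNVS:CHROMOGRAPH_REGIONS' {
        // Flags set here are passed verbatim to chromograph.
        ext.args = { '--euploid' }
    }
}
```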
+ ### Annotation - SV #### SVDB query -[SVDB query](https://github.com/J35P312/SVDB#Query) allows you to quickly annotate your VCF with data from one or more structural variant databases. The output files are not published in the output folder by default, and is passed to vep for further annotation. +[SVDB query](https://github.com/J35P312/SVDB#Query) allows you to quickly annotate your VCF with data from one or more structural variant databases. The output files are not published in the output folder, and is passed to vep for further annotation. #### VEP @@ -353,6 +388,10 @@ Mitochondrial analysis is run by default, to turn it off set `--skip_mt_analysis The pipeline for mitochondrial variant discovery, using Mutect2, uses a high sensitivity to low AF and separate alignments using opposite genome breakpoints to allow for the tracing of lineages of rare mitochondrial variants. +##### MT deletion script + +[MT deletion script](https://github.com/dnil/mitosign/blob/master/run_mt_del_check.sh) lists the fraction of mitochondrially aligning read pairs (per 1000) that appear discordant, as defined by an insert size of more than 1.2 kb (and less than 15 kb due to the circular nature of the genome) using samtools. + #### Annotation: ##### HaploGrep2 @@ -369,7 +408,17 @@ The pipeline for mitochondrial variant discovery, using Mutect2, uses a high sen ##### vcfanno -[vcfanno](https://github.com/brentp/vcfanno) allows you to quickly annotate your VCF with any number of INFO fields from any number of VCFs. It uses a simple conf file to allow the user to specify the source annotation files and fields and how they will be added to the info of the query VCF. Values are pulled by name from the INFO field with special-cases of ID and FILTER to pull from those VCF columns. The output files are not published in the output folder by default, and is passed to vep for further annotation. +[vcfanno](https://github.com/brentp/vcfanno) allows you to quickly annotate your VCF with any number of INFO fields from any number of VCFs. It uses a simple conf file to allow the user to specify the source annotation files and fields and how they will be added to the info of the query VCF. Values are pulled by name from the INFO field with special-cases of ID and FILTER to pull from those VCF columns. The output files are not published in the output folder, and is passed to vep for further annotation. + +We recommend using vcfanno to annotate SNVs with precomputed CADD scores (files can be downloaded from [here](https://cadd.gs.washington.edu/download)). + +##### CADD + +[CADD](https://cadd.gs.washington.edu/) is a tool for scoring the deleteriousness of single nucleotide variants as well as insertion/deletions variants in the human genome. In nf-core/raredisease, SNVs can be annotated with precomputed CADD scores using vcfanno. However, for small indels they will be calculated on the fly by CADD. The output files are not published in the output folder, and is passed to VEP for further annotation. + +##### Hmtnote + +[HmtNote](https://github.com/robertopreste/HmtNote) annotates vcf containing human mitochondrial variants with HmtVar. It will run offline by default with a database within the container. ##### VEP diff --git a/docs/usage.md b/docs/usage.md index 8e76910f..5b7f7aa6 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -17,9 +17,10 @@ Table of contents: - [3. Repeat expansions](#3-repeat-expansions) - [4. Variant calling - SNV](#4-variant-calling---snv) - [5. 
Variant calling - Structural variants](#5-variant-calling---structural-variants) - - [6. SNV annotation & Ranking](#6-snv-annotation--ranking) - - [7. SV annotation & Ranking](#7-sv-annotation--ranking) - - [8. Mitochondrial analysis](#8-mitochondrial-analysis) + - [6. Copy number variant calling](#6-copy-number-variant-calling) + - [7. SNV annotation & Ranking](#7-snv-annotation--ranking) + - [8. SV annotation & Ranking](#8-sv-annotation--ranking) + - [9. Mitochondrial analysis](#9-mitochondrial-analysis) - [Run the pipeline](#run-the-pipeline) - [Direct input in CLI](#direct-input-in-cli) - [Import from a config file (recommended)](#import-from-a-config-file-recommended) @@ -115,6 +116,10 @@ If you would like to see more examples of what a typical samplesheet looks like In nf-core/raredisease, references can be supplied using parameters listed [here](https://nf-co.re/raredisease/dev/parameters). +> ⚠️ Do not use `-c ` to specify parameters as this will result in errors. Custom config files specified with `-c` must only be used for [tuning process resource specifications](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources), other infrastructural tweaks (such as output directories), or module arguments (args). + +The above pipeline run specified with a params file in yaml format: + Note that the pipeline is modular in architecture. It offers you the flexibility to choose between different tools. For example, you can align with either bwamem2 or Sentieon BWA mem and call SNVs with either DeepVariant or Sentieon DNAscope. You also have the option to turn off sections of the pipeline if you do not want to run the. For example, snv annotation can be turned off by adding `--skip_snv_annotation` flag in the command line, or by setting it to true in a parameter file. This flexibility means that in any given analysis run, a combination of tools included in the pipeline will not be executed. So the pipeline is written in a way that can account for these differences while working with reference parameters. If a tool is not going to be executed during the course of a run, parameters used only by that tool need not be provided. For example, for SNV calling if you use DeepVariant as your variant caller, you need not provide the parameter `--ml_model`, which is only used by Sentieon DNAscope. nf-core/raredisease consists of several tools used for various purposes. For convenience, we have grouped those tools under the following categories: @@ -184,14 +189,25 @@ The mandatory and optional parameters for each category are tabulated below. | | target_bed | | | bwa | -##### 6. SNV annotation & Ranking +##### 6. Copy number variant calling + +| Mandatory | Optional | +| ------------------------------ | ------------------------------- | +| ploidy_model1 | readcount_intervals3 | +| gcnvcaller_model1,2 | | + +1 Output from steps 3 & 4 of GATK's CNV calling pipeline run in cohort mode as described [here](https://gatk.broadinstitute.org/hc/en-us/articles/360035531152--How-to-Call-common-and-rare-germline-copy-number-variants).
+<sup>2</sup> Sample file can be found [here](https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/gcnvmodels.tsv) (Note the header 'models' in the sample file).
+<sup>3</sup> Output from step 1 of GATK's CNV calling pipeline as described [here](https://gatk.broadinstitute.org/hc/en-us/articles/360035531152--How-to-Call-common-and-rare-germline-copy-number-variants) (see the run sketch below).
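+
+As an illustration, a run supplying the references above might look like the sketch below; every path is a hypothetical placeholder, while the parameter names (`--ploidy_model`, `--gcnvcaller_model`, `--readcount_intervals`) are the ones tabulated above:
+
+```bash
+# Sketch only: pass the precomputed GATK CNV-calling references to the pipeline.
+# ploidy_model and gcnvcaller_model come from a prior cohort-mode run of
+# DetermineGermlineContigPloidy and GermlineCNVCaller (steps 3 & 4 above).
+nextflow run nf-core/raredisease \
+  --input samplesheet.csv \
+  --outdir results \
+  --ploidy_model /path/to/ploidy_model_dir \
+  --gcnvcaller_model /path/to/gcnvmodels.tsv \
+  --readcount_intervals /path/to/preprocessed.interval_list \
+  -profile docker
+```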
+ +##### 7. SNV annotation & Ranking | Mandatory | Optional | | ----------------------------- | ------------------------------ | | genome<sup>1</sup> | reduced_penetrance<sup>7</sup> | | vcfanno_resources<sup>2</sup> | vcfanno_lua | | vcfanno_toml<sup>3</sup> | vep_filters<sup>8</sup> | -| vep_cache_version | | +| vep_cache_version | cadd_resources<sup>9</sup> | | vep_cache<sup>4</sup> | | | gnomad_af<sup>5</sup> | | | score_config_snv<sup>6</sup> | | @@ -207,8 +223,11 @@ no header and the following columns: `CHROM POS REF_ALLELE ALT_ALLELE AF`. Sampl <sup>6</sup>Used by GENMOD for ranking the variants. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/rank_model_snv.ini).
<sup>7</sup>Used by GENMOD while modeling the variants. Contains a list of loci that show [reduced penetrance](https://medlineplus.gov/genetics/understanding/inheritance/penetranceexpressivity/) in people. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/reduced_penetrance.tsv).
<sup>8</sup> This file contains a list of candidate genes (with [HGNC](https://www.genenames.org/) IDs) that is used to split the variants into candidate variants and research variants. Research variants contain all the variants, while candidate variants are a subset of research variants and are associated with candidate genes. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/hgnc.txt).
+<sup>9</sup>Path to a folder containing CADD annotations. Equivalent of the data/annotations/ folder described [here](https://github.com/kircherlab/CADD-scripts/#manual-installation); it is used to calculate CADD scores for small indels (see the sketch below).
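+
+As an illustration, precomputed CADD scores for SNVs can be wired in through vcfanno while `--cadd_resources` covers small indels; in the sketch below the TOML file name, the annotation block, and all paths are assumptions to adapt to your own setup:
+
+```bash
+# Sketch only: append a vcfanno annotation entry for precomputed CADD SNV scores.
+# Column 6 of the tabixed CADD TSV holds the PHRED-scaled score.
+cat >> vcfanno_config.toml <<'EOF'
+[[annotation]]
+file="whole_genome_SNVs.tsv.gz"
+names=["CADD"]
+ops=["mean"]
+columns=[6]
+EOF
+
+# Small indels are then scored on the fly from the CADD annotation folder:
+nextflow run nf-core/raredisease \
+  --input samplesheet.csv \
+  --outdir results \
+  --cadd_resources /path/to/cadd/annotations \
+  -profile docker
+```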
-##### 7. SV annotation & Ranking +> NB: We use CADD only to annotate small indels. To annotate SNVs with precomputed CADD scores, pass the file containing CADD scores as a resource to vcfanno instead. Files containing the precomputed CADD scores for SNVs can be downloaded from [here](https://cadd.gs.washington.edu/download) (description: "All possible SNVs of GRCh3<7/8>/hg3<7/8>") + +##### 8. SV annotation & Ranking | Mandatory | Optional | | -------------------------- | ------------------ | @@ -220,22 +239,16 @@ no header and the following columns: `CHROM POS REF_ALLELE ALT_ALLELE AF`. Sampl <sup>1</sup> A CSV file that describes the databases (VCFs) used by SVDB for annotating structural variants. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/svdb_querydb_files.csv). Information about the column headers can be found [here](https://github.com/J35P312/SVDB#Query). -##### 8. Mitochondrial analysis - -| Mandatory | Optional | -| ------------------------------ | -------- | -| genome | | -| mt_backchain_shift<sup>1</sup> | | -| mito_name | | -| mt_fasta_shift | | -| mt_intervals | | -| mt_intervals_shift | | -| vcfanno_resources | | -| vcfanno_toml | | -| vep_cache_version | | -| vep_cache | | - -<sup>1</sup>Can be generated by GATK's [ShiftFasta](https://gatk.broadinstitute.org/hc/en-us/articles/9570501436827-ShiftFasta-BETA-). Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/mt_shift8000.back_chain). +##### 9. Mitochondrial analysis + +| Mandatory | Optional | +| ----------------- | -------- | +| genome | | +| mito_name | | +| vcfanno_resources | | +| vcfanno_toml | | +| vep_cache_version | | +| vep_cache | | #### Run the pipeline diff --git a/lib/NfcoreSchema.groovy b/lib/NfcoreSchema.groovy deleted file mode 100755 index 9b34804d..00000000 --- a/lib/NfcoreSchema.groovy +++ /dev/null @@ -1,530 +0,0 @@ -// -// This file holds several functions used to perform JSON parameter validation, help and summary rendering for the nf-core pipeline template.
-// - -import nextflow.Nextflow -import org.everit.json.schema.Schema -import org.everit.json.schema.loader.SchemaLoader -import org.everit.json.schema.ValidationException -import org.json.JSONObject -import org.json.JSONTokener -import org.json.JSONArray -import groovy.json.JsonSlurper -import groovy.json.JsonBuilder - -class NfcoreSchema { - - // - // Resolve Schema path relative to main workflow directory - // - public static String getSchemaPath(workflow, schema_filename='nextflow_schema.json') { - return "${workflow.projectDir}/${schema_filename}" - } - - // - // Function to loop over all parameters defined in schema and check - // whether the given parameters adhere to the specifications - // - /* groovylint-disable-next-line UnusedPrivateMethodParameter */ - public static void validateParameters(workflow, params, log, schema_filename='nextflow_schema.json') { - def has_error = false - //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// - // Check for nextflow core params and unexpected params - def json = new File(getSchemaPath(workflow, schema_filename=schema_filename)).text - def Map schemaParams = (Map) new JsonSlurper().parseText(json).get('definitions') - def nf_params = [ - // Options for base `nextflow` command - 'bg', - 'c', - 'C', - 'config', - 'd', - 'D', - 'dockerize', - 'h', - 'log', - 'q', - 'quiet', - 'syslog', - 'v', - - // Options for `nextflow run` command - 'ansi', - 'ansi-log', - 'bg', - 'bucket-dir', - 'c', - 'cache', - 'config', - 'dsl2', - 'dump-channels', - 'dump-hashes', - 'E', - 'entry', - 'latest', - 'lib', - 'main-script', - 'N', - 'name', - 'offline', - 'params-file', - 'pi', - 'plugins', - 'poll-interval', - 'pool-size', - 'profile', - 'ps', - 'qs', - 'queue-size', - 'r', - 'resume', - 'revision', - 'stdin', - 'stub', - 'stub-run', - 'test', - 'w', - 'with-apptainer', - 'with-charliecloud', - 'with-conda', - 'with-dag', - 'with-docker', - 'with-mpi', - 'with-notification', - 'with-podman', - 'with-report', - 'with-singularity', - 'with-timeline', - 'with-tower', - 'with-trace', - 'with-weblog', - 'without-docker', - 'without-podman', - 'work-dir' - ] - def unexpectedParams = [] - - // Collect expected parameters from the schema - def expectedParams = [] - def enums = [:] - for (group in schemaParams) { - for (p in group.value['properties']) { - expectedParams.push(p.key) - if (group.value['properties'][p.key].containsKey('enum')) { - enums[p.key] = group.value['properties'][p.key]['enum'] - } - } - } - - for (specifiedParam in params.keySet()) { - // nextflow params - if (nf_params.contains(specifiedParam)) { - log.error "ERROR: You used a core Nextflow option with two hyphens: '--${specifiedParam}'. 
Please resubmit with '-${specifiedParam}'" - has_error = true - } - // unexpected params - def params_ignore = params.schema_ignore_params.split(',') + 'schema_ignore_params' - def expectedParamsLowerCase = expectedParams.collect{ it.replace("-", "").toLowerCase() } - def specifiedParamLowerCase = specifiedParam.replace("-", "").toLowerCase() - def isCamelCaseBug = (specifiedParam.contains("-") && !expectedParams.contains(specifiedParam) && expectedParamsLowerCase.contains(specifiedParamLowerCase)) - if (!expectedParams.contains(specifiedParam) && !params_ignore.contains(specifiedParam) && !isCamelCaseBug) { - // Temporarily remove camelCase/camel-case params #1035 - def unexpectedParamsLowerCase = unexpectedParams.collect{ it.replace("-", "").toLowerCase()} - if (!unexpectedParamsLowerCase.contains(specifiedParamLowerCase)){ - unexpectedParams.push(specifiedParam) - } - } - } - - //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// - // Validate parameters against the schema - InputStream input_stream = new File(getSchemaPath(workflow, schema_filename=schema_filename)).newInputStream() - JSONObject raw_schema = new JSONObject(new JSONTokener(input_stream)) - - // Remove anything that's in params.schema_ignore_params - raw_schema = removeIgnoredParams(raw_schema, params) - - Schema schema = SchemaLoader.load(raw_schema) - - // Clean the parameters - def cleanedParams = cleanParameters(params) - - // Convert to JSONObject - def jsonParams = new JsonBuilder(cleanedParams) - JSONObject params_json = new JSONObject(jsonParams.toString()) - - // Validate - try { - schema.validate(params_json) - } catch (ValidationException e) { - println '' - log.error 'ERROR: Validation of pipeline parameters failed!' - JSONObject exceptionJSON = e.toJSON() - printExceptions(exceptionJSON, params_json, log, enums) - println '' - has_error = true - } - - // Check for unexpected parameters - if (unexpectedParams.size() > 0) { - Map colors = NfcoreTemplate.logColours(params.monochrome_logs) - println '' - def warn_msg = 'Found unexpected parameters:' - for (unexpectedParam in unexpectedParams) { - warn_msg = warn_msg + "\n* --${unexpectedParam}: ${params[unexpectedParam].toString()}" - } - log.warn warn_msg - log.info "- ${colors.dim}Ignore this warning: params.schema_ignore_params = \"${unexpectedParams.join(',')}\" ${colors.reset}" - println '' - } - - if (has_error) { - Nextflow.error('Exiting!') - } - } - - // - // Beautify parameters for --help - // - public static String paramsHelp(workflow, params, command, schema_filename='nextflow_schema.json') { - Map colors = NfcoreTemplate.logColours(params.monochrome_logs) - Integer num_hidden = 0 - String output = '' - output += 'Typical pipeline command:\n\n' - output += " ${colors.cyan}${command}${colors.reset}\n\n" - Map params_map = paramsLoad(getSchemaPath(workflow, schema_filename=schema_filename)) - Integer max_chars = paramsMaxChars(params_map) + 1 - Integer desc_indent = max_chars + 14 - Integer dec_linewidth = 160 - desc_indent - for (group in params_map.keySet()) { - Integer num_params = 0 - String group_output = colors.underlined + colors.bold + group + colors.reset + '\n' - def group_params = params_map.get(group) // This gets the parameters of that particular group - for (param in group_params.keySet()) { - if (group_params.get(param).hidden && !params.show_hidden_params) { - num_hidden += 1 - continue; - } - def type = '[' + group_params.get(param).type + ']' - def description = group_params.get(param).description - def 
defaultValue = group_params.get(param).default != null ? " [default: " + group_params.get(param).default.toString() + "]" : '' - def description_default = description + colors.dim + defaultValue + colors.reset - // Wrap long description texts - // Loosely based on https://dzone.com/articles/groovy-plain-text-word-wrap - if (description_default.length() > dec_linewidth){ - List olines = [] - String oline = "" // " " * indent - description_default.split(" ").each() { wrd -> - if ((oline.size() + wrd.size()) <= dec_linewidth) { - oline += wrd + " " - } else { - olines += oline - oline = wrd + " " - } - } - olines += oline - description_default = olines.join("\n" + " " * desc_indent) - } - group_output += " --" + param.padRight(max_chars) + colors.dim + type.padRight(10) + colors.reset + description_default + '\n' - num_params += 1 - } - group_output += '\n' - if (num_params > 0){ - output += group_output - } - } - if (num_hidden > 0){ - output += colors.dim + "!! Hiding $num_hidden params, use --show_hidden_params to show them !!\n" + colors.reset - } - output += NfcoreTemplate.dashedLine(params.monochrome_logs) - return output - } - - // - // Groovy Map summarising parameters/workflow options used by the pipeline - // - public static LinkedHashMap paramsSummaryMap(workflow, params, schema_filename='nextflow_schema.json') { - // Get a selection of core Nextflow workflow options - def Map workflow_summary = [:] - if (workflow.revision) { - workflow_summary['revision'] = workflow.revision - } - workflow_summary['runName'] = workflow.runName - if (workflow.containerEngine) { - workflow_summary['containerEngine'] = workflow.containerEngine - } - if (workflow.container) { - workflow_summary['container'] = workflow.container - } - workflow_summary['launchDir'] = workflow.launchDir - workflow_summary['workDir'] = workflow.workDir - workflow_summary['projectDir'] = workflow.projectDir - workflow_summary['userName'] = workflow.userName - workflow_summary['profile'] = workflow.profile - workflow_summary['configFiles'] = workflow.configFiles.join(', ') - - // Get pipeline parameters defined in JSON Schema - def Map params_summary = [:] - def params_map = paramsLoad(getSchemaPath(workflow, schema_filename=schema_filename)) - for (group in params_map.keySet()) { - def sub_params = new LinkedHashMap() - def group_params = params_map.get(group) // This gets the parameters of that particular group - for (param in group_params.keySet()) { - if (params.containsKey(param)) { - def params_value = params.get(param) - def schema_value = group_params.get(param).default - def param_type = group_params.get(param).type - if (schema_value != null) { - if (param_type == 'string') { - if (schema_value.contains('$projectDir') || schema_value.contains('${projectDir}')) { - def sub_string = schema_value.replace('\$projectDir', '') - sub_string = sub_string.replace('\${projectDir}', '') - if (params_value.contains(sub_string)) { - schema_value = params_value - } - } - if (schema_value.contains('$params.outdir') || schema_value.contains('${params.outdir}')) { - def sub_string = schema_value.replace('\$params.outdir', '') - sub_string = sub_string.replace('\${params.outdir}', '') - if ("${params.outdir}${sub_string}" == params_value) { - schema_value = params_value - } - } - } - } - - // We have a default in the schema, and this isn't it - if (schema_value != null && params_value != schema_value) { - sub_params.put(param, params_value) - } - // No default in the schema, and this isn't empty - else if (schema_value == null && 
params_value != "" && params_value != null && params_value != false) { - sub_params.put(param, params_value) - } - } - } - params_summary.put(group, sub_params) - } - return [ 'Core Nextflow options' : workflow_summary ] << params_summary - } - - // - // Beautify parameters for summary and return as string - // - public static String paramsSummaryLog(workflow, params) { - Map colors = NfcoreTemplate.logColours(params.monochrome_logs) - String output = '' - def params_map = paramsSummaryMap(workflow, params) - def max_chars = paramsMaxChars(params_map) - for (group in params_map.keySet()) { - def group_params = params_map.get(group) // This gets the parameters of that particular group - if (group_params) { - output += colors.bold + group + colors.reset + '\n' - for (param in group_params.keySet()) { - output += " " + colors.blue + param.padRight(max_chars) + ": " + colors.green + group_params.get(param) + colors.reset + '\n' - } - output += '\n' - } - } - output += "!! Only displaying parameters that differ from the pipeline defaults !!\n" - output += NfcoreTemplate.dashedLine(params.monochrome_logs) - return output - } - - // - // Loop over nested exceptions and print the causingException - // - private static void printExceptions(ex_json, params_json, log, enums, limit=5) { - def causingExceptions = ex_json['causingExceptions'] - if (causingExceptions.length() == 0) { - def m = ex_json['message'] =~ /required key \[([^\]]+)\] not found/ - // Missing required param - if (m.matches()) { - log.error "* Missing required parameter: --${m[0][1]}" - } - // Other base-level error - else if (ex_json['pointerToViolation'] == '#') { - log.error "* ${ex_json['message']}" - } - // Error with specific param - else { - def param = ex_json['pointerToViolation'] - ~/^#\// - def param_val = params_json[param].toString() - if (enums.containsKey(param)) { - def error_msg = "* --${param}: '${param_val}' is not a valid choice (Available choices" - if (enums[param].size() > limit) { - log.error "${error_msg} (${limit} of ${enums[param].size()}): ${enums[param][0..limit-1].join(', ')}, ... 
)" - } else { - log.error "${error_msg}: ${enums[param].join(', ')})" - } - } else { - log.error "* --${param}: ${ex_json['message']} (${param_val})" - } - } - } - for (ex in causingExceptions) { - printExceptions(ex, params_json, log, enums) - } - } - - // - // Remove an element from a JSONArray - // - private static JSONArray removeElement(json_array, element) { - def list = [] - int len = json_array.length() - for (int i=0;i - if(raw_schema.keySet().contains('definitions')){ - raw_schema.definitions.each { definition -> - for (key in definition.keySet()){ - if (definition[key].get("properties").keySet().contains(ignore_param)){ - // Remove the param to ignore - definition[key].get("properties").remove(ignore_param) - // If the param was required, change this - if (definition[key].has("required")) { - def cleaned_required = removeElement(definition[key].required, ignore_param) - definition[key].put("required", cleaned_required) - } - } - } - } - } - if(raw_schema.keySet().contains('properties') && raw_schema.get('properties').keySet().contains(ignore_param)) { - raw_schema.get("properties").remove(ignore_param) - } - if(raw_schema.keySet().contains('required') && raw_schema.required.contains(ignore_param)) { - def cleaned_required = removeElement(raw_schema.required, ignore_param) - raw_schema.put("required", cleaned_required) - } - } - return raw_schema - } - - // - // Clean and check parameters relative to Nextflow native classes - // - private static Map cleanParameters(params) { - def new_params = params.getClass().newInstance(params) - for (p in params) { - // remove anything evaluating to false - if (!p['value']) { - new_params.remove(p.key) - } - // Cast MemoryUnit to String - if (p['value'].getClass() == nextflow.util.MemoryUnit) { - new_params.replace(p.key, p['value'].toString()) - } - // Cast Duration to String - if (p['value'].getClass() == nextflow.util.Duration) { - new_params.replace(p.key, p['value'].toString().replaceFirst(/d(?!\S)/, "day")) - } - // Cast LinkedHashMap to String - if (p['value'].getClass() == LinkedHashMap) { - new_params.replace(p.key, p['value'].toString()) - } - } - return new_params - } - - // - // This function tries to read a JSON params file - // - private static LinkedHashMap paramsLoad(String json_schema) { - def params_map = new LinkedHashMap() - try { - params_map = paramsRead(json_schema) - } catch (Exception e) { - println "Could not read parameters settings from JSON. $e" - params_map = new LinkedHashMap() - } - return params_map - } - - // - // Method to actually read in JSON file using Groovy. - // Group (as Key), values are all parameters - // - Parameter1 as Key, Description as Value - // - Parameter2 as Key, Description as Value - // .... 
- // Group - // - - private static LinkedHashMap paramsRead(String json_schema) throws Exception { - def json = new File(json_schema).text - def Map schema_definitions = (Map) new JsonSlurper().parseText(json).get('definitions') - def Map schema_properties = (Map) new JsonSlurper().parseText(json).get('properties') - /* Tree looks like this in nf-core schema - * definitions <- this is what the first get('definitions') gets us - group 1 - title - description - properties - parameter 1 - type - description - parameter 2 - type - description - group 2 - title - description - properties - parameter 1 - type - description - * properties <- parameters can also be ungrouped, outside of definitions - parameter 1 - type - description - */ - - // Grouped params - def params_map = new LinkedHashMap() - schema_definitions.each { key, val -> - def Map group = schema_definitions."$key".properties // Gets the property object of the group - def title = schema_definitions."$key".title - def sub_params = new LinkedHashMap() - group.each { innerkey, value -> - sub_params.put(innerkey, value) - } - params_map.put(title, sub_params) - } - - // Ungrouped params - def ungrouped_params = new LinkedHashMap() - schema_properties.each { innerkey, value -> - ungrouped_params.put(innerkey, value) - } - params_map.put("Other parameters", ungrouped_params) - - return params_map - } - - // - // Get maximum number of characters across all parameter names - // - private static Integer paramsMaxChars(params_map) { - Integer max_chars = 0 - for (group in params_map.keySet()) { - def group_params = params_map.get(group) // This gets the parameters of that particular group - for (param in group_params.keySet()) { - if (param.size() > max_chars) { - max_chars = param.size() - } - } - } - return max_chars - } -} diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy index 25a0a74a..408951ae 100755 --- a/lib/NfcoreTemplate.groovy +++ b/lib/NfcoreTemplate.groovy @@ -128,7 +128,7 @@ class NfcoreTemplate { def email_html = html_template.toString() // Render the sendmail template - def max_multiqc_email_size = params.max_multiqc_email_size as nextflow.util.MemoryUnit + def max_multiqc_email_size = (params.containsKey('max_multiqc_email_size') ? 
params.max_multiqc_email_size : 0) as nextflow.util.MemoryUnit def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "$projectDir", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes() ] def sf = new File("$projectDir/assets/sendmail_template.txt") def sendmail_template = engine.createTemplate(sf).make(smail_fields) diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy index f03cd0e1..b7ef134c 100755 --- a/lib/WorkflowMain.groovy +++ b/lib/WorkflowMain.groovy @@ -20,40 +20,11 @@ class WorkflowMain { " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md" } - // - // Generate help string - // - public static String help(workflow, params) { - def command = "nextflow run ${workflow.manifest.name} --input samplesheet.csv --genome GRCh37 -profile docker" - def help_string = '' - help_string += NfcoreTemplate.logo(workflow, params.monochrome_logs) - help_string += NfcoreSchema.paramsHelp(workflow, params, command) - help_string += '\n' + citation(workflow) + '\n' - help_string += NfcoreTemplate.dashedLine(params.monochrome_logs) - return help_string - } - - // - // Generate parameter summary log string - // - public static String paramsSummaryLog(workflow, params) { - def summary_log = '' - summary_log += NfcoreTemplate.logo(workflow, params.monochrome_logs) - summary_log += NfcoreSchema.paramsSummaryLog(workflow, params) - summary_log += '\n' + citation(workflow) + '\n' - summary_log += NfcoreTemplate.dashedLine(params.monochrome_logs) - return summary_log - } // // Validate parameters and print summary to screen // public static void initialise(workflow, params, log) { - // Print help to screen if required - if (params.help) { - log.info help(workflow, params) - System.exit(0) - } // Print workflow version and exit on --version if (params.version) { @@ -62,14 +33,6 @@ class WorkflowMain { System.exit(0) } - // Print parameter summary log to screen - log.info paramsSummaryLog(workflow, params) - - // Validate workflow parameters via the JSON schema - if (params.validate_params) { - NfcoreSchema.validateParameters(workflow, params, log) - } - // Check that a -profile or Nextflow config has been provided to run the pipeline NfcoreTemplate.checkConfigProvided(workflow, log) diff --git a/lib/WorkflowRaredisease.groovy b/lib/WorkflowRaredisease.groovy index b4f3b6d8..99e5f500 100755 --- a/lib/WorkflowRaredisease.groovy +++ b/lib/WorkflowRaredisease.groovy @@ -11,12 +11,9 @@ class WorkflowRaredisease { // Check and validate parameters // public static void initialise(params, log) { - genomeExistsError(params, log) + genomeExistsError(params, log) - if (!params.fasta) { - Nextflow.error "Genome fasta file not specified with e.g. '--fasta genome.fa' or via a detectable config file." - } } // @@ -46,15 +43,57 @@ class WorkflowRaredisease { return yaml_file_text } - public static String methodsDescriptionText(run_workflow, mqc_methods_yaml) { + // + // Generate methods description for MultiQC + // + + public static String toolCitationText(params) { + + // TODO Optionally add in-text citation tools to this list. + // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 2023)" : "", + // Uncomment function in methodsDescriptionText to render in MultiQC report + def citation_text = [ + "Tools used in the workflow included:", + "FastQC (Andrews 2010),", + "MultiQC (Ewels et al. 2016)", + "." 
+ ].join(' ').trim() + + return citation_text + } + + public static String toolBibliographyText(params) { + + // TODO Optionally add bibliographic entries to this list. + // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "<li>Author (2023) Pub name, Journal, DOI</li>" : "", + // Uncomment function in methodsDescriptionText to render in MultiQC report + def reference_text = [ + "
<li>Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/.</li>", + "
<li>Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics, 32(19), 3047–3048. doi: 10.1093/bioinformatics/btw354</li>" + ].join(' ').trim() + + return reference_text + } + + public static String methodsDescriptionText(run_workflow, mqc_methods_yaml, params) { // Convert to a named map so can be used as with familiar NXF ${workflow} variable syntax in the MultiQC YML file def meta = [:] meta.workflow = run_workflow.toMap() meta["manifest_map"] = run_workflow.manifest.toMap() + // Pipeline DOI meta["doi_text"] = meta.manifest_map.doi ? "(doi: ${meta.manifest_map.doi})" : "" meta["nodoi_text"] = meta.manifest_map.doi ? "": "
<li>If available, make sure to update the text to include the Zenodo DOI of the version of the pipeline used.</li>" + // Tool references + meta["tool_citations"] = "" + meta["tool_bibliography"] = "" + + // TODO Only uncomment below if logic in toolCitationText/toolBibliographyText has been filled! + //meta["tool_citations"] = toolCitationText(params).replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".") + //meta["tool_bibliography"] = toolBibliographyText(params) + + def methods_text = mqc_methods_yaml.text def engine = new SimpleTemplateEngine() diff --git a/main.nf b/main.nf index 93def192..fcce4cc7 100644 --- a/main.nf +++ b/main.nf @@ -22,29 +22,27 @@ params.fai = WorkflowMain.getGenomeAttribute(params, params.bwa = WorkflowMain.getGenomeAttribute(params, 'bwa') params.bwamem2 = WorkflowMain.getGenomeAttribute(params, 'bwamem2') params.call_interval = WorkflowMain.getGenomeAttribute(params, 'call_interval') +params.cadd_resources = WorkflowMain.getGenomeAttribute(params, 'cadd_resources') +params.gcnvcaller_model = WorkflowMain.getGenomeAttribute(params, 'gcnvcaller_model') +params.gens_interval_list = WorkflowMain.getGenomeAttribute(params, 'gens_interval_list') +params.gens_pon = WorkflowMain.getGenomeAttribute(params, 'gens_pon') +params.gens_gnomad_pos = WorkflowMain.getGenomeAttribute(params, 'gens_gnomad_pos') params.gnomad_af = WorkflowMain.getGenomeAttribute(params, 'gnomad_af') params.gnomad_af_idx = WorkflowMain.getGenomeAttribute(params, 'gnomad_af_idx') params.intervals_wgs = WorkflowMain.getGenomeAttribute(params, 'intervals_wgs') params.intervals_y = WorkflowMain.getGenomeAttribute(params, 'intervals_y') params.known_dbsnp = WorkflowMain.getGenomeAttribute(params, 'known_dbsnp') params.known_dbsnp_tbi = WorkflowMain.getGenomeAttribute(params, 'known_dbsnp_tbi') -params.known_indels = WorkflowMain.getGenomeAttribute(params, 'known_indels') -params.known_mills = WorkflowMain.getGenomeAttribute(params, 'known_mills') params.ml_model = WorkflowMain.getGenomeAttribute(params, 'ml_model') -params.mt_backchain_shift = WorkflowMain.getGenomeAttribute(params, 'mt_backchain_shift') -params.mt_bwa_index_shift = WorkflowMain.getGenomeAttribute(params, 'mt_bwa_index_shift') -params.mt_bwamem2_index_shift = WorkflowMain.getGenomeAttribute(params, 'mt_bwamem2_index_shift') -params.mt_fasta_shift = WorkflowMain.getGenomeAttribute(params, 'mt_fasta_shift') -params.mt_fai_shift = WorkflowMain.getGenomeAttribute(params, 'mt_fai_shift') -params.mt_intervals = WorkflowMain.getGenomeAttribute(params, 'mt_intervals') -params.mt_intervals_shift = WorkflowMain.getGenomeAttribute(params, 'mt_intervals_shift') -params.mt_sequence_dictionary_shift = WorkflowMain.getGenomeAttribute(params, 'mt_sequence_dictionary_shift') +params.mt_fasta = WorkflowMain.getGenomeAttribute(params, 'mt_fasta') +params.ploidy_model = WorkflowMain.getGenomeAttribute(params, 'ploidy_model') params.reduced_penetrance = WorkflowMain.getGenomeAttribute(params, 'reduced_penetrance') +params.readcount_intervals = WorkflowMain.getGenomeAttribute(params, 'readcount_intervals') params.sequence_dictionary = WorkflowMain.getGenomeAttribute(params, 'sequence_dictionary') params.score_config_snv = WorkflowMain.getGenomeAttribute(params, 'score_config_snv') params.score_config_sv = WorkflowMain.getGenomeAttribute(params, 'score_config_sv') -params.target_bed = WorkflowMain.getGenomeAttribute(params, 'target_bed') params.svdb_query_dbs = WorkflowMain.getGenomeAttribute(params, 'svdb_query_dbs') +params.target_bed = WorkflowMain.getGenomeAttribute(params, 'target_bed') params.variant_catalog = 
WorkflowMain.getGenomeAttribute(params, 'variant_catalog') params.vep_filters = WorkflowMain.getGenomeAttribute(params, 'vep_filters') params.vcfanno_resources = WorkflowMain.getGenomeAttribute(params, 'vcfanno_resources') @@ -52,9 +50,6 @@ params.vcfanno_toml = WorkflowMain.getGenomeAttribute(params, params.vcfanno_lua = WorkflowMain.getGenomeAttribute(params, 'vcfanno_lua') params.vep_cache = WorkflowMain.getGenomeAttribute(params, 'vep_cache') params.vep_cache_version = WorkflowMain.getGenomeAttribute(params, 'vep_cache_version') -params.gens_interval_list = WorkflowMain.getGenomeAttribute(params, 'gens_interval_list') -params.gens_pon = WorkflowMain.getGenomeAttribute(params, 'gens_pon') -params.gens_gnomad_pos = WorkflowMain.getGenomeAttribute(params, 'gens_gnomad_pos') /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -62,6 +57,22 @@ params.gens_gnomad_pos = WorkflowMain.getGenomeAttribute(params, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ +include { validateParameters; paramsHelp } from 'plugin/nf-validation' + +// Print help message if needed +if (params.help) { + def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs) + def citation = '\n' + WorkflowMain.citation(workflow) + '\n' + def String command = "nextflow run ${workflow.manifest.name} --input samplesheet.csv --genome GRCh37 -profile docker" + log.info logo + paramsHelp(command) + citation + NfcoreTemplate.dashedLine(params.monochrome_logs) + System.exit(0) +} + +// Validate input parameters +if (params.validate_params) { + validateParameters() +} + WorkflowMain.initialise(workflow, params, log) /* diff --git a/modules.json b/modules.json index f7c5ca45..da9ac319 100644 --- a/modules.json +++ b/modules.json @@ -5,179 +5,229 @@ "https://github.com/nf-core/modules.git": { "modules": { "nf-core": { + "bcftools/annotate": { + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] + }, "bcftools/concat": { "branch": "master", - "git_sha": "582ff1755bdd205c65e2ba4c31e0a008dae299ec", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] }, "bcftools/filter": { "branch": "master", - "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "git_sha": "bd4e0df3319c171072d09dade42e3c06fa373779", "installed_by": ["modules"] }, "bcftools/merge": { "branch": "master", - "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "git_sha": "f7219b428dc69f93aa19f219fb7ce8eae8720400", "installed_by": ["modules"] }, "bcftools/norm": { "branch": "master", - "git_sha": "bcad95fb35e567ad25840d3297c3e17eff211a3a", + "git_sha": "0435e4eebc94e53721c194b2d5d06f455a79e407", "installed_by": ["modules"] }, "bcftools/reheader": { "branch": "master", - "git_sha": "bd4b60c7f9358c7146ac198fd0c4ae6355ddd086", + "git_sha": "0435e4eebc94e53721c194b2d5d06f455a79e407", "installed_by": ["modules"] }, "bcftools/roh": { "branch": "master", - "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] }, "bcftools/view": { "branch": "master", - "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] }, "bwa/index": { "branch": "master", - "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] }, "bwamem2/index": { "branch": "master", - 
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] }, "bwamem2/mem": { "branch": "master", - "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "git_sha": "0460d316170f75f323111b4a2c0a2989f0c32013", + "installed_by": ["modules"] + }, + "cadd": { + "branch": "master", + "git_sha": "603ecbd9f45300c9788f197d2a15a005685b4220", "installed_by": ["modules"] }, "cat/cat": { "branch": "master", - "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] + }, + "chromograph": { + "branch": "master", + "git_sha": "aad210ba51500be029740d088b4b4827f6f41509", "installed_by": ["modules"] }, "custom/dumpsoftwareversions": { "branch": "master", - "git_sha": "76cc4938c1f6ea5c7d83fed1eeffc146787f9543", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] }, "deepvariant": { "branch": "master", - "git_sha": "b9829e1064382745d8dff7f1d74d2138d2864f71", + "git_sha": "4b7d4863a5883b76e6bff13b6e52468fab090c5b", + "installed_by": ["modules"] + }, + "eklipse": { + "branch": "master", + "git_sha": "39656f68219340420f03bd54a68e111c86e107e6", "installed_by": ["modules"] }, "expansionhunter": { "branch": "master", - "git_sha": "5e4835b5798eaef33d23d9a2939f2ca9d3a07d4d", + "git_sha": "0260e5d22372eae434816d6970dedf3f5adc0053", "installed_by": ["modules"] }, "fastqc": { "branch": "master", - "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "git_sha": "bd8092b67b5103bdd52e300f75889442275c3117", "installed_by": ["modules"] }, "gatk4/bedtointervallist": { "branch": "master", - "git_sha": "643756685546fa61f5c8fba439af746c090b9180", + "git_sha": "2df2a11d5b12f2a73bca74f103691bc35d83c5fd", + "installed_by": ["modules"] + }, + "gatk4/collectreadcounts": { + "branch": "master", + "git_sha": "d25bf48327e86a7f737047a57ec264b90e22ce3d", "installed_by": ["modules"] }, "gatk4/createsequencedictionary": { "branch": "master", - "git_sha": "643756685546fa61f5c8fba439af746c090b9180", + "git_sha": "541811d779026c5d395925895fa5ed35e7216cc0", + "installed_by": ["modules"] + }, + "gatk4/determinegermlinecontigploidy": { + "branch": "master", + "git_sha": "d25bf48327e86a7f737047a57ec264b90e22ce3d", "installed_by": ["modules"] }, "gatk4/filtermutectcalls": { "branch": "master", - "git_sha": "643756685546fa61f5c8fba439af746c090b9180", + "git_sha": "2df2a11d5b12f2a73bca74f103691bc35d83c5fd", + "installed_by": ["modules"] + }, + "gatk4/germlinecnvcaller": { + "branch": "master", + "git_sha": "f6b848c6e1af9a9ecf4975aa8c8edad05e75e784", "installed_by": ["modules"] }, "gatk4/intervallisttools": { "branch": "master", - "git_sha": "643756685546fa61f5c8fba439af746c090b9180", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] }, "gatk4/mergebamalignment": { "branch": "master", - "git_sha": "643756685546fa61f5c8fba439af746c090b9180", + "git_sha": "2df2a11d5b12f2a73bca74f103691bc35d83c5fd", "installed_by": ["modules"] }, "gatk4/mergevcfs": { "branch": "master", - "git_sha": "643756685546fa61f5c8fba439af746c090b9180", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] }, "gatk4/mutect2": { "branch": "master", - "git_sha": "643756685546fa61f5c8fba439af746c090b9180", + "git_sha": "2df2a11d5b12f2a73bca74f103691bc35d83c5fd", + "installed_by": ["modules"] + }, + "gatk4/postprocessgermlinecnvcalls": { + "branch": "master", + "git_sha": "39ca55cc30514169f8420162bafe4ecf673f4b9a", + 
"installed_by": ["modules"] + }, + "gatk4/preprocessintervals": { + "branch": "master", + "git_sha": "1226419498a14d17f98d12d6488d333b0dbd0418", "installed_by": ["modules"] }, "gatk4/printreads": { "branch": "master", - "git_sha": "643756685546fa61f5c8fba439af746c090b9180", + "git_sha": "541811d779026c5d395925895fa5ed35e7216cc0", "installed_by": ["modules"] }, "gatk4/revertsam": { "branch": "master", - "git_sha": "643756685546fa61f5c8fba439af746c090b9180", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] }, "gatk4/samtofastq": { "branch": "master", - "git_sha": "643756685546fa61f5c8fba439af746c090b9180", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] }, "gatk4/selectvariants": { "branch": "master", - "git_sha": "643756685546fa61f5c8fba439af746c090b9180", + "git_sha": "541811d779026c5d395925895fa5ed35e7216cc0", + "installed_by": ["modules"] + }, + "gatk4/shiftfasta": { + "branch": "master", + "git_sha": "541811d779026c5d395925895fa5ed35e7216cc0", "installed_by": ["modules"] }, "gatk4/splitintervals": { "branch": "master", - "git_sha": "643756685546fa61f5c8fba439af746c090b9180", + "git_sha": "541811d779026c5d395925895fa5ed35e7216cc0", "installed_by": ["modules"] }, "gatk4/variantfiltration": { "branch": "master", - "git_sha": "643756685546fa61f5c8fba439af746c090b9180", + "git_sha": "2df2a11d5b12f2a73bca74f103691bc35d83c5fd", "installed_by": ["modules"] }, "genmod/annotate": { "branch": "master", - "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] }, "genmod/compound": { "branch": "master", - "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] }, "genmod/models": { "branch": "master", - "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] }, "genmod/score": { "branch": "master", - "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] }, "glnexus": { "branch": "master", - "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] }, "haplocheck": { "branch": "master", - "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] }, "haplogrep2/classify": { "branch": "master", - "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] }, "hmtnote/annotate": { @@ -187,152 +237,162 @@ }, "manta/germline": { "branch": "master", - "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "git_sha": "80dbd95c558a0ebb2123d95f50c093a7f714a0d7", "installed_by": ["modules"] }, "mosdepth": { "branch": "master", - "git_sha": "783cc040350dbee673fd57f6a6300aea3d085b7c", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] }, "multiqc": { "branch": "master", - "git_sha": "f2d63bd5b68925f98f572eed70993d205cc694b7", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] }, "peddy": { "branch": "master", - "git_sha": "21e6e085967902fb393b27b2e7590ac4c85fab8e", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] }, "picard/addorreplacereadgroups": { "branch": 
"master", - "git_sha": "28995552268a117551ded48dadcf42b0caf0e834", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] }, "picard/collecthsmetrics": { "branch": "master", - "git_sha": "75027bf77472b1f4fd2cdd7e46f83119dfb0f2c6", + "git_sha": "0ce3ab0ac301f160225b22254fa238478b4389f2", "installed_by": ["modules"] }, "picard/collectmultiplemetrics": { "branch": "master", - "git_sha": "75027bf77472b1f4fd2cdd7e46f83119dfb0f2c6", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] }, "picard/collectwgsmetrics": { "branch": "master", - "git_sha": "75027bf77472b1f4fd2cdd7e46f83119dfb0f2c6", + "git_sha": "735e1e04e7e01751d2d6e97055bbdb6f70683cc1", "installed_by": ["modules"] }, "picard/liftovervcf": { "branch": "master", - "git_sha": "75027bf77472b1f4fd2cdd7e46f83119dfb0f2c6", + "git_sha": "735e1e04e7e01751d2d6e97055bbdb6f70683cc1", "installed_by": ["modules"] }, "picard/markduplicates": { "branch": "master", - "git_sha": "75027bf77472b1f4fd2cdd7e46f83119dfb0f2c6", + "git_sha": "735e1e04e7e01751d2d6e97055bbdb6f70683cc1", "installed_by": ["modules"] }, "picard/renamesampleinvcf": { "branch": "master", - "git_sha": "75027bf77472b1f4fd2cdd7e46f83119dfb0f2c6", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] }, "picard/sortvcf": { "branch": "master", - "git_sha": "75027bf77472b1f4fd2cdd7e46f83119dfb0f2c6", + "git_sha": "735e1e04e7e01751d2d6e97055bbdb6f70683cc1", "installed_by": ["modules"] }, "qualimap/bamqc": { "branch": "master", - "git_sha": "75027bf77472b1f4fd2cdd7e46f83119dfb0f2c6", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] }, "rhocall/annotate": { "branch": "master", - "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", + "git_sha": "d73505dd68b27b53b4002e84eea21a2819907562", "installed_by": ["modules"] }, "samtools/faidx": { "branch": "master", - "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "git_sha": "fd742419940e01ba1c5ecb172c3e32ec840662fe", "installed_by": ["modules"] }, "samtools/index": { "branch": "master", - "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] }, "samtools/merge": { "branch": "master", - "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", + "git_sha": "0460d316170f75f323111b4a2c0a2989f0c32013", "installed_by": ["modules"] }, "samtools/sort": { "branch": "master", - "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", + "git_sha": "a0f7be95788366c1923171e358da7d049eb440f9", "installed_by": ["modules"] }, "samtools/stats": { "branch": "master", - "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", + "git_sha": "735e1e04e7e01751d2d6e97055bbdb6f70683cc1", + "installed_by": ["modules"] + }, + "samtools/view": { + "branch": "master", + "git_sha": "3ffae3598260a99e8db3207dead9f73f87f90d1f", "installed_by": ["modules"] }, "smncopynumbercaller": { "branch": "master", - "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] }, "stranger": { "branch": "master", - "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "git_sha": "0260e5d22372eae434816d6970dedf3f5adc0053", "installed_by": ["modules"] }, "svdb/merge": { "branch": "master", - "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "git_sha": "603ecbd9f45300c9788f197d2a15a005685b4220", "installed_by": ["modules"] }, "svdb/query": { "branch": "master", - "git_sha": 
"c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] }, "tabix/bgziptabix": { "branch": "master", - "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "git_sha": "591b71642820933dcb3c954c934b397bd00d8e5e", "installed_by": ["modules"] }, "tabix/tabix": { "branch": "master", - "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] }, "tiddit/cov": { "branch": "master", - "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "git_sha": "0460d316170f75f323111b4a2c0a2989f0c32013", "installed_by": ["modules"] }, "tiddit/sv": { "branch": "master", - "git_sha": "0367c23758d83fc6973a8cd35ecba40a0cfcf2af", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] }, "ucsc/wigtobigwig": { "branch": "master", - "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", + "git_sha": "66290981ab6038ea86177ade40b9449bc790b0ce", "installed_by": ["modules"] }, "untar": { "branch": "master", - "git_sha": "b9829e1064382745d8dff7f1d74d2138d2864f71", + "git_sha": "5c460c5a4736974abde2843294f35307ee2b0e5e", + "installed_by": ["modules"] + }, + "upd": { + "branch": "master", + "git_sha": "9b159849d74f0eef251168c81c16da08215bbad5", "installed_by": ["modules"] }, "vcfanno": { "branch": "master", - "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] } } diff --git a/modules/local/create_bed_from_fai.nf b/modules/local/create_bed_from_fai.nf index ed2a35d2..6b96b6ae 100644 --- a/modules/local/create_bed_from_fai.nf +++ b/modules/local/create_bed_from_fai.nf @@ -5,7 +5,7 @@ process BUILD_BED { conda "anaconda::gawk=5.1.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/gawk:5.1.0' : - 'quay.io/biocontainers/gawk:5.1.0' }" + 'biocontainers/gawk:5.1.0' }" input: tuple val(meta), path(fasta_fai) diff --git a/modules/local/ensemblvep/main.nf b/modules/local/ensemblvep/main.nf index 11ef4651..81d4191f 100644 --- a/modules/local/ensemblvep/main.nf +++ b/modules/local/ensemblvep/main.nf @@ -11,11 +11,11 @@ process ENSEMBLVEP { input: tuple val(meta), path(vcf) + tuple val(meta2), path(fasta) val genome val species val cache_version path cache - path fasta path extra_files output: diff --git a/modules/local/get_chrom_sizes.nf b/modules/local/get_chrom_sizes.nf index d5dc0a76..4ab80ed1 100644 --- a/modules/local/get_chrom_sizes.nf +++ b/modules/local/get_chrom_sizes.nf @@ -5,7 +5,7 @@ process GET_CHROM_SIZES { conda "conda-forge::coreutils=8.31" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/gnu-wget:1.18--0' : - 'quay.io/biocontainers/gnu-wget:1.18--0' }" + 'biocontainers/gnu-wget:1.18--0' }" input: tuple val(meta), path(fai) diff --git a/modules/local/mt_deletion_script.nf b/modules/local/mt_deletion_script.nf new file mode 100644 index 00000000..02d55876 --- /dev/null +++ b/modules/local/mt_deletion_script.nf @@ -0,0 +1,48 @@ +process MT_DELETION { + tag "$meta.id" + label 'process_single' + + conda "bioconda::samtools=1.17" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : + 'biocontainers/samtools:1.17--h00cdaf9_0' }" + + input: + tuple val(meta), path(input), path(input_index) + tuple val(meta2), path(fasta) + + output: + tuple val(meta), path('*.txt'), emit: mt_del_result + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def reference = fasta ? "--reference ${fasta}" : "" + + """ + samtools stats --threads ${task.cpus} $args ${reference} ${input} | \\ + grep -E ^IS | \\ + awk 'BEGIN {sum=0} (\$2>=1200 && \$2<=15000) {sum=sum+\$3} (\$2<1200 || \$2>15000) {sum_norm=sum_norm+\$3} END \\ + {print "intermediate discordant ", sum, "normal ", sum_norm, "ratio ppk", sum*1000/(sum_norm+sum)}' 1> ${prefix}.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}_mt_del.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/local/sentieon/bqsr.nf b/modules/local/sentieon/bqsr.nf index dc659871..390108e1 100644 --- a/modules/local/sentieon/bqsr.nf +++ b/modules/local/sentieon/bqsr.nf @@ -7,10 +7,10 @@ process SENTIEON_BQSR { input: tuple val(meta), path(bam), path(bai) - path fasta - path fai - tuple val(meta2), path(known_dbsnp) - tuple val(meta3), path(known_dbsnp_tbi) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + tuple val(meta4), path(known_dbsnp) + tuple val(meta5), path(known_dbsnp_tbi) output: tuple val(meta), path('*.bam') , emit: bam @@ -24,12 +24,12 @@ process SENTIEON_BQSR { task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' - def args2 = task.ext.args2 ?: '' - def args3 = task.ext.args3 ?: '' - def input = bam.sort().collect{"-i $it"}.join(' ') - def dbsnp = known_dbsnp ? "-k $known_dbsnp" : '' + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def args3 = task.ext.args3 ?: '' + def dbsnp = known_dbsnp ? 
"-k $known_dbsnp" : '' def prefix = task.ext.prefix ?: "${meta.id}" + def input = bam.sort().collect{"-i $it"}.join(' ') """ if [ \${SENTIEON_LICENSE_BASE64:-"unset"} != "unset" ]; then echo "Initializing SENTIEON_LICENSE env variable" diff --git a/modules/local/sentieon/bwamem.nf b/modules/local/sentieon/bwamem.nf index c6d95089..60ca36d6 100644 --- a/modules/local/sentieon/bwamem.nf +++ b/modules/local/sentieon/bwamem.nf @@ -7,9 +7,9 @@ process SENTIEON_BWAMEM { input: tuple val(meta), path(reads) - path fasta - path fai - tuple val(meta2), path(index) // meta2 has same purpose as meta, and holds information about the genome/index + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + tuple val(meta4), path(index) output: tuple val(meta), path('*.bam'), emit: bam @@ -20,8 +20,8 @@ process SENTIEON_BWAMEM { task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' - def args2 = task.ext.args2 ?: '' + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' def prefix = task.ext.prefix ?: "${meta.id}" """ INDEX=`find -L ./ -name "*.amb" | sed 's/.amb//'` diff --git a/modules/local/sentieon/bwamemindex.nf b/modules/local/sentieon/bwamemindex.nf index 09d3eb1a..4b030975 100644 --- a/modules/local/sentieon/bwamemindex.nf +++ b/modules/local/sentieon/bwamemindex.nf @@ -16,7 +16,7 @@ process SENTIEON_BWAINDEX { task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' + def args = task.ext.args ?: '' def prefix = task.ext.prefix ? "bwa/${task.ext.prefix}" : "bwa/${fasta.baseName}" """ mkdir bwa diff --git a/modules/local/sentieon/datametrics.nf b/modules/local/sentieon/datametrics.nf index 498338f1..37ca6312 100644 --- a/modules/local/sentieon/datametrics.nf +++ b/modules/local/sentieon/datametrics.nf @@ -7,8 +7,8 @@ process SENTIEON_DATAMETRICS { input: tuple val(meta), path(bam), path(bai) - path fasta - path fai + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) output: tuple val(meta), path('*mq_metrics.txt') , emit: mq_metrics @@ -23,9 +23,9 @@ process SENTIEON_DATAMETRICS { task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" def input = bam.sort().collect{"-i $it"}.join(' ') - def prefix = task.ext.prefix ?: "${meta.id}" """ if [ \${SENTIEON_LICENSE_BASE64:-"unset"} != "unset" ]; then echo "Initializing SENTIEON_LICENSE env variable" diff --git a/modules/local/sentieon/dedup.nf b/modules/local/sentieon/dedup.nf index 6730e7b5..bb738985 100644 --- a/modules/local/sentieon/dedup.nf +++ b/modules/local/sentieon/dedup.nf @@ -7,8 +7,8 @@ process SENTIEON_DEDUP { input: tuple val(meta), path(bam), path(bai), path(score), path(score_idx) - path fasta - path fai + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) output: tuple val(meta), path('*.bam') , emit: bam @@ -20,9 +20,9 @@ process SENTIEON_DEDUP { task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' - def input = bam.sort().collect{"-i $it"}.join(' ') + def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def input = bam.sort().collect{"-i $it"}.join(' ') """ if [ \${SENTIEON_LICENSE_BASE64:-"unset"} != "unset" ]; then echo "Initializing SENTIEON_LICENSE env variable" diff --git a/modules/local/sentieon/dnamodelapply.nf b/modules/local/sentieon/dnamodelapply.nf index d2aaebff..32582b2c 100644 --- a/modules/local/sentieon/dnamodelapply.nf +++ b/modules/local/sentieon/dnamodelapply.nf @@ -5,8 +5,8 @@ 
process SENTIEON_DNAMODELAPPLY { input: tuple val(meta), path(vcf), path(vcf_idx) - path fasta - path fai + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) path ml_model output: diff --git a/modules/local/sentieon/dnascope.nf b/modules/local/sentieon/dnascope.nf index f9896c76..d03fe2d4 100644 --- a/modules/local/sentieon/dnascope.nf +++ b/modules/local/sentieon/dnascope.nf @@ -5,10 +5,10 @@ process SENTIEON_DNASCOPE { input: tuple val(meta), path(bam), path(bai) - path fasta - path fai - tuple val(meta2), path(known_dbsnp) - tuple val(meta3), path(known_dbsnp_tbi) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + tuple val(meta4), path(known_dbsnp) + tuple val(meta5), path(known_dbsnp_tbi) path call_interval path ml_model @@ -22,11 +22,11 @@ process SENTIEON_DNASCOPE { task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' - def args2 = task.ext.args2 ?: '' - def interval = call_interval ? "--interval ${call_interval}" : '' - def dbsnp = known_dbsnp ? "-d ${known_dbsnp}" : '' - def model = ml_model ? "--model ${ml_model}" : '' + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def interval = call_interval ? "--interval ${call_interval}" : '' + def dbsnp = known_dbsnp ? "-d ${known_dbsnp}" : '' + def model = ml_model ? "--model ${ml_model}" : '' def prefix = task.ext.prefix ?: "${meta.id}" """ diff --git a/modules/local/sentieon/locuscollector.nf b/modules/local/sentieon/locuscollector.nf index fa54756d..9335b0ec 100644 --- a/modules/local/sentieon/locuscollector.nf +++ b/modules/local/sentieon/locuscollector.nf @@ -17,7 +17,7 @@ process SENTIEON_LOCUSCOLLECTOR { task.ext.when == null || task.ext.when script: - def input = bam.sort().collect{"-i $it"}.join(' ') + def input = bam.sort().collect{"-i $it"}.join(' ') def prefix = task.ext.prefix ? 
"${task.ext.prefix}.txt.gz" : "${meta.id}.txt.gz" """ if [ \${SENTIEON_LICENSE_BASE64:-"unset"} != "unset" ]; then diff --git a/modules/local/sentieon/readwriter.nf b/modules/local/sentieon/readwriter.nf index 5490dd75..cc90fd25 100644 --- a/modules/local/sentieon/readwriter.nf +++ b/modules/local/sentieon/readwriter.nf @@ -18,7 +18,7 @@ process SENTIEON_READWRITER { task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' + def args = task.ext.args ?: '' def input = bam.sort().collect{"-i $it"}.join(' ') def prefix = task.ext.prefix ?: "${meta.id}" """ diff --git a/modules/local/sentieon/tnscope.nf b/modules/local/sentieon/tnscope.nf index 7cfc1c06..c4857dc6 100644 --- a/modules/local/sentieon/tnscope.nf +++ b/modules/local/sentieon/tnscope.nf @@ -5,8 +5,8 @@ process SENTIEON_TNSCOPE { input: tuple val(meta), path(bam), path(bai) - path fasta - path fai + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) output: tuple val(meta), path("*vcf.gz") , emit: vcf diff --git a/modules/local/sentieon/wgsmetricsalgo.nf b/modules/local/sentieon/wgsmetricsalgo.nf index 12348b1f..3663947d 100644 --- a/modules/local/sentieon/wgsmetricsalgo.nf +++ b/modules/local/sentieon/wgsmetricsalgo.nf @@ -8,7 +8,7 @@ process SENTIEON_WGSMETRICSALGO { input: tuple val(meta), path(bam), path(bai) tuple val(meta2), path(fasta) - tuple val(meta2), path(fai) + tuple val(meta3), path(fai) path intervals_list output: diff --git a/modules/nf-core/bcftools/annotate/main.nf b/modules/nf-core/bcftools/annotate/main.nf new file mode 100644 index 00000000..49eec2e8 --- /dev/null +++ b/modules/nf-core/bcftools/annotate/main.nf @@ -0,0 +1,63 @@ +process BCFTOOLS_ANNOTATE { + tag "$meta.id" + label 'process_low' + + conda "bioconda::bcftools=1.17" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bcftools:1.17--haef29d1_0': + 'biocontainers/bcftools:1.17--haef29d1_0' }" + + input: + tuple val(meta), path(input), path(index), path(annotations), path(annotations_index), path(header_lines) + + output: + tuple val(meta), path("*.{vcf,vcf.gz,bcf,bcf.gz}"), emit: vcf + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def header_file = header_lines ? "--header-lines ${header_lines}" : '' + def annotations_file = annotations ? "--annotations ${annotations}" : '' + def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : + args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : + args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : + args.contains("--output-type v") || args.contains("-Ov") ? "vcf" : + "vcf" + if ("$input" == "${prefix}.${extension}") error "Input and output names are the same, set prefix in module configuration to disambiguate!" + """ + bcftools \\ + annotate \\ + $args \\ + $annotations_file \\ + $header_file \\ + --output ${prefix}.${extension} \\ + --threads $task.cpus \\ + $input + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$( bcftools --version |& sed '1!d; s/^.*bcftools //' ) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : + args.contains("--output-type u") || args.contains("-Ou") ? 
"bcf" : + args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : + args.contains("--output-type v") || args.contains("-Ov") ? "vcf" : + "vcf" + """ + touch ${prefix}.${extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$( bcftools --version |& sed '1!d; s/^.*bcftools //' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/bcftools/annotate/meta.yml b/modules/nf-core/bcftools/annotate/meta.yml new file mode 100644 index 00000000..60f053ea --- /dev/null +++ b/modules/nf-core/bcftools/annotate/meta.yml @@ -0,0 +1,56 @@ +name: bcftools_annotate +description: Add or remove annotations. +keywords: + - bcftools + - annotate + - vcf + - remove + - add +tools: + - annotate: + description: Add or remove annotations. + homepage: http://samtools.github.io/bcftools/bcftools.html + documentation: https://samtools.github.io/bcftools/bcftools.html#annotate + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: Query VCF or BCF file, can be either uncompressed or compressed + - index: + type: file + description: Index of the query VCF or BCF file + - annotations: + type: file + description: Bgzip-compressed file with annotations + - annotations_index: + type: file + description: Index of the annotations file + - header_lines: + type: file + description: Contains lines to append to the output VCF header + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - vcf: + type: file + description: Compressed annotated VCF file + pattern: "*{vcf,vcf.gz,bcf,bcf.gz}" + +authors: + - "@projectoriented" + - "@ramprasadn" diff --git a/modules/nf-core/bcftools/concat/main.nf b/modules/nf-core/bcftools/concat/main.nf index c7c39d9f..244a42cc 100644 --- a/modules/nf-core/bcftools/concat/main.nf +++ b/modules/nf-core/bcftools/concat/main.nf @@ -2,10 +2,10 @@ process BCFTOOLS_CONCAT { tag "$meta.id" label 'process_medium' - conda "bioconda::bcftools=1.16" + conda "bioconda::bcftools=1.17" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/bcftools:1.16--hfe4b78e_1': - 'quay.io/biocontainers/bcftools:1.16--hfe4b78e_1' }" + 'https://depot.galaxyproject.org/singularity/bcftools:1.17--haef29d1_0': + 'biocontainers/bcftools:1.17--haef29d1_0' }" input: tuple val(meta), path(vcfs), path(tbi) diff --git a/modules/nf-core/bcftools/filter/main.nf b/modules/nf-core/bcftools/filter/main.nf index 4e02009d..099eedc7 100644 --- a/modules/nf-core/bcftools/filter/main.nf +++ b/modules/nf-core/bcftools/filter/main.nf @@ -2,17 +2,17 @@ process BCFTOOLS_FILTER { tag "$meta.id" label 'process_medium' - conda "bioconda::bcftools=1.16" + conda "bioconda::bcftools=1.17" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/bcftools:1.16--hfe4b78e_1': - 'quay.io/biocontainers/bcftools:1.16--hfe4b78e_1' }" + 'https://depot.galaxyproject.org/singularity/bcftools:1.17--haef29d1_0': + 'biocontainers/bcftools:1.17--haef29d1_0' }" input: tuple val(meta), path(vcf) output: - tuple val(meta), path("*.gz"), emit: vcf - path "versions.yml" , emit: versions + tuple val(meta), path("*.${extension}"), emit: vcf + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -20,9 +20,19 @@ process BCFTOOLS_FILTER { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + + extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : + args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : + args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : + args.contains("--output-type v") || args.contains("-Ov") ? "vcf" : + "vcf" + + if ("$vcf" == "${prefix}.${extension}") error "Input and output names are the same, set prefix in module configuration to disambiguate!" + """ bcftools filter \\ - --output ${prefix}.vcf.gz \\ + --output ${prefix}.${extension} \\ + --threads ${task.cpus} \\ $args \\ $vcf @@ -35,8 +45,16 @@ process BCFTOOLS_FILTER { stub: def prefix = task.ext.prefix ?: "${meta.id}" + def args = task.ext.args ?: '' + extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : + args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : + args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : + args.contains("--output-type v") || args.contains("-Ov") ? "vcf" : + "vcf" + + if ("$vcf" == "${prefix}.${extension}") error "Input and output names are the same, set prefix in module configuration to disambiguate!" + """ - touch ${prefix}.vcf.gz + touch ${prefix}.${extension} cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/bcftools/merge/main.nf b/modules/nf-core/bcftools/merge/main.nf index e664f0eb..eec740ed 100644 --- a/modules/nf-core/bcftools/merge/main.nf +++ b/modules/nf-core/bcftools/merge/main.nf @@ -2,16 +2,16 @@ process BCFTOOLS_MERGE { tag "$meta.id" label 'process_medium' - conda "bioconda::bcftools=1.16" + conda "bioconda::bcftools=1.17" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/bcftools:1.16--hfe4b78e_1': - 'quay.io/biocontainers/bcftools:1.16--hfe4b78e_1' }" + 'https://depot.galaxyproject.org/singularity/bcftools:1.17--haef29d1_0': + 'biocontainers/bcftools:1.17--haef29d1_0' }" input: tuple val(meta), path(vcfs), path(tbis) - path bed - path fasta - path fasta_fai + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + path(bed) output: tuple val(meta), path("*.{bcf,vcf}{,.gz}"), emit: merged_variants @@ -27,16 +27,17 @@ process BCFTOOLS_MERGE { def regions = bed ? "--regions-file $bed" : "" def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : + args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : args.contains("--output-type v") || args.contains("-Ov") ?
"vcf" : - "vcf.gz" + "vcf" """ bcftools merge \\ + $args \\ $regions \\ --threads $task.cpus \\ --output ${prefix}.${extension} \\ - $args \\ - *.vcf.gz + $vcfs cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -49,8 +50,9 @@ process BCFTOOLS_MERGE { def prefix = task.ext.prefix ?: "${meta.id}" def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : + args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : args.contains("--output-type v") || args.contains("-Ov") ? "vcf" : - "vcf.gz" + "vcf" """ touch ${prefix}.${extension} diff --git a/modules/nf-core/bcftools/merge/meta.yml b/modules/nf-core/bcftools/merge/meta.yml index 53dc23eb..7bbe5216 100644 --- a/modules/nf-core/bcftools/merge/meta.yml +++ b/modules/nf-core/bcftools/merge/meta.yml @@ -19,27 +19,37 @@ input: Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - vcfs: - type: files + type: file description: | List containing 2 or more vcf files e.g. [ 'file1.vcf', 'file2.vcf' ] - tbis: - type: files + type: file description: | List containing the tbi index files corresponding to the vcfs input files e.g. [ 'file1.vcf.tbi', 'file2.vcf.tbi' ] - - bed: - type: file - description: "(Optional) The bed regions to merge on" - pattern: "*.bed" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] - fasta: type: file description: "(Optional) The fasta reference file (only necessary for the `--gvcf FILE` parameter)" pattern: "*.{fasta,fa}" - - fasta: + - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fai: type: file description: "(Optional) The fasta reference file index (only necessary for the `--gvcf FILE` parameter)" pattern: "*.fai" + - bed: + type: file + description: "(Optional) The bed regions to merge on" + pattern: "*.bed" output: - meta: type: map @@ -70,3 +80,4 @@ authors: - "@joseespinosa" - "@drpatelh" - "@nvnieuwk" + - "@ramprasadn" diff --git a/modules/nf-core/bcftools/norm/main.nf b/modules/nf-core/bcftools/norm/main.nf index 90387d6c..608f20a1 100644 --- a/modules/nf-core/bcftools/norm/main.nf +++ b/modules/nf-core/bcftools/norm/main.nf @@ -2,14 +2,14 @@ process BCFTOOLS_NORM { tag "$meta.id" label 'process_medium' - conda "bioconda::bcftools=1.16" + conda "bioconda::bcftools=1.17" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/bcftools:1.16--hfe4b78e_1': - 'quay.io/biocontainers/bcftools:1.16--hfe4b78e_1' }" + 'https://depot.galaxyproject.org/singularity/bcftools:1.17--haef29d1_0': + 'biocontainers/bcftools:1.17--haef29d1_0' }" input: tuple val(meta), path(vcf), path(tbi) - path(fasta) + tuple val(meta2), path(fasta) output: tuple val(meta), path("*.{vcf,vcf.gz,bcf,bcf.gz}") , emit: vcf diff --git a/modules/nf-core/bcftools/norm/meta.yml b/modules/nf-core/bcftools/norm/meta.yml index c3ea2c03..33ebea36 100644 --- a/modules/nf-core/bcftools/norm/meta.yml +++ b/modules/nf-core/bcftools/norm/meta.yml @@ -30,6 +30,11 @@ input: description: | An optional index of the VCF file (for when the VCF is compressed) pattern: "*.vcf.gz.tbi" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'genome' ] - fasta: type: file description: FASTA reference file @@ -50,3 +55,4 @@ output: pattern: "versions.yml" authors: - "@abhi18av" + - "@ramprasadn" diff --git a/modules/nf-core/bcftools/reheader/main.nf b/modules/nf-core/bcftools/reheader/main.nf index 57634c07..28d567a2 100644 --- a/modules/nf-core/bcftools/reheader/main.nf +++ b/modules/nf-core/bcftools/reheader/main.nf @@ -2,14 +2,14 @@ process BCFTOOLS_REHEADER { tag "$meta.id" label 'process_low' - conda "bioconda::bcftools=1.16" + conda "bioconda::bcftools=1.17" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/bcftools:1.16--hfe4b78e_1': - 'quay.io/biocontainers/bcftools:1.16--hfe4b78e_1' }" + 'https://depot.galaxyproject.org/singularity/bcftools:1.17--haef29d1_0': + 'biocontainers/bcftools:1.17--haef29d1_0' }" input: tuple val(meta), path(vcf), path(header) - path fai + tuple val(meta2), path(fai) output: tuple val(meta), path("*.{vcf,vcf.gz,bcf,bcf.gz}"), emit: vcf diff --git a/modules/nf-core/bcftools/reheader/meta.yml b/modules/nf-core/bcftools/reheader/meta.yml index 44d75fdf..60704ab4 100644 --- a/modules/nf-core/bcftools/reheader/meta.yml +++ b/modules/nf-core/bcftools/reheader/meta.yml @@ -27,6 +27,11 @@ input: type: file description: New header to add to the VCF pattern: "*.{header.txt}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] - fai: type: file description: Fasta index to update header sequences with @@ -50,3 +55,4 @@ output: authors: - "@bjohnnyd" - "@jemten" + - "@ramprasadn" diff --git a/modules/nf-core/bcftools/roh/main.nf b/modules/nf-core/bcftools/roh/main.nf index dc516b02..d8a8bc79 100644 --- a/modules/nf-core/bcftools/roh/main.nf +++ b/modules/nf-core/bcftools/roh/main.nf @@ -2,10 +2,10 @@ process BCFTOOLS_ROH { tag "$meta.id" label 'process_medium' - conda "bioconda::bcftools=1.16" + conda "bioconda::bcftools=1.17" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/bcftools:1.16--hfe4b78e_1': - 'quay.io/biocontainers/bcftools:1.16--hfe4b78e_1' }" + 'https://depot.galaxyproject.org/singularity/bcftools:1.17--haef29d1_0': + 'biocontainers/bcftools:1.17--haef29d1_0' }" input: tuple val(meta), path(vcf), path(tbi) diff --git a/modules/nf-core/bcftools/view/main.nf b/modules/nf-core/bcftools/view/main.nf index 04ced9c9..86f807d3 100644 --- a/modules/nf-core/bcftools/view/main.nf +++ b/modules/nf-core/bcftools/view/main.nf @@ -2,10 +2,10 @@ process BCFTOOLS_VIEW { tag "$meta.id" label 'process_medium' - conda "bioconda::bcftools=1.16" + conda "bioconda::bcftools=1.17" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/bcftools:1.16--hfe4b78e_1': - 'quay.io/biocontainers/bcftools:1.16--hfe4b78e_1' }" + 'https://depot.galaxyproject.org/singularity/bcftools:1.17--haef29d1_0': + 'biocontainers/bcftools:1.17--haef29d1_0' }" input: tuple val(meta), path(vcf), path(index) diff --git a/modules/nf-core/bwa/index/main.nf b/modules/nf-core/bwa/index/main.nf index 7ccf3110..8d2e56d9 100644 --- a/modules/nf-core/bwa/index/main.nf +++ b/modules/nf-core/bwa/index/main.nf @@ -5,7 +5,7 @@ process BWA_INDEX { conda "bioconda::bwa=0.7.17" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/bwa:0.7.17--hed695b0_7' : - 'quay.io/biocontainers/bwa:0.7.17--hed695b0_7' }" + 'biocontainers/bwa:0.7.17--hed695b0_7' }" input: tuple val(meta), path(fasta) diff --git a/modules/nf-core/bwamem2/index/main.nf b/modules/nf-core/bwamem2/index/main.nf index a236121b..30940852 100644 --- a/modules/nf-core/bwamem2/index/main.nf +++ b/modules/nf-core/bwamem2/index/main.nf @@ -5,7 +5,7 @@ process BWAMEM2_INDEX { conda "bioconda::bwa-mem2=2.2.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/bwa-mem2:2.2.1--he513fc3_0' : - 'quay.io/biocontainers/bwa-mem2:2.2.1--he513fc3_0' }" + 'biocontainers/bwa-mem2:2.2.1--he513fc3_0' }" input: tuple val(meta), path(fasta) diff --git a/modules/nf-core/bwamem2/mem/main.nf b/modules/nf-core/bwamem2/mem/main.nf index 489b1704..d427dea3 100644 --- a/modules/nf-core/bwamem2/mem/main.nf +++ b/modules/nf-core/bwamem2/mem/main.nf @@ -5,7 +5,7 @@ process BWAMEM2_MEM { conda "bioconda::bwa-mem2=2.2.1 bioconda::samtools=1.16.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/mulled-v2-e5d375990341c5aef3c9aff74f96f66f65375ef6:2cdf6bf1e92acbeb9b2834b1c58754167173a410-0' : - 'quay.io/biocontainers/mulled-v2-e5d375990341c5aef3c9aff74f96f66f65375ef6:2cdf6bf1e92acbeb9b2834b1c58754167173a410-0' }" + 'biocontainers/mulled-v2-e5d375990341c5aef3c9aff74f96f66f65375ef6:2cdf6bf1e92acbeb9b2834b1c58754167173a410-0' }" input: tuple val(meta), path(reads) diff --git a/modules/nf-core/bwamem2/mem/meta.yml b/modules/nf-core/bwamem2/mem/meta.yml index a4655510..bc3dfcdd 100644 --- a/modules/nf-core/bwamem2/mem/meta.yml +++ b/modules/nf-core/bwamem2/mem/meta.yml @@ -28,6 +28,11 @@ input: description: | List of input FastQ files of size 1 and 2 for single-end and paired-end data, respectively. + - meta2: + type: map + description: | + Groovy Map containing reference/index information + e.g. [ id:'test' ] - index: type: file description: BWA genome index files diff --git a/modules/nf-core/cadd/main.nf b/modules/nf-core/cadd/main.nf new file mode 100644 index 00000000..0f644811 --- /dev/null +++ b/modules/nf-core/cadd/main.nf @@ -0,0 +1,55 @@ +process CADD { + tag "$meta.id" + label 'process_medium' + + conda "bioconda::cadd-scripts=1.6 anaconda::conda=4.14.0 conda-forge::mamba=1.4.0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-8d145e7b16a8ca4bf920e6ca464763df6f0a56a2:d4e457a2edecb2b10e915c01d8f46e29e236b648-0': + 'biocontainers/mulled-v2-8d145e7b16a8ca4bf920e6ca464763df6f0a56a2:d4e457a2edecb2b10e915c01d8f46e29e236b648-0' }" + + containerOptions { + (workflow.containerEngine == 'singularity') ? + "--writable -B ${annotation_dir}:/usr/local/share/cadd-scripts-1.6-1/data/annotations" : + "--privileged -v ${annotation_dir}:/usr/local/share/cadd-scripts-1.6-1/data/annotations" + } + + input: + tuple val(meta), path(vcf) + path(annotation_dir) + + output: + tuple val(meta), path("*.tsv.gz"), emit: tsv + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = "1.6" // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. 
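+ // Note (added for clarity): the containerOptions above bind-mount the `annotation_dir` input onto /usr/local/share/cadd-scripts-1.6-1/data/annotations, the path where cadd.sh expects its pre-downloaded annotation data.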
+ """ + cadd.sh \\ + -o ${prefix}.tsv.gz \\ + $args \\ + $vcf + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cadd: $VERSION + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = "1.6" // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. + """ + touch ${prefix}.tsv.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cadd: $VERSION + END_VERSIONS + """ +} diff --git a/modules/nf-core/cadd/meta.yml b/modules/nf-core/cadd/meta.yml new file mode 100644 index 00000000..b54f5951 --- /dev/null +++ b/modules/nf-core/cadd/meta.yml @@ -0,0 +1,49 @@ +name: "cadd" +description: CADD is a tool for scoring the deleteriousness of single nucleotide variants as well as insertion/deletions variants in the human genome. +keywords: + - cadd + - annotate + - variants +tools: + - "cadd": + description: "CADD scripts release for offline scoring" + homepage: "https://cadd.gs.washington.edu/" + documentation: "https://github.com/kircherlab/CADD-scripts/blob/master/README.md" + tool_dev_url: "https://github.com/kircherlab/CADD-scripts/" + doi: "10.1093/nar/gky1016" + licence: "['Restricted. Free for non-commercial users.']" + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcf: + type: file + description: Input file for annotation in vcf or vcf.gz format + pattern: "*.{vcf,vcf.gz}" + - annotation_dir: + type: file + description: | + Path to folder containing the vcf files with precomputed CADD scores. + This folder contains the uncompressed files that would otherwise be in data/annotation folder as described in https://github.com/kircherlab/CADD-scripts/#manual-installation. + pattern: "*.{vcf,vcf.gz}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - tsv: + type: file + description: Annotated tsv file + pattern: "*.{tsv,tsv.gz}" + +authors: + - "@ramprasadn" diff --git a/modules/nf-core/cat/cat/main.nf b/modules/nf-core/cat/cat/main.nf index 840af4b9..9f062219 100644 --- a/modules/nf-core/cat/cat/main.nf +++ b/modules/nf-core/cat/cat/main.nf @@ -5,7 +5,7 @@ process CAT_CAT { conda "conda-forge::pigz=2.3.4" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/pigz:2.3.4' : - 'quay.io/biocontainers/pigz:2.3.4' }" + 'biocontainers/pigz:2.3.4' }" input: tuple val(meta), path(files_in) diff --git a/modules/nf-core/chromograph/main.nf b/modules/nf-core/chromograph/main.nf new file mode 100644 index 00000000..9049dbfa --- /dev/null +++ b/modules/nf-core/chromograph/main.nf @@ -0,0 +1,95 @@ +process CHROMOGRAPH { + tag "$meta.id" + label 'process_single' + + conda "bioconda::chromograph=1.3.1" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/chromograph:1.3.1--pyhdfd78af_1': + 'biocontainers/chromograph:1.3.1--pyhdfd78af_1' }" + + input: + tuple val(meta), path(autozyg) + tuple val(meta2), path(coverage) + tuple val(meta3), path(exome) + tuple val(meta4), path(fracsnp) + tuple val(meta5), path(ideogram) + tuple val(meta6), path(regions) + tuple val(meta7), path(sites) + + output: + tuple val(meta), path("${prefix}"), emit: plots + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def autozyg_param = autozyg ? "--autozyg ${autozyg}" : '' + def coverage_param = coverage ? "--coverage ${coverage}" : '' + def exome_param = exome ? "--exom ${exome}" : '' + def fracsnp_param = fracsnp ? "--fracsnp ${fracsnp}" : '' + def ideogram_param = ideogram ? "--ideogram ${ideogram}" : '' + def regions_param = regions ? "--regions ${regions}" : '' + def sites_param = sites ? "--sites ${sites}" : '' + + if (autozyg) { + prefix = task.ext.prefix ?: "${meta.id}" + } else if (coverage) { + prefix = task.ext.prefix ?: "${meta2.id}" + } else if (exome) { + prefix = task.ext.prefix ?: "${meta3.id}" + } else if (fracsnp) { + prefix = task.ext.prefix ?: "${meta4.id}" + } else if (ideogram) { + prefix = task.ext.prefix ?: "${meta5.id}" + } else if (regions) { + prefix = task.ext.prefix ?: "${meta6.id}" + } else { + prefix = task.ext.prefix ?: "${meta7.id}" + } + """ + chromograph \\ + $args \\ + $autozyg_param \\ + $coverage_param \\ + $exome_param \\ + $fracsnp_param \\ + $ideogram_param \\ + $regions_param \\ + $sites_param \\ + --outd ${prefix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + chromograph: \$(echo \$(chromograph --version 2>&1) | sed 's/chromograph //' ) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + + if (autozyg) { + prefix = task.ext.prefix ?: "${meta.id}" + } else if (coverage) { + prefix = task.ext.prefix ?: "${meta2.id}" + } else if (exome) { + prefix = task.ext.prefix ?: "${meta3.id}" + } else if (fracsnp) { + prefix = task.ext.prefix ?: "${meta4.id}" + } else if (ideogram) { + prefix = task.ext.prefix ?: "${meta5.id}" + } else if (regions) { + prefix = task.ext.prefix ?: "${meta6.id}" + } else { + prefix = task.ext.prefix ?: "${meta7.id}" + } + """ + mkdir ${prefix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + chromograph: \$(echo \$(chromograph --version 2>&1) | sed 's/chromograph //' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/chromograph/meta.yml b/modules/nf-core/chromograph/meta.yml new file mode 100644 index 00000000..cac5c7aa --- /dev/null +++ b/modules/nf-core/chromograph/meta.yml @@ -0,0 +1,93 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json +name: "chromograph" +description: Chromograph is a python package to create PNG images from genetics data such as BED and WIG files. +keywords: + - chromosome_visualization + - bed + - wig + - png +tools: + - "chromograph": + description: "Chromograph is a python package to create PNG images from genetics data such as BED and WIG files." + homepage: "https://github.com/Clinical-Genomics/chromograph" + documentation: "https://github.com/Clinical-Genomics/chromograph/blob/master/README.md" + licence: "['MIT']" + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - meta2: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - meta3: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - meta4: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - meta5: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - meta6: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - meta7: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - autozyg: + type: file + description: Bed file containing the regions of autozygosity + - coverage: + type: file + description: Wig file containing the coverage information + - exome: + type: file + description: Bed file containing the exome coverage + - fracsnp: + type: file + description: Wig file containing the fraction of homozygous SNPs + - ideogram: + type: file + description: | + Bed file containing information necessary for ideogram plots. + Format ['chrom', 'start', 'end', 'name', 'gStain'] + - regions: + type: file + description: Bed file containing UPD regions + - sites: + type: file + description: Bed file containing UPD sites + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - plots: + type: directory + description: Directory containing the plots in PNG format + pattern: "*.png" + +authors: + - "@ramprasadn" diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf index 800a6099..ebc87273 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/main.nf +++ b/modules/nf-core/custom/dumpsoftwareversions/main.nf @@ -5,7 +5,7 @@ process CUSTOM_DUMPSOFTWAREVERSIONS { conda "bioconda::multiqc=1.14" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/multiqc:1.14--pyhdfd78af_0' : - 'quay.io/biocontainers/multiqc:1.14--pyhdfd78af_0' }" + 'biocontainers/multiqc:1.14--pyhdfd78af_0' }" input: path versions diff --git a/modules/nf-core/deepvariant/main.nf b/modules/nf-core/deepvariant/main.nf index afc5e444..1a24ba34 100644 --- a/modules/nf-core/deepvariant/main.nf +++ b/modules/nf-core/deepvariant/main.nf @@ -1,8 +1,8 @@ process DEEPVARIANT { tag "$meta.id" - label 'process_medium' + label 'process_high' - container "docker.io/google/deepvariant:1.4.0" + container "nf-core/deepvariant:1.5.0" // Exit if running this module with -profile conda / -profile mamba if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { @@ -11,14 +11,16 @@ process DEEPVARIANT { input: tuple val(meta), path(input), path(index), path(intervals) - path(fasta) - path(fai) - path(gzi) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + tuple val(meta4), path(gzi) output: - tuple val(meta), path("${prefix}.vcf.gz") , emit: vcf - tuple val(meta), path("${prefix}.g.vcf.gz"), emit: gvcf - path "versions.yml" , emit: versions + tuple val(meta), path("${prefix}.vcf.gz") , emit: vcf + tuple val(meta), path("${prefix}.vcf.gz.tbi") , emit: vcf_tbi + tuple val(meta), path("${prefix}.g.vcf.gz") , emit: gvcf + tuple val(meta), path("${prefix}.g.vcf.gz.tbi"), emit: gvcf_tbi + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -26,7 +28,7 @@ process DEEPVARIANT { script: def args = task.ext.args ?: '' prefix = task.ext.prefix ?: "${meta.id}" - def regions = intervals ? "--regions ${intervals}" : "" + def regions = intervals ? "--regions=${intervals}" : "" """ /opt/deepvariant/bin/run_deepvariant \\ @@ -36,6 +38,7 @@ process DEEPVARIANT { --output_gvcf=${prefix}.g.vcf.gz \\ ${args} \\ ${regions} \\ + --intermediate_results_dir=. \\ --num_shards=${task.cpus} cat <<-END_VERSIONS > versions.yml @@ -48,7 +51,9 @@ process DEEPVARIANT { prefix = task.ext.prefix ?: "${meta.id}" """ touch ${prefix}.vcf.gz + touch ${prefix}.vcf.gz.tbi touch ${prefix}.g.vcf.gz + touch ${prefix}.g.vcf.gz.tbi cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/deepvariant/meta.yml b/modules/nf-core/deepvariant/meta.yml index 97f068ec..c7d11ae3 100644 --- a/modules/nf-core/deepvariant/meta.yml +++ b/modules/nf-core/deepvariant/meta.yml @@ -31,14 +31,29 @@ input: type: file description: Interval file for targeted regions pattern: "*.bed" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] - fasta: type: file description: The reference fasta file pattern: "*.fasta" + - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] - fai: type: file description: Index of reference fasta file pattern: "*.fai" + - meta4: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] - gzi: type: file description: GZI index of reference fasta file @@ -65,3 +80,4 @@ output: authors: - "@abhi18av" + - "@ramprasadn" diff --git a/modules/nf-core/eklipse/main.nf b/modules/nf-core/eklipse/main.nf new file mode 100644 index 00000000..7b320a4b --- /dev/null +++ b/modules/nf-core/eklipse/main.nf @@ -0,0 +1,58 @@ + +process EKLIPSE { + tag "$meta.id" + label 'process_single' + + // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. 
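+ // If no `ref_gb` GenBank file is supplied, the script below falls back to the rCRS mitochondrial reference (NC_012920.1.gb) bundled in the container.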
+ conda "bioconda::eklipse=1.8" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/eklipse:1.8--hdfd78af_1': + 'biocontainers/eklipse:1.8--hdfd78af_1' }" + + input: + tuple val(meta), path(bam), path(bai) + path ref_gb + + output: + tuple val(meta), path("*deletions.csv") , emit: deletions + tuple val(meta), path("*genes.csv") , emit: genes + tuple val(meta), path("*.png") , emit: circos + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def ref_gb = ref_gb ? "$ref_gb" : "/usr/local/bin/data/NC_012920.1.gb" + def VERSION = "1.8" // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + """ + echo "$bam\t${prefix}" > infile.txt + eKLIPse.py \\ + -in infile.txt \\ + -ref $ref_gb + mv eKLIPse_*/eKLIPse_deletions.csv eKLIPse_deletions.csv + mv eKLIPse_*/eKLIPse_genes.csv eKLIPse_genes.csv + mv eKLIPse_*/eKLIPse_${prefix}.png eKLIPse_${prefix}.png + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + eklipse: $VERSION + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = "1.8" + """ + touch eKLIPse_deletions.csv + touch eKLIPse_genes.csv + touch eKLIPse_${prefix}.png + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + eklipse: $VERSION + END_VERSIONS + """ +} diff --git a/modules/nf-core/eklipse/meta.yml b/modules/nf-core/eklipse/meta.yml new file mode 100644 index 00000000..ee60ef65 --- /dev/null +++ b/modules/nf-core/eklipse/meta.yml @@ -0,0 +1,60 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json +name: "eklipse" +description: tool for detection and quantification of large mtDNA rearrangements. +keywords: + - eklipse + - mitochondria + - mtDNA + - circos + - deletion + - SV +tools: + - "eklipse": + description: "tool for detection and quantification of large mtDNA rearrangements." + homepage: "https://github.com/dooguypapua/eKLIPse/tree/master" + documentation: "https://github.com/dooguypapua/eKLIPse/tree/master" + tool_dev_url: "https://github.com/dooguypapua/eKLIPse/tree/master" + doi: "10.1038/s41436-018-0350-8" + licence: ["GNU General Public v3 or later (GPL v3+)"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: MT BAM/SAM file + pattern: "*.{bam,sam}" + - bai: + type: file + description: MT BAM/SAM index file + pattern: "*.{bai,sai}" + - ref_gb: + type: file + description: mtDNA reference genome in Genbank format, optional if empty NC_012920.1.gb will be used + pattern: "*.{gb}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - csv: + type: file + description: CSV file containing deletions + pattern: "*.{csv}" + - circos: + type: file + description: PNG file with circos plot of mtDNA + pattern: "*.{png}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@Lucpen" diff --git a/modules/nf-core/expansionhunter/meta.yml b/modules/nf-core/expansionhunter/meta.yml index 645f751b..0d2b10d6 100644 --- a/modules/nf-core/expansionhunter/meta.yml +++ b/modules/nf-core/expansionhunter/meta.yml @@ -25,14 +25,29 @@ input: type: file description: BAM/CRAM file pattern: "*.{bam,cram}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] - fasta: type: file description: Reference genome pattern: "*.{fna,fa,fasta}" + - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] - fai: type: file description: Reference genome index pattern: "*.fai" + - meta4: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] - variant_catalog: type: file description: JSON file with repeat expansion sites to genotype diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf index 9ae58381..249f9064 100644 --- a/modules/nf-core/fastqc/main.nf +++ b/modules/nf-core/fastqc/main.nf @@ -5,7 +5,7 @@ process FASTQC { conda "bioconda::fastqc=0.11.9" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/fastqc:0.11.9--0' : - 'quay.io/biocontainers/fastqc:0.11.9--0' }" + 'biocontainers/fastqc:0.11.9--0' }" input: tuple val(meta), path(reads) @@ -29,7 +29,11 @@ process FASTQC { printf "%s %s\\n" $rename_to | while read old_name new_name; do [ -f "\${new_name}" ] || ln -s \$old_name \$new_name done - fastqc $args --threads $task.cpus $renamed_files + + fastqc \\ + $args \\ + --threads $task.cpus \\ + $renamed_files cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/gatk4/bedtointervallist/main.nf b/modules/nf-core/gatk4/bedtointervallist/main.nf index 41830019..a23abd06 100644 --- a/modules/nf-core/gatk4/bedtointervallist/main.nf +++ b/modules/nf-core/gatk4/bedtointervallist/main.nf @@ -5,11 +5,11 @@ process GATK4_BEDTOINTERVALLIST { conda "bioconda::gatk4=4.4.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': - 'quay.io/biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" + 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" input: tuple val(meta), path(bed) - path dict + tuple val(meta2), path(dict) output: tuple val(meta), path('*.interval_list'), emit: interval_list diff --git a/modules/nf-core/gatk4/bedtointervallist/meta.yml b/modules/nf-core/gatk4/bedtointervallist/meta.yml index 986f1592..40daf752 100644 --- a/modules/nf-core/gatk4/bedtointervallist/meta.yml +++ b/modules/nf-core/gatk4/bedtointervallist/meta.yml @@ -3,6 +3,7 @@ description: Creates an interval list from a bed file and a reference dict keywords: - bed - interval list + - bedtointervallist tools: - gatk4: description: | @@ -23,6 +24,11 @@ input: type: file description: Input bed file pattern: "*.bed" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g.
[ id:'genome' ] - dict: type: file description: Sequence dictionary @@ -38,3 +44,4 @@ output: pattern: "versions.yml" authors: - "@kevinmenden" + - "@ramprasadn" diff --git a/modules/nf-core/gatk4/collectreadcounts/main.nf b/modules/nf-core/gatk4/collectreadcounts/main.nf new file mode 100644 index 00000000..ce1985bc --- /dev/null +++ b/modules/nf-core/gatk4/collectreadcounts/main.nf @@ -0,0 +1,68 @@ +process GATK4_COLLECTREADCOUNTS { + tag "$meta.id" + label 'process_medium' + + conda "bioconda::gatk4=4.4.0.0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" + + input: + tuple val(meta), path(input), path(input_index), path(intervals) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + tuple val(meta4), path(dict) + + output: + tuple val(meta), path("*.hdf5"), optional: true, emit: hdf5 + tuple val(meta), path("*.tsv") , optional: true, emit: tsv + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + def reference = fasta ? "--reference $fasta" : "" + def extension = args.contains("--format HDF5") ? "hdf5" : + args.contains("--format TSV") ? "tsv" : + "hdf5" + + def avail_mem = 3072 + if (!task.memory) { + log.info '[GATK COLLECTREADCOUNTS] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M" CollectReadCounts \\ + --input $input \\ + --intervals $intervals \\ + --output ${prefix}.$extension \\ + $reference \\ + --tmp-dir . \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def extension = args.contains("--format HDF5") ? "hdf5" : + args.contains("--format TSV") ? "tsv" : + "hdf5" + """ + touch ${prefix}.${extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/collectreadcounts/meta.yml b/modules/nf-core/gatk4/collectreadcounts/meta.yml new file mode 100644 index 00000000..938011c1 --- /dev/null +++ b/modules/nf-core/gatk4/collectreadcounts/meta.yml @@ -0,0 +1,87 @@ +name: "gatk4_collectreadcounts" +description: Collects read counts at specified intervals. The count for each interval is calculated by counting the number of read starts that lie in the interval. +keywords: + - bam + - cram + - CollectReadCounts + - gatk + - gatk4 +tools: + - gatk4: + description: + Genome Analysis Toolkit (GATK4). Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. 
+ homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/articles/360037593911-CombineGVCFs + tool_dev_url: https://github.com/broadinstitute/gatk + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - meta4: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - input: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - input_index: + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + - intervals: + type: file + description: A file containing the specified intervals + pattern: "*.{bed,intervals}" + - fasta: + type: file + description: Optional - Reference FASTA + pattern: "*.{fasta,fa}" + - fai: + type: file + description: Optional - Index of the reference FASTA file + pattern: "*.fai" + - dict: + type: file + description: Optional - Sequence dictionary of the reference FASTA file + pattern: "*.dict" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - hdf5: + type: file + description: The read counts in hdf5 format + pattern: "*.hdf5" + - tsv: + type: file + description: The read counts in TSV format + pattern: "*.tsv" + +authors: + - "@nvnieuwk" diff --git a/modules/nf-core/gatk4/createsequencedictionary/main.nf b/modules/nf-core/gatk4/createsequencedictionary/main.nf index 1e78f017..15a86bea 100644 --- a/modules/nf-core/gatk4/createsequencedictionary/main.nf +++ b/modules/nf-core/gatk4/createsequencedictionary/main.nf @@ -5,14 +5,14 @@ process GATK4_CREATESEQUENCEDICTIONARY { conda "bioconda::gatk4=4.4.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': - 'quay.io/biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" + 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" input: - path fasta + tuple val(meta), path(fasta) output: - path "*.dict" , emit: dict - path "versions.yml" , emit: versions + tuple val(meta), path('*.dict') , emit: dict + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -20,7 +20,7 @@ process GATK4_CREATESEQUENCEDICTIONARY { script: def args = task.ext.args ?: '' - def avail_mem = 6 + def avail_mem = 6144 if (!task.memory) { log.info '[GATK CreateSequenceDictionary] Available memory not known - defaulting to 6GB. Specify process memory requirements to change this.' 
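+ // avail_mem is expressed in megabytes (6144 MB = 6 GB); it is presumably consumed as a "-Xmx${avail_mem}M" java option further down (not shown in this hunk), matching the other GATK4 modules in this diff.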
} else { diff --git a/modules/nf-core/gatk4/createsequencedictionary/meta.yml b/modules/nf-core/gatk4/createsequencedictionary/meta.yml index 69c23581..a421e681 100644 --- a/modules/nf-core/gatk4/createsequencedictionary/meta.yml +++ b/modules/nf-core/gatk4/createsequencedictionary/meta.yml @@ -3,6 +3,7 @@ description: Creates a sequence dictionary for a reference sequence keywords: - dictionary - fasta + - createsequencedictionary tools: - gatk: description: | @@ -15,6 +16,11 @@ tools: licence: ["Apache-2.0"] input: + - meta: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] - fasta: type: file description: Input fasta file @@ -30,3 +36,4 @@ output: pattern: "versions.yml" authors: - "@maxulysse" + - "@ramprasadn" diff --git a/modules/nf-core/gatk4/determinegermlinecontigploidy/main.nf b/modules/nf-core/gatk4/determinegermlinecontigploidy/main.nf new file mode 100644 index 00000000..593c8968 --- /dev/null +++ b/modules/nf-core/gatk4/determinegermlinecontigploidy/main.nf @@ -0,0 +1,71 @@ + +process GATK4_DETERMINEGERMLINECONTIGPLOIDY { + tag "$meta.id" + label 'process_single' + + //Conda is not supported at the moment: https://github.com/broadinstitute/gatk/issues/7811 + container "quay.io/nf-core/gatk:4.4.0.0" //Biocontainers is missing a package + + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + exit 1, "GATK4_DETERMINEGERMLINECONTIGPLOIDY module does not support Conda. Please use Docker / Singularity / Podman instead." + } + + input: + tuple val(meta), path(counts), path(bed), path(exclude_beds) + tuple val(meta2), path(ploidy_model) + path(contig_ploidy_table) + + output: + tuple val(meta), path("${prefix}-calls"), emit: calls + tuple val(meta), path("${prefix}-model"), emit: model, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def intervals = bed ? "--intervals ${bed}" : "" + def exclude = exclude_beds ? exclude_beds.collect(){"--exclude-intervals $it"}.join(" ") : "" + def contig_ploidy = contig_ploidy_table ? "--contig-ploidy-priors ${contig_ploidy_table}" : "" + def model = ploidy_model ? "--model ${ploidy_model}" : "" + def input_list = counts.collect(){"--input $it"}.join(" ") + + def avail_mem = 3072 + if (!task.memory) { + log.info '[GATK DetermineGermlineContigPloidy] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M" DetermineGermlineContigPloidy \\ + ${input_list} \\ + --output ./ \\ + --output-prefix ${prefix} \\ + ${intervals} \\ + ${exclude} \\ + ${contig_ploidy} \\ + ${model} \\ + --tmp-dir . 
\\ + ${args} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}-calls + touch ${prefix}-model + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/determinegermlinecontigploidy/meta.yml b/modules/nf-core/gatk4/determinegermlinecontigploidy/meta.yml new file mode 100644 index 00000000..667d622e --- /dev/null +++ b/modules/nf-core/gatk4/determinegermlinecontigploidy/meta.yml @@ -0,0 +1,75 @@ +name: "gatk4_determinegermlinecontigploidy" +description: Determines the baseline contig ploidy for germline samples given counts data +keywords: + - gatk4 + - determinegermlinecontigploidy + - counts + - copy number +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - meta2: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - counts: + type: file + description: One or more count TSV files created with gatk/collectreadcounts + pattern: "*.tsv" + - bed: + type: file + description: Optional - A bed file containing the intervals to include in the process + pattern: "*.bed" + - exclude_beds: + type: file + description: Optional - One or more bed files containing intervals to exclude from the process + pattern: "*.bed" + - contig_ploidy_table: + type: file + description: The contig ploidy priors table + pattern: "*.tsv" + - ploidy_model: + type: directory + description: | + Optional - A folder containing the ploidy model. + When a model is supplied, the tool will run in CASE mode. + pattern: '*-model/' + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - calls: + type: directory + description: A folder containing the calls from the input files + pattern: "*-calls/" + - model: + type: directory + description: | + A folder containing the model from the input files. + This will only be created in COHORT mode (when no model is supplied to the process). + pattern: "*-model/" + +authors: + - "@nvnieuwk" diff --git a/modules/nf-core/gatk4/filtermutectcalls/main.nf b/modules/nf-core/gatk4/filtermutectcalls/main.nf index 09643857..d0cf5b4a 100644 --- a/modules/nf-core/gatk4/filtermutectcalls/main.nf +++ b/modules/nf-core/gatk4/filtermutectcalls/main.nf @@ -5,13 +5,13 @@ process GATK4_FILTERMUTECTCALLS { conda "bioconda::gatk4=4.4.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': - 'quay.io/biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" + 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" input: tuple val(meta), path(vcf), path(vcf_tbi), path(stats), path(orientationbias), path(segmentation), path(table), val(estimate) - path fasta - path fai - path dict + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + tuple val(meta4), path(dict) output: tuple val(meta), path("*.vcf.gz") , emit: vcf @@ -29,7 +29,7 @@ process GATK4_FILTERMUTECTCALLS { def orientationbias_command = orientationbias ? orientationbias.collect{"--orientation-bias-artifact-priors $it"}.join(' ') : '' def segmentation_command = segmentation ? segmentation.collect{"--tumor-segmentation $it"}.join(' ') : '' def estimate_command = estimate ? " --contamination-estimate ${estimate} " : '' - def table_command = table ? " --contamination-table ${table} " : '' + def table_command = table ? table.collect{"--contamination-table $it"}.join(' ') : '' def avail_mem = 3072 if (!task.memory) { diff --git a/modules/nf-core/gatk4/filtermutectcalls/meta.yml b/modules/nf-core/gatk4/filtermutectcalls/meta.yml index d1972d70..1a6faecb 100644 --- a/modules/nf-core/gatk4/filtermutectcalls/meta.yml +++ b/modules/nf-core/gatk4/filtermutectcalls/meta.yml @@ -35,28 +35,43 @@ input: description: Stats file that pairs with output vcf file pattern: "*vcf.gz.stats" - orientationbias: - type: list + type: file description: files containing artifact priors for input vcf. Optional input. pattern: "*.artifact-prior.tar.gz" - segmentation: - type: list + type: file description: tables containing segmentation information for input vcf. Optional input. pattern: "*.segmentation.table" - table: - type: list + type: file description: table(s) containing contamination data for input vcf. Optional input, takes priority over estimate. pattern: "*.contamination.table" - estimate: - type: val + type: float description: estimation of contamination value as a double. Optional input, will only be used if table is not specified. + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] - fasta: type: file description: The reference fasta file pattern: "*.fasta" + - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] - fai: type: file description: Index of reference fasta file pattern: "*.fasta.fai" + - meta4: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] - dict: type: file description: GATK sequence dictionary @@ -83,3 +98,4 @@ output: authors: - "@GCJMackenzie" - "@maxulysse" + - "@ramprasadn" diff --git a/modules/nf-core/gatk4/germlinecnvcaller/main.nf b/modules/nf-core/gatk4/germlinecnvcaller/main.nf new file mode 100644 index 00000000..9b31c56d --- /dev/null +++ b/modules/nf-core/gatk4/germlinecnvcaller/main.nf @@ -0,0 +1,66 @@ +process GATK4_GERMLINECNVCALLER { + tag "$meta.id" + label 'process_single' + + //Conda is not supported at the moment: https://github.com/broadinstitute/gatk/issues/7811 + container "quay.io/nf-core/gatk:4.4.0.0" //Biocontainers is missing a package + + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + exit 1, "GATK4_GERMLINECNVCALLER module does not support Conda. Please use Docker / Singularity / Podman instead." 
+ } + + input: + tuple val(meta), path(tsv), path(intervals), path(ploidy), path(model) + + output: + tuple val(meta), path("*-cnv-calls/*-calls"), emit: calls, optional: true + tuple val(meta), path("*-cnv-model/*-model"), emit: model, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def intervals_command = intervals ? "--intervals ${intervals}" : "" + def ploidy_command = ploidy ? "--contig-ploidy-calls ${ploidy}" : "" + def model_command = model ? "--model ${model}" : "" + def input_list = tsv.collect{"--input $it"}.join(' ') + def output_command = model ? "--output ${prefix}-cnv-calls" : "--output ${prefix}-cnv-model" + + def avail_mem = 3072 + if (!task.memory) { + log.info '[GATK GermlineCNVCaller] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M" GermlineCNVCaller \\ + $input_list \\ + $ploidy_command \\ + $output_command \\ + --output-prefix $prefix \\ + $args \\ + $intervals_command \\ + $model_command + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + mkdir -p ${prefix}-cnv-calls/${prefix}-calls + mkdir -p ${prefix}-cnv-model/${prefix}-model + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/germlinecnvcaller/meta.yml b/modules/nf-core/gatk4/germlinecnvcaller/meta.yml new file mode 100644 index 00000000..b7430927 --- /dev/null +++ b/modules/nf-core/gatk4/germlinecnvcaller/meta.yml @@ -0,0 +1,62 @@ +name: "gatk4_germlinecnvcaller" +description: Calls copy-number variants in germline samples given their counts and the output of DetermineGermlineContigPloidy. +keywords: + - gatk + - gatk4_germlinecnvcaller + - germline contig ploidy +tools: + - "gatk4": + description: + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: "10.1158/1538-7445.AM2017-3590" + licence: ["Apache-2.0"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - tsv: + type: file + description: One or more count TSV files created with gatk/collectreadcounts + pattern: "*.tsv" + - intervals: + type: file + description: Optional - A bed file containing the intervals to include in the process + pattern: "*.bed" + - model: + type: directory + description: Optional - directory containing the model produced by germlinecnvcaller cohort mode + pattern: "*-cnv-model/*-model" + - ploidy: + type: directory + description: Directory containing ploidy calls produced by determinegermlinecontigploidy case or cohort mode + pattern: "*-calls" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g.
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - calls: + type: directory + description: Directory containing calls produced by germlinecnvcaller case mode + pattern: "*-cnv-calls/*-calls" + - model: + type: directory + description: Optional - Directory containing the model produced by germlinecnvcaller cohort mode + pattern: "*-cnv-model/*-model" + +authors: + - "@ryanjameskennedy" + - "@ViktorHy" diff --git a/modules/nf-core/gatk4/intervallisttools/main.nf b/modules/nf-core/gatk4/intervallisttools/main.nf index e221dc01..0054659a 100644 --- a/modules/nf-core/gatk4/intervallisttools/main.nf +++ b/modules/nf-core/gatk4/intervallisttools/main.nf @@ -5,7 +5,7 @@ process GATK4_INTERVALLISTTOOLS { conda "bioconda::gatk4=4.4.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': - 'quay.io/biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" + 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" input: tuple val(meta), path(intervals) diff --git a/modules/nf-core/gatk4/mergebamalignment/main.nf b/modules/nf-core/gatk4/mergebamalignment/main.nf index 9ee676ce..35d2e71e 100644 --- a/modules/nf-core/gatk4/mergebamalignment/main.nf +++ b/modules/nf-core/gatk4/mergebamalignment/main.nf @@ -5,12 +5,12 @@ process GATK4_MERGEBAMALIGNMENT { conda "bioconda::gatk4=4.4.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': - 'quay.io/biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" + 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" input: tuple val(meta), path(aligned), path(unmapped) - path fasta - path dict + tuple val(meta2), path(fasta) + tuple val(meta3), path(dict) output: tuple val(meta), path('*.bam'), emit: bam diff --git a/modules/nf-core/gatk4/mergebamalignment/meta.yml b/modules/nf-core/gatk4/mergebamalignment/meta.yml index b4bff490..9d8ae84b 100644 --- a/modules/nf-core/gatk4/mergebamalignment/meta.yml +++ b/modules/nf-core/gatk4/mergebamalignment/meta.yml @@ -1,8 +1,10 @@ name: gatk4_mergebamalignment description: Merge unmapped with mapped BAM files keywords: + - alignment - bam - merge + - mergebamalignment tools: - gatk4: description: | @@ -27,9 +29,19 @@ input: type: file description: The unmapped bam file pattern: "*.{bam}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] - fasta: type: file description: The reference fasta file + - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] - dict: type: file description: GATK sequence dictionary @@ -44,3 +56,4 @@ output: pattern: "versions.yml" authors: - "@kevinmenden" + - "@ramprasadn" diff --git a/modules/nf-core/gatk4/mergevcfs/main.nf b/modules/nf-core/gatk4/mergevcfs/main.nf index d0f48757..dfb5b33a 100644 --- a/modules/nf-core/gatk4/mergevcfs/main.nf +++ b/modules/nf-core/gatk4/mergevcfs/main.nf @@ -5,7 +5,7 @@ process GATK4_MERGEVCFS { conda "bioconda::gatk4=4.4.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': - 'quay.io/biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" + 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" input: tuple val(meta), path(vcf) diff --git a/modules/nf-core/gatk4/mutect2/main.nf b/modules/nf-core/gatk4/mutect2/main.nf index 97e3408f..bddc3688 100644 --- a/modules/nf-core/gatk4/mutect2/main.nf +++ b/modules/nf-core/gatk4/mutect2/main.nf @@ -5,17 +5,17 @@ process GATK4_MUTECT2 { conda "bioconda::gatk4=4.4.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': - 'quay.io/biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" + 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" input: tuple val(meta), path(input), path(input_index), path(intervals) - path fasta - path fai - path dict - path germline_resource - path germline_resource_tbi - path panel_of_normals - path panel_of_normals_tbi + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + tuple val(meta4), path(dict) + path(germline_resource) + path(germline_resource_tbi) + path(panel_of_normals) + path(panel_of_normals_tbi) output: tuple val(meta), path("*.vcf.gz") , emit: vcf diff --git a/modules/nf-core/gatk4/mutect2/meta.yml b/modules/nf-core/gatk4/mutect2/meta.yml index aa0a02aa..4842c229 100644 --- a/modules/nf-core/gatk4/mutect2/meta.yml +++ b/modules/nf-core/gatk4/mutect2/meta.yml @@ -31,17 +31,32 @@ input: description: list of BAM file indexes, also able to take CRAM indexes as an input pattern: "*.{bam.bai/cram.crai}" - intervals: - type: File/string + type: file description: Specify region the tools is run on. - pattern: ".{bed,interval_list}/chrM" + pattern: ".{bed,interval_list}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] - fasta: type: file description: The reference fasta file pattern: "*.fasta" + - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] - fai: type: file description: Index of reference fasta file pattern: "*.fasta.fai" + - meta4: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] - dict: type: file description: GATK sequence dictionary @@ -87,3 +102,4 @@ output: authors: - "@GCJMackenzie" + - "@ramprasadn" diff --git a/modules/nf-core/gatk4/postprocessgermlinecnvcalls/main.nf b/modules/nf-core/gatk4/postprocessgermlinecnvcalls/main.nf new file mode 100644 index 00000000..8faf0121 --- /dev/null +++ b/modules/nf-core/gatk4/postprocessgermlinecnvcalls/main.nf @@ -0,0 +1,65 @@ +process GATK4_POSTPROCESSGERMLINECNVCALLS { + tag "$meta.id" + label 'process_single' + + //Conda is not supported at the moment: https://github.com/broadinstitute/gatk/issues/7811 + container "quay.io/nf-core/gatk:4.4.0.0" //Biocontainers is missing a package + + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + exit 1, "GATK4_POSTPROCESSGERMLINECNVCALLS module does not support Conda. Please use Docker / Singularity / Podman instead." 
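+        // Editor's note (descriptive comment, not upstream code): workflow.profile is
+        // the raw comma-separated -profile string, so tokenize(',') also catches
+        // combined profiles such as "-profile test,conda".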
+    }
+
+    input:
+    tuple val(meta), path(calls), path(model), path(ploidy)
+
+    output:
+    tuple val(meta), path("*_genotyped_intervals.vcf.gz"), emit: intervals, optional: true
+    tuple val(meta), path("*_genotyped_segments.vcf.gz") , emit: segments , optional: true
+    tuple val(meta), path("*_denoised.vcf.gz")           , emit: denoised , optional: true
+    path "versions.yml"                                  , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def calls_command = calls ? calls.collect{"--calls-shard-path $it"}.join(' ') : ""
+    def model_command = model ? model.collect{"--model-shard-path $it"}.join(' ') : ""
+    def ploidy_command = ploidy ? "--contig-ploidy-calls ${ploidy}" : ""
+
+    def avail_mem = 3072
+    if (!task.memory) {
+        log.info '[GATK PostprocessGermlineCNVCalls] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
+    } else {
+        avail_mem = (task.memory.mega*0.8).intValue()
+    }
+    """
+    gatk --java-options "-Xmx${avail_mem}M" PostprocessGermlineCNVCalls \\
+        $calls_command \\
+        $model_command \\
+        $ploidy_command \\
+        --output-genotyped-intervals ${prefix}_genotyped_intervals.vcf.gz \\
+        --output-genotyped-segments ${prefix}_genotyped_segments.vcf.gz \\
+        --output-denoised-copy-ratios ${prefix}_denoised.vcf.gz
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//')
+    END_VERSIONS
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    touch ${prefix}_genotyped_intervals.vcf.gz
+    touch ${prefix}_genotyped_segments.vcf.gz
+    touch ${prefix}_denoised.vcf.gz
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/gatk4/postprocessgermlinecnvcalls/meta.yml b/modules/nf-core/gatk4/postprocessgermlinecnvcalls/meta.yml
new file mode 100644
index 00000000..92e06cae
--- /dev/null
+++ b/modules/nf-core/gatk4/postprocessgermlinecnvcalls/meta.yml
@@ -0,0 +1,65 @@
+name: "gatk4_postprocessgermlinecnvcalls"
+description: Postprocesses the output of GermlineCNVCaller and generates VCFs and denoised copy ratios
+keywords:
+  - gatk4
+  - postprocessgermlinecnvcalls
+  - copy number
+tools:
+  - gatk4:
+      description: |
+        Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools
+        with a primary focus on variant discovery and genotyping. Its powerful processing engine
+        and high-performance computing features make it capable of taking on projects of any size.
+      homepage: https://gatk.broadinstitute.org/hc/en-us
+      documentation: https://gatk.broadinstitute.org/hc/en-us/articles/360037593411-PostprocessGermlineCNVCalls
+      doi: 10.1158/1538-7445.AM2017-3590
+      licence: ["Apache-2.0"]
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - ploidy:
+      type: directory
+      description: |
+        Optional - A folder containing the contig-ploidy calls produced by DetermineGermlineContigPloidy.
+        When a model is supplied, the tool will run in CASE mode.
+      pattern: "*-calls/"
+  - calls:
+      type: directory
+      description: A folder containing the calls from the input files
+      pattern: "*-cnv-calls/*-calls"
+  - model:
+      type: directory
+      description: |
+        A folder containing the model from the input files.
+        This will only be created in COHORT mode (when no model is supplied to the process).
+ pattern: "*-cnv-model/*-model" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - denoised: + type: file + description: Denoised copy ratio file + pattern: "*.vcf.gz" + - segments: + type: file + description: Segments VCF file + pattern: "*.vcf.gz" + - intervals: + type: file + description: Intervals VCF file + pattern: "*.vcf.gz" + +authors: + - "@ryanjameskennedy" diff --git a/modules/nf-core/gatk4/preprocessintervals/main.nf b/modules/nf-core/gatk4/preprocessintervals/main.nf new file mode 100644 index 00000000..aff482f7 --- /dev/null +++ b/modules/nf-core/gatk4/preprocessintervals/main.nf @@ -0,0 +1,61 @@ +process GATK4_PREPROCESSINTERVALS { + tag "$fasta" + label 'process_medium' + + conda "bioconda::gatk4=4.4.0.0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" + + input: + tuple val(meta), path(fasta) + tuple val(meta2), path(fai) + tuple val(meta3), path(dict) + tuple val(meta4), path(intervals) + tuple val(meta5), path(exclude_intervals) + + output: + tuple val(meta), path("*.interval_list"), emit: interval_list + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def include_command = intervals ? "--intervals $intervals" : "" + def exclude_command = exclude_intervals ? "--exclude-intervals $exclude_intervals" : "" + + def avail_mem = 3072 + if (!task.memory) { + log.info '[GATK PreprocessIntervals] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + + """ + gatk --java-options "-Xmx${avail_mem}M" PreprocessIntervals \\ + $include_command \\ + $exclude_command \\ + --reference $fasta \\ + --output ${prefix}.interval_list \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.interval_list + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/preprocessintervals/meta.yml b/modules/nf-core/gatk4/preprocessintervals/meta.yml new file mode 100644 index 00000000..8b6ae9b3 --- /dev/null +++ b/modules/nf-core/gatk4/preprocessintervals/meta.yml @@ -0,0 +1,84 @@ +name: "gatk4_preprocessintervals" +description: Prepares bins for coverage collection. +keywords: + - gatk4 + - preprocessintervals + - interval + - bed +tools: + - "gatk4": + description: + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. 
+      homepage: https://gatk.broadinstitute.org/hc/en-us
+      documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s
+      doi: "10.1158/1538-7445.AM2017-3590"
+      licence: ["Apache-2.0"]
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing reference information
+        e.g. [ id:'test' ]
+  - fasta:
+      type: file
+      description: The reference fasta file
+      pattern: "*.fasta"
+  - meta2:
+      type: map
+      description: |
+        Groovy Map containing reference information
+        e.g. [ id:'test' ]
+  - fai:
+      type: file
+      description: Index of reference fasta file
+      pattern: "*.fasta.fai"
+  - meta3:
+      type: map
+      description: |
+        Groovy Map containing reference information
+        e.g. [ id:'test' ]
+  - dict:
+      type: file
+      description: GATK sequence dictionary
+      pattern: "*.dict"
+  - meta4:
+      type: map
+      description: |
+        Groovy Map containing reference information
+        e.g. [ id:'test' ]
+  - intervals:
+      type: file
+      description: Interval file (bed or interval_list) with the genomic regions to be included in the analysis (optional)
+      pattern: "*.{bed,interval_list}"
+  - meta5:
+      type: map
+      description: |
+        Groovy Map containing reference information
+        e.g. [ id:'test' ]
+  - exclude_intervals:
+      type: file
+      description: Interval file (bed or interval_list) with the genomic regions to be excluded from the analysis (optional)
+      pattern: "*.{bed,interval_list}"
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing reference information
+        e.g. [ id:'test' ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - interval_list:
+      type: file
+      description: Processed interval list file
+      pattern: "*.{bed,interval_list}"
+
+authors:
+  - "@ryanjameskennedy"
+  - "@ViktorHy"
+  - "@ramprasadn"
diff --git a/modules/nf-core/gatk4/printreads/main.nf b/modules/nf-core/gatk4/printreads/main.nf
index 13e722bd..084d0b46 100644
--- a/modules/nf-core/gatk4/printreads/main.nf
+++ b/modules/nf-core/gatk4/printreads/main.nf
@@ -5,13 +5,13 @@ process GATK4_PRINTREADS {
     conda "bioconda::gatk4=4.4.0.0"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
         'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0':
-        'quay.io/biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }"
+        'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }"

     input:
     tuple val(meta), path(input), path(index)
     tuple val(meta2), path(fasta)
-    path (fai)
-    path (dict)
+    tuple val(meta3), path(fai)
+    tuple val(meta4), path(dict)

     output:
     tuple val(meta), path("${prefix}.bam") , emit: bam, optional: true
@@ -25,6 +25,7 @@ process GATK4_PRINTREADS {
     script:
     def args = task.ext.args ?: ''
     prefix = task.ext.prefix ?: "${meta.id}"
+
     def avail_mem = 3072
     if (!task.memory) {
         log.info '[GATK PrintReads] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
@@ -34,7 +35,6 @@ process GATK4_PRINTREADS {
     if ("${input}" == "${prefix}.${input.extension}") {
         error("Output filename is the same as input filename. Please specify a different prefix.")
     }
-
     """
     gatk --java-options "-Xmx${avail_mem}M" PrintReads \\
         $args \\
diff --git a/modules/nf-core/gatk4/printreads/meta.yml b/modules/nf-core/gatk4/printreads/meta.yml
index cd48959f..8150c7a7 100644
--- a/modules/nf-core/gatk4/printreads/meta.yml
+++ b/modules/nf-core/gatk4/printreads/meta.yml
@@ -34,16 +34,26 @@ input:
   - meta2:
       type: map
       description: |
-        Groovy Map containing sample information
-        e.g.
[ id:'test', single_end:false ] + Groovy Map containing reference information + e.g. [ id:'genome' ] - fasta: type: file description: reference fasta file pattern: "*.{fa,fasta}" + - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] - fai: type: file description: reference fasta index file pattern: "*.{fai}" + - meta4: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] - dict: type: file description: reference fasta dictionary file diff --git a/modules/nf-core/gatk4/revertsam/main.nf b/modules/nf-core/gatk4/revertsam/main.nf index 5481ea49..768b1eed 100644 --- a/modules/nf-core/gatk4/revertsam/main.nf +++ b/modules/nf-core/gatk4/revertsam/main.nf @@ -5,7 +5,7 @@ process GATK4_REVERTSAM { conda "bioconda::gatk4=4.4.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': - 'quay.io/biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" + 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" input: tuple val(meta), path(bam) diff --git a/modules/nf-core/gatk4/samtofastq/main.nf b/modules/nf-core/gatk4/samtofastq/main.nf index 585fc582..f838b95a 100644 --- a/modules/nf-core/gatk4/samtofastq/main.nf +++ b/modules/nf-core/gatk4/samtofastq/main.nf @@ -5,7 +5,7 @@ process GATK4_SAMTOFASTQ { conda "bioconda::gatk4=4.4.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': - 'quay.io/biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" + 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" input: tuple val(meta), path(bam) diff --git a/modules/nf-core/gatk4/selectvariants/main.nf b/modules/nf-core/gatk4/selectvariants/main.nf index 001b7f68..609cb8cc 100644 --- a/modules/nf-core/gatk4/selectvariants/main.nf +++ b/modules/nf-core/gatk4/selectvariants/main.nf @@ -5,7 +5,7 @@ process GATK4_SELECTVARIANTS { conda "bioconda::gatk4=4.4.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': - 'quay.io/biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" + 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" input: tuple val(meta), path(vcf), path(vcf_idx), path (intervals) @@ -22,6 +22,7 @@ process GATK4_SELECTVARIANTS { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def interval = intervals ? "--intervals ${intervals}" : "" + def avail_mem = 3072 if (!task.memory) { log.info '[GATK SelectVariants] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' diff --git a/modules/nf-core/gatk4/shiftfasta/main.nf b/modules/nf-core/gatk4/shiftfasta/main.nf new file mode 100644 index 00000000..ab0e578c --- /dev/null +++ b/modules/nf-core/gatk4/shiftfasta/main.nf @@ -0,0 +1,67 @@ +process GATK4_SHIFTFASTA { + tag "$meta.id" + label 'process_single' + + conda "bioconda::gatk4=4.4.0.0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" + + input: + tuple val(meta), path(fasta) + tuple val(meta2), path(fasta_fai) + tuple val(meta3), path(dict) + + output: + tuple val(meta), path("*_shift.fasta") , emit: shift_fa + tuple val(meta), path("*_shift.fasta.fai") , emit: shift_fai + tuple val(meta), path("*_shift.back_chain") , emit: shift_back_chain + tuple val(meta), path("*_shift.dict") , emit: dict , optional: true + tuple val(meta), path("*.intervals") , emit: intervals , optional: true + tuple val(meta), path("*.shifted.intervals") , emit: shift_intervals , optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def seq_dict = dict ? "--sequence-dictionary ${dict}" : "" + + def avail_mem = 3072 + if (!task.memory) { + log.info '[GATK ShiftFasta] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M" ShiftFasta \\ + --reference $fasta \\ + --output ${prefix}_shift.fasta \\ + --shift-back-output ${prefix}_shift.back_chain \\ + $args \\ + $seq_dict \\ + --tmp-dir . + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + """ + touch test.intervals + touch test_shift.back_chain + touch test_shift.dict + touch test.shifted.intervals + touch test_shift.fasta + touch test_shift.fasta.fai + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/shiftfasta/meta.yml b/modules/nf-core/gatk4/shiftfasta/meta.yml new file mode 100644 index 00000000..6d563ded --- /dev/null +++ b/modules/nf-core/gatk4/shiftfasta/meta.yml @@ -0,0 +1,81 @@ +name: "gatk4_shiftfasta" +description: Create a fasta with the bases shifted by offset +keywords: + - mitochondria + - shiftfasta + - shiftchain + - shiftintervals +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + tool_dev_url: "https://github.com/broadinstitute/gatk" + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] + +input: + - meta: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - fasta: + type: file + description: fasta file + pattern: "*.{fa,fasta}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - fasta_fai: + type: file + description: index for fasta file + pattern: "*.{fai}" + - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - dict: + type: file + description: sequence dictionary file + pattern: "*.{dict}" + +output: + - meta: + type: map + description: | + Groovy Map containing fasta information + e.g. 
[ id:'test' ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - dict: + type: file + description: sequence dictionary file + pattern: "*.{dict}" + - intervals: + type: file + description: Intervals file for the fasta file + pattern: "*.{intervals}" + - shift_back_chain: + type: file + description: The shiftback chain file to use when lifting over + pattern: "*.{back_chain}" + - shift_fa: + type: file + description: Shifted fasta file + pattern: "*.{fa,fasta}" + - shift_intervals: + type: file + description: Intervals file for the shifted fasta file + pattern: "*.{shifted.intervals}" + +authors: + - "@ramprasadn" diff --git a/modules/nf-core/gatk4/splitintervals/main.nf b/modules/nf-core/gatk4/splitintervals/main.nf index a40abe45..3cb18373 100644 --- a/modules/nf-core/gatk4/splitintervals/main.nf +++ b/modules/nf-core/gatk4/splitintervals/main.nf @@ -5,13 +5,13 @@ process GATK4_SPLITINTERVALS { conda "bioconda::gatk4=4.4.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': - 'quay.io/biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" + 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" input: tuple val(meta), path(intervals) - path(fasta) - path(fasta_fai) - path(dict) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + tuple val(meta4), path(dict) output: tuple val(meta), path("**.interval_list"), emit: split_intervals @@ -31,7 +31,6 @@ process GATK4_SPLITINTERVALS { } else { avail_mem = (task.memory.mega*0.8).intValue() } - """ gatk --java-options "-Xmx${avail_mem}M" SplitIntervals \\ --output ${prefix} \\ diff --git a/modules/nf-core/gatk4/splitintervals/meta.yml b/modules/nf-core/gatk4/splitintervals/meta.yml index 701c6893..a249f077 100644 --- a/modules/nf-core/gatk4/splitintervals/meta.yml +++ b/modules/nf-core/gatk4/splitintervals/meta.yml @@ -3,6 +3,7 @@ description: Split intervals into sub-interval files. keywords: - interval - bed + - splitintervals tools: - gatk4: description: Genome Analysis Toolkit (GATK4) @@ -22,14 +23,29 @@ input: type: file description: Interval list or BED pattern: "*.{interval,interval_list,bed}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] - fasta: type: file description: Reference FASTA pattern: "*.{fa,fasta}" - - fasta_fai: + - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fai: type: file description: Reference FASTA index pattern: "*.fai" + - meta4: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] - dict: type: file description: Reference sequence dictionary @@ -52,3 +68,4 @@ output: authors: - "@nvnieuwk" + - "@ramprasadn" diff --git a/modules/nf-core/gatk4/variantfiltration/main.nf b/modules/nf-core/gatk4/variantfiltration/main.nf index cc03ff3c..387ff8ca 100644 --- a/modules/nf-core/gatk4/variantfiltration/main.nf +++ b/modules/nf-core/gatk4/variantfiltration/main.nf @@ -5,13 +5,13 @@ process GATK4_VARIANTFILTRATION { conda "bioconda::gatk4=4.4.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': - 'quay.io/biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" + 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" input: tuple val(meta), path(vcf), path(tbi) - path fasta - path fai - path dict + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + tuple val(meta4), path(dict) output: tuple val(meta), path("*.vcf.gz"), emit: vcf diff --git a/modules/nf-core/gatk4/variantfiltration/meta.yml b/modules/nf-core/gatk4/variantfiltration/meta.yml index 04b1c086..2260f37b 100644 --- a/modules/nf-core/gatk4/variantfiltration/meta.yml +++ b/modules/nf-core/gatk4/variantfiltration/meta.yml @@ -3,6 +3,7 @@ description: Filter variants keywords: - vcf - filter + - variantfiltration tools: - gatk4: description: | @@ -27,14 +28,29 @@ input: type: list description: List of VCF file indexes pattern: "*.{idx,tbi}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] - fasta: type: file description: Fasta file of reference genome pattern: "*.fasta" + - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] - fai: type: file description: Index of fasta file pattern: "*.fasta.fai" + - meta4: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] - dict: type: file description: Sequence dictionary of fastea file @@ -54,3 +70,4 @@ output: pattern: "versions.yml" authors: - "@kevinmenden" + - "@ramprasadn" diff --git a/modules/nf-core/genmod/annotate/main.nf b/modules/nf-core/genmod/annotate/main.nf index 9d9350dc..43ae0d19 100644 --- a/modules/nf-core/genmod/annotate/main.nf +++ b/modules/nf-core/genmod/annotate/main.nf @@ -5,7 +5,7 @@ process GENMOD_ANNOTATE { conda "bioconda::genmod=3.7.4" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/genmod:3.7.4--pyh5e36f6f_0': - 'quay.io/biocontainers/genmod:3.7.4--pyh5e36f6f_0' }" + 'biocontainers/genmod:3.7.4--pyh5e36f6f_0' }" input: tuple val(meta), path(input_vcf) diff --git a/modules/nf-core/genmod/compound/main.nf b/modules/nf-core/genmod/compound/main.nf index 925f76bb..149a03d8 100644 --- a/modules/nf-core/genmod/compound/main.nf +++ b/modules/nf-core/genmod/compound/main.nf @@ -5,7 +5,7 @@ process GENMOD_COMPOUND { conda "bioconda::genmod=3.7.4" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/genmod:3.7.4--pyh5e36f6f_0': - 'quay.io/biocontainers/genmod:3.7.4--pyh5e36f6f_0' }" + 'biocontainers/genmod:3.7.4--pyh5e36f6f_0' }" input: tuple val(meta), path(input_vcf) diff --git a/modules/nf-core/genmod/models/main.nf b/modules/nf-core/genmod/models/main.nf index cd1b747e..3f5700d6 100644 --- a/modules/nf-core/genmod/models/main.nf +++ b/modules/nf-core/genmod/models/main.nf @@ -5,7 +5,7 @@ process GENMOD_MODELS { conda "bioconda::genmod=3.7.4 conda-forge::python=3.4.5" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/genmod:3.7.4--pyh5e36f6f_0': - 'quay.io/biocontainers/genmod:3.7.4--pyh5e36f6f_0' }" + 'biocontainers/genmod:3.7.4--pyh5e36f6f_0' }" input: tuple val(meta), path(input_vcf) diff --git a/modules/nf-core/genmod/score/main.nf b/modules/nf-core/genmod/score/main.nf index d3c9957c..d1efade5 100644 --- a/modules/nf-core/genmod/score/main.nf +++ b/modules/nf-core/genmod/score/main.nf @@ -5,7 +5,7 @@ process GENMOD_SCORE { conda "bioconda::genmod=3.7.4" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/genmod:3.7.4--pyh5e36f6f_0': - 'quay.io/biocontainers/genmod:3.7.4--pyh5e36f6f_0' }" + 'biocontainers/genmod:3.7.4--pyh5e36f6f_0' }" input: tuple val(meta), path(input_vcf) diff --git a/modules/nf-core/glnexus/main.nf b/modules/nf-core/glnexus/main.nf index 479da5a7..2bd4580a 100644 --- a/modules/nf-core/glnexus/main.nf +++ b/modules/nf-core/glnexus/main.nf @@ -5,7 +5,7 @@ process GLNEXUS { conda "bioconda::glnexus=1.4.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/glnexus:1.4.1--h40d77a6_0' : - 'quay.io/biocontainers/glnexus:1.4.1--h40d77a6_0' }" + 'biocontainers/glnexus:1.4.1--h40d77a6_0' }" input: tuple val(meta), path(gvcfs) diff --git a/modules/nf-core/haplocheck/main.nf b/modules/nf-core/haplocheck/main.nf index 0a593bd0..464f5dbf 100644 --- a/modules/nf-core/haplocheck/main.nf +++ b/modules/nf-core/haplocheck/main.nf @@ -5,7 +5,7 @@ process HAPLOCHECK { conda "bioconda::haplocheck=1.3.3" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/haplocheck:1.3.3--h4a94de4_0': - 'quay.io/biocontainers/haplocheck:1.3.3--h4a94de4_0' }" + 'biocontainers/haplocheck:1.3.3--h4a94de4_0' }" input: tuple val(meta), path(vcf) diff --git a/modules/nf-core/haplogrep2/classify/main.nf b/modules/nf-core/haplogrep2/classify/main.nf index 8931ca78..7f775cae 100644 --- a/modules/nf-core/haplogrep2/classify/main.nf +++ b/modules/nf-core/haplogrep2/classify/main.nf @@ -5,7 +5,7 @@ process HAPLOGREP2_CLASSIFY { conda "bioconda::haplogrep=2.4.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/haplogrep:2.4.0--hdfd78af_0': - 'quay.io/biocontainers/haplogrep:2.4.0--hdfd78af_0' }" + 'biocontainers/haplogrep:2.4.0--hdfd78af_0' }" input: tuple val(meta), path(inputfile) diff --git a/modules/nf-core/manta/germline/main.nf b/modules/nf-core/manta/germline/main.nf index 2feb6ed2..e052b7c9 100644 --- a/modules/nf-core/manta/germline/main.nf +++ b/modules/nf-core/manta/germline/main.nf @@ -1,17 +1,18 @@ process MANTA_GERMLINE { tag "$meta.id" label 'process_medium' + label 'error_retry' conda "bioconda::manta=1.6.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/manta:1.6.0--h9ee0642_1' : - 'quay.io/biocontainers/manta:1.6.0--h9ee0642_1' }" + 'biocontainers/manta:1.6.0--h9ee0642_1' }" input: //Matching the target bed with the input sample allows to parallelize the same sample run across different intervals or a single bed file tuple val(meta), path(input), path(index), path(target_bed), path(target_bed_tbi) - path fasta - path fasta_fai + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) output: tuple val(meta), path("*candidate_small_indels.vcf.gz") , emit: candidate_small_indels_vcf diff --git a/modules/nf-core/manta/germline/meta.yml b/modules/nf-core/manta/germline/meta.yml index b719f075..2eb16ada 100644 --- a/modules/nf-core/manta/germline/meta.yml +++ b/modules/nf-core/manta/germline/meta.yml @@ -39,11 +39,21 @@ input: type: file description: Index for BED file containing target regions for variant calling pattern: "*.{bed.tbi}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] - fasta: type: file description: Genome reference FASTA file pattern: "*.{fa,fasta}" - - fasta_fai: + - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fai: type: file description: Genome reference FASTA index file pattern: "*.{fa.fai,fasta.fai}" diff --git a/modules/nf-core/mosdepth/main.nf b/modules/nf-core/mosdepth/main.nf index 827c489e..c17e4e65 100644 --- a/modules/nf-core/mosdepth/main.nf +++ b/modules/nf-core/mosdepth/main.nf @@ -5,7 +5,7 @@ process MOSDEPTH { conda "bioconda::mosdepth=0.3.3" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/mosdepth:0.3.3--hdfd78af_1' : - 'quay.io/biocontainers/mosdepth:0.3.3--hdfd78af_1'}" + 'biocontainers/mosdepth:0.3.3--hdfd78af_1'}" input: tuple val(meta), path(bam), path(bai), path(bed) diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index 4b604749..1fc387be 100644 --- a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -4,7 +4,7 @@ process MULTIQC { conda "bioconda::multiqc=1.14" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/multiqc:1.14--pyhdfd78af_0' : - 'quay.io/biocontainers/multiqc:1.14--pyhdfd78af_0' }" + 'biocontainers/multiqc:1.14--pyhdfd78af_0' }" input: path multiqc_files, stageAs: "?/*" diff --git a/modules/nf-core/peddy/main.nf b/modules/nf-core/peddy/main.nf index 5d1cfe2f..6671de73 100644 --- a/modules/nf-core/peddy/main.nf +++ b/modules/nf-core/peddy/main.nf @@ -5,7 +5,7 @@ process PEDDY { conda "bioconda::peddy=0.4.8" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/peddy:0.4.8--pyh5e36f6f_0' : - 'quay.io/biocontainers/peddy:0.4.8--pyh5e36f6f_0' }" + 'biocontainers/peddy:0.4.8--pyh5e36f6f_0' }" input: tuple val(meta), path(vcf), path(vcf_tbi) diff --git a/modules/nf-core/picard/addorreplacereadgroups/main.nf b/modules/nf-core/picard/addorreplacereadgroups/main.nf index c6b1be37..a2a8d1c3 100644 --- a/modules/nf-core/picard/addorreplacereadgroups/main.nf +++ b/modules/nf-core/picard/addorreplacereadgroups/main.nf @@ -5,7 +5,7 @@ process PICARD_ADDORREPLACEREADGROUPS { conda "bioconda::picard=3.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/picard:3.0.0--hdfd78af_1' : - 'quay.io/biocontainers/picard:3.0.0--hdfd78af_1' }" + 'biocontainers/picard:3.0.0--hdfd78af_1' }" input: tuple val(meta), path(bam) diff --git a/modules/nf-core/picard/collecthsmetrics/main.nf b/modules/nf-core/picard/collecthsmetrics/main.nf index 5f1e9c90..d721bc6f 100644 --- a/modules/nf-core/picard/collecthsmetrics/main.nf +++ b/modules/nf-core/picard/collecthsmetrics/main.nf @@ -5,14 +5,13 @@ process PICARD_COLLECTHSMETRICS { conda "bioconda::picard=3.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/picard:3.0.0--hdfd78af_1' : - 'quay.io/biocontainers/picard:3.0.0--hdfd78af_1' }" + 'biocontainers/picard:3.0.0--hdfd78af_1' }" input: - tuple val(meta), path(bam), path(bai) + tuple val(meta), path(bam), path(bai), path(bait_intervals), path(target_intervals) tuple val(meta2), path(fasta) tuple val(meta3), path(fai) - path bait_intervals - path target_intervals + tuple val(meta4), path(dict) output: tuple val(meta), path("*_metrics") , emit: metrics @@ -32,14 +31,34 @@ process PICARD_COLLECTHSMETRICS { } else { avail_mem = (task.memory.mega*0.8).intValue() } + + def bait_interval_list = bait_intervals + def bait_intervallist_cmd = "" + if (bait_intervals =~ /.(bed|bed.gz)$/){ + bait_interval_list = bait_intervals.toString().replaceAll(/.(bed|bed.gz)$/, ".interval_list") + bait_intervallist_cmd = "picard -Xmx${avail_mem}M BedToIntervalList --INPUT ${bait_intervals} --OUTPUT ${bait_interval_list} --SEQUENCE_DICTIONARY ${dict} --TMP_DIR ." + } + + def target_interval_list = target_intervals + def target_intervallist_cmd = "" + if (target_intervals =~ /.(bed|bed.gz)$/){ + target_interval_list = target_intervals.toString().replaceAll(/.(bed|bed.gz)$/, ".interval_list") + target_intervallist_cmd = "picard -Xmx${avail_mem}M BedToIntervalList --INPUT ${target_intervals} --OUTPUT ${target_interval_list} --SEQUENCE_DICTIONARY ${dict} --TMP_DIR ." 
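+        // Editor's note (descriptive comment, not upstream code): as with the baits
+        // above, a BED targets file is converted to a Picard interval_list with
+        // BedToIntervalList (keyed on the sequence dictionary) before CollectHsMetrics
+        // runs; interval_list inputs are passed through unchanged.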
+    }
+
     """
+
+    $bait_intervallist_cmd
+    $target_intervallist_cmd
+
     picard \\
         -Xmx${avail_mem}M \\
         CollectHsMetrics \\
         $args \\
         $reference \\
-        --BAIT_INTERVALS $bait_intervals \\
-        --TARGET_INTERVALS $target_intervals \\
+        --BAIT_INTERVALS $bait_interval_list \\
+        --TARGET_INTERVALS $target_interval_list \\
         --INPUT $bam \\
         --OUTPUT ${prefix}.CollectHsMetrics.coverage_metrics
diff --git a/modules/nf-core/picard/collecthsmetrics/meta.yml b/modules/nf-core/picard/collecthsmetrics/meta.yml
index 7e045c05..fecad0e5 100644
--- a/modules/nf-core/picard/collecthsmetrics/meta.yml
+++ b/modules/nf-core/picard/collecthsmetrics/meta.yml
@@ -32,6 +32,14 @@ input:
       type: file
       description: Optional aligned BAM/CRAM/SAM file index
       pattern: "*.{bai,crai,sai}"
+  - bait_intervals:
+      type: file
+      description: An interval file that contains the locations of the baits used.
+      pattern: "*.{interval_list,bed,bed.gz}"
+  - target_intervals:
+      type: file
+      description: An interval file that contains the locations of the targets.
+      pattern: "*.{interval_list,bed,bed.gz}"
   - meta2:
       type: map
       description: |
@@ -52,15 +60,15 @@ input:
       type: file
       description: Index of FASTA file. Only needed when fasta is supplied.
       pattern: "*.fai"
-  - bait_intervals:
-      type: file
-      description: An interval list file that contains the locations of the baits used.
-      pattern: "baits.interval_list"
-  - target_intervals:
+  - meta4:
+      type: map
+      description: |
+        Groovy Map containing reference information
+        e.g. [ id:'genome' ]
+  - dict:
       type: file
-      description: An interval list file that contains the locations of the targets.
-      pattern: "targets.interval_list"
-
+      description: Sequence dictionary of FASTA file. Only needed when bed interval lists are supplied.
+      pattern: "*.dict"
 output:
   - meta:
       type: map
diff --git a/modules/nf-core/picard/collectmultiplemetrics/main.nf b/modules/nf-core/picard/collectmultiplemetrics/main.nf
index ed88dbe7..91fe9170 100644
--- a/modules/nf-core/picard/collectmultiplemetrics/main.nf
+++ b/modules/nf-core/picard/collectmultiplemetrics/main.nf
@@ -5,7 +5,7 @@ process PICARD_COLLECTMULTIPLEMETRICS {
     conda "bioconda::picard=3.0.0"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
         'https://depot.galaxyproject.org/singularity/picard:3.0.0--hdfd78af_1' :
-        'quay.io/biocontainers/picard:3.0.0--hdfd78af_1' }"
+        'biocontainers/picard:3.0.0--hdfd78af_1' }"

     input:
     tuple val(meta) , path(bam), path(bai)
diff --git a/modules/nf-core/picard/collectwgsmetrics/main.nf b/modules/nf-core/picard/collectwgsmetrics/main.nf
index 5d2a39b8..1d59334c 100644
--- a/modules/nf-core/picard/collectwgsmetrics/main.nf
+++ b/modules/nf-core/picard/collectwgsmetrics/main.nf
@@ -5,12 +5,12 @@ process PICARD_COLLECTWGSMETRICS {
     conda "bioconda::picard=3.0.0 r::r-base"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/picard:3.0.0--hdfd78af_1' : - 'quay.io/biocontainers/picard:3.0.0--hdfd78af_1' }" + 'biocontainers/picard:3.0.0--hdfd78af_1' }" input: tuple val(meta), path(bam), path(bai) tuple val(meta2), path(fasta) - tuple val(meta2), path(fai) + tuple val(meta3), path(fai) path intervallist output: diff --git a/modules/nf-core/picard/collectwgsmetrics/meta.yml b/modules/nf-core/picard/collectwgsmetrics/meta.yml index 2f8dbd3c..19906f08 100644 --- a/modules/nf-core/picard/collectwgsmetrics/meta.yml +++ b/modules/nf-core/picard/collectwgsmetrics/meta.yml @@ -37,7 +37,7 @@ input: type: file description: Genome fasta file pattern: "*.{fa,fasta,fna}" - - meta2: + - meta3: type: map description: | Groovy Map containing reference information @@ -67,3 +67,4 @@ authors: - "@drpatelh" - "@flowuenne" - "@lassefolkersen" + - "@ramprasadn" diff --git a/modules/nf-core/picard/liftovervcf/main.nf b/modules/nf-core/picard/liftovervcf/main.nf index 935fb544..bfb004f7 100644 --- a/modules/nf-core/picard/liftovervcf/main.nf +++ b/modules/nf-core/picard/liftovervcf/main.nf @@ -5,13 +5,13 @@ process PICARD_LIFTOVERVCF { conda "bioconda::picard=3.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/picard:3.0.0--hdfd78af_1' : - 'quay.io/biocontainers/picard:3.0.0--hdfd78af_1' }" + 'biocontainers/picard:3.0.0--hdfd78af_1' }" input: tuple val(meta), path(input_vcf) - path dict - path chain - path fasta + tuple val(meta2), path(dict) + tuple val(meta3), path(fasta) + tuple val(meta4), path(chain) output: tuple val(meta), path("*.lifted.vcf.gz") , emit: vcf_lifted diff --git a/modules/nf-core/picard/liftovervcf/meta.yml b/modules/nf-core/picard/liftovervcf/meta.yml index be42aba4..6023e5f9 100644 --- a/modules/nf-core/picard/liftovervcf/meta.yml +++ b/modules/nf-core/picard/liftovervcf/meta.yml @@ -1,8 +1,9 @@ name: picard_liftovervcf -description: convert between genome builds +description: Lifts over a VCF file from one reference build to another. keywords: - - liftOver + - vcf - picard + - liftovervcf tools: - picard: description: Move annotations from one assembly to another @@ -21,17 +22,32 @@ input: type: file description: VCF file pattern: "*.{vcf,vcf.gz}" - - chain: - type: file - description: The liftover chain file + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] - fasta: type: file description: fasta file pattern: "*.fasta" + - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] - dict: type: file description: dictionary for fasta file pattern: "*.{dict}" + - meta4: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - chain: + type: file + description: The liftover chain file output: - meta: @@ -53,3 +69,4 @@ output: authors: - "@lucpen" + - "@ramprasadn" diff --git a/modules/nf-core/picard/markduplicates/main.nf b/modules/nf-core/picard/markduplicates/main.nf index 1fe6ee2d..facd7efb 100644 --- a/modules/nf-core/picard/markduplicates/main.nf +++ b/modules/nf-core/picard/markduplicates/main.nf @@ -5,12 +5,12 @@ process PICARD_MARKDUPLICATES { conda "bioconda::picard=3.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/picard:3.0.0--hdfd78af_1' : - 'quay.io/biocontainers/picard:3.0.0--hdfd78af_1' }" + 'biocontainers/picard:3.0.0--hdfd78af_1' }" input: tuple val(meta), path(bam) - path fasta - path fai + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) output: tuple val(meta), path("*.bam") , emit: bam diff --git a/modules/nf-core/picard/markduplicates/meta.yml b/modules/nf-core/picard/markduplicates/meta.yml index 3f2357bb..f7693d2f 100644 --- a/modules/nf-core/picard/markduplicates/meta.yml +++ b/modules/nf-core/picard/markduplicates/meta.yml @@ -25,10 +25,20 @@ input: type: file description: BAM file pattern: "*.{bam,cram,sam}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] - fasta: type: file description: Reference genome fasta file pattern: "*.{fasta,fa}" + - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] - fai: type: file description: Reference genome fasta index @@ -58,3 +68,4 @@ output: authors: - "@drpatelh" - "@projectoriented" + - "@ramprasadn" diff --git a/modules/nf-core/picard/renamesampleinvcf/main.nf b/modules/nf-core/picard/renamesampleinvcf/main.nf index 646a953d..75dd64b7 100644 --- a/modules/nf-core/picard/renamesampleinvcf/main.nf +++ b/modules/nf-core/picard/renamesampleinvcf/main.nf @@ -6,7 +6,7 @@ process PICARD_RENAMESAMPLEINVCF { conda "bioconda::picard=3.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/picard:3.0.0--hdfd78af_1' : - 'quay.io/biocontainers/picard:3.0.0--hdfd78af_1' }" + 'biocontainers/picard:3.0.0--hdfd78af_1' }" input: tuple val(meta), path(vcf) diff --git a/modules/nf-core/picard/sortvcf/main.nf b/modules/nf-core/picard/sortvcf/main.nf index 0269b0f4..b8b1f833 100644 --- a/modules/nf-core/picard/sortvcf/main.nf +++ b/modules/nf-core/picard/sortvcf/main.nf @@ -5,12 +5,12 @@ process PICARD_SORTVCF { conda "bioconda::picard=3.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/picard:3.0.0--hdfd78af_1' : - 'quay.io/biocontainers/picard:3.0.0--hdfd78af_1' }" + 'biocontainers/picard:3.0.0--hdfd78af_1' }" input: tuple val(meta), path(vcf) - path reference - path sequence_dict + tuple val(meta2), path(fasta) + tuple val(meta3), path(dict) output: tuple val(meta), path("*_sorted.vcf.gz"), emit: vcf @@ -22,8 +22,8 @@ process PICARD_SORTVCF { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def seq_dict = sequence_dict ? "--SEQUENCE_DICTIONARY $sequence_dict" : "" - def reference = reference ? "--REFERENCE_SEQUENCE $reference" : "" + def seq_dict = dict ? "--SEQUENCE_DICTIONARY $dict" : "" + def reference = fasta ? "--REFERENCE_SEQUENCE $fasta" : "" def avail_mem = 3072 if (!task.memory) { log.info '[Picard SortVcf] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' 
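Editor's note: a pattern repeated across these module updates is that reference inputs (fasta, fai, dict) change from bare path declarations to [ meta, file ] tuples. A minimal, hypothetical wiring sketch for the refactored PICARD_SORTVCF above; the include path, channel names, and file names are illustrative assumptions, not code from the pipeline:

    include { PICARD_SORTVCF } from '../modules/nf-core/picard/sortvcf/main'

    workflow {
        // Per-sample VCFs carry their own meta map, as before.
        ch_vcf   = Channel.of( [ [ id:'sample1' ], file('sample1.vcf.gz') ] )

        // Reference files now travel as [ meta, file ] tuples too; value channels
        // let every task reuse the same reference.
        ch_fasta = Channel.value( [ [ id:'genome' ], file('genome.fasta') ] )
        ch_dict  = Channel.value( [ [ id:'genome' ], file('genome.dict') ] )

        PICARD_SORTVCF ( ch_vcf, ch_fasta, ch_dict )
    }

Both reference arguments remain effectively optional: as the seq_dict and reference definitions in the hunk above show, the script only adds --SEQUENCE_DICTIONARY and --REFERENCE_SEQUENCE when the corresponding paths are set.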
diff --git a/modules/nf-core/picard/sortvcf/meta.yml b/modules/nf-core/picard/sortvcf/meta.yml index f75d3401..0a19784d 100644 --- a/modules/nf-core/picard/sortvcf/meta.yml +++ b/modules/nf-core/picard/sortvcf/meta.yml @@ -3,6 +3,7 @@ description: Sorts vcf files keywords: - sort - vcf + - sortvcf tools: - picard: description: Java tools for working with NGS data in the BAM/CRAM/SAM and VCF format @@ -20,6 +21,24 @@ input: type: file description: VCF file pattern: "*.{vcf,vcf.gz}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta: + type: file + description: Reference genome fasta file + pattern: "*.{fasta,fa}" + - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - dict: + type: file + description: Reference genome dictionary file + pattern: "*.{dict}" output: - meta: diff --git a/modules/nf-core/qualimap/bamqc/main.nf b/modules/nf-core/qualimap/bamqc/main.nf index 810cf402..fef7307a 100644 --- a/modules/nf-core/qualimap/bamqc/main.nf +++ b/modules/nf-core/qualimap/bamqc/main.nf @@ -5,7 +5,7 @@ process QUALIMAP_BAMQC { conda "bioconda::qualimap=2.2.2d" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/qualimap:2.2.2d--1' : - 'quay.io/biocontainers/qualimap:2.2.2d--1' }" + 'biocontainers/qualimap:2.2.2d--1' }" input: tuple val(meta), path(bam) @@ -34,7 +34,7 @@ process QUALIMAP_BAMQC { } """ unset DISPLAY - mkdir tmp + mkdir -p tmp export _JAVA_OPTIONS=-Djava.io.tmpdir=./tmp qualimap \\ --java-mem-size=$memory \\ diff --git a/modules/nf-core/rhocall/annotate/main.nf b/modules/nf-core/rhocall/annotate/main.nf index 155a9d69..a55578ab 100644 --- a/modules/nf-core/rhocall/annotate/main.nf +++ b/modules/nf-core/rhocall/annotate/main.nf @@ -5,11 +5,11 @@ process RHOCALL_ANNOTATE { conda "bioconda::rhocall=0.5.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/rhocall:0.5.1--py39hbf8eff0_0': - 'quay.io/biocontainers/rhocall:0.5.1--py39hbf8eff0_0' }" + 'biocontainers/rhocall:0.5.1--py39hbf8eff0_0' }" input: tuple val(meta), path(vcf), path(tbi) - tuple val(meta), path(roh) + tuple val(meta2), path(roh) path bed output: diff --git a/modules/nf-core/rhocall/annotate/meta.yml b/modules/nf-core/rhocall/annotate/meta.yml index 618d0017..96e10d96 100644 --- a/modules/nf-core/rhocall/annotate/meta.yml +++ b/modules/nf-core/rhocall/annotate/meta.yml @@ -3,6 +3,7 @@ description: "Markup VCF file using rho-calls." keywords: - roh - rhocall + - runs_of_homozygosity tools: - "rhocall": description: "Call regions of homozygosity and make tentative UPD calls." @@ -18,6 +19,11 @@ input: description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] + - meta2: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] - vcf: type: file description: vcf file diff --git a/modules/nf-core/samtools/faidx/main.nf b/modules/nf-core/samtools/faidx/main.nf index ce6580d2..59ed3088 100644 --- a/modules/nf-core/samtools/faidx/main.nf +++ b/modules/nf-core/samtools/faidx/main.nf @@ -2,18 +2,20 @@ process SAMTOOLS_FAIDX { tag "$fasta" label 'process_single' - conda "bioconda::samtools=1.16.1" + conda "bioconda::samtools=1.17" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.16.1--h6899075_1' : - 'quay.io/biocontainers/samtools:1.16.1--h6899075_1' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : + 'biocontainers/samtools:1.17--h00cdaf9_0' }" input: tuple val(meta), path(fasta) + tuple val(meta2), path(fai) output: - tuple val(meta), path ("*.fai"), emit: fai - tuple val(meta), path ("*.gzi"), emit: gzi, optional: true - path "versions.yml" , emit: versions + tuple val(meta), path ("*.{fa,fasta}") , emit: fa , optional: true + tuple val(meta), path ("*.fai") , emit: fai, optional: true + tuple val(meta), path ("*.gzi") , emit: gzi, optional: true + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -23,8 +25,8 @@ process SAMTOOLS_FAIDX { """ samtools \\ faidx \\ - $args \\ - $fasta + $fasta \\ + $args cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -33,8 +35,12 @@ process SAMTOOLS_FAIDX { """ stub: + def match = (task.ext.args =~ /-o(?:utput)?\s(.*)\s?/).findAll() + def fastacmd = match[0] ? "touch ${match[0][1]}" : '' """ + ${fastacmd} touch ${fasta}.fai + cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/samtools/faidx/meta.yml b/modules/nf-core/samtools/faidx/meta.yml index fe2fe9a1..957b25e5 100644 --- a/modules/nf-core/samtools/faidx/meta.yml +++ b/modules/nf-core/samtools/faidx/meta.yml @@ -3,6 +3,7 @@ description: Index FASTA file keywords: - index - fasta + - faidx tools: - samtools: description: | @@ -17,12 +18,21 @@ input: - meta: type: map description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] + Groovy Map containing reference information + e.g. [ id:'test' ] - fasta: type: file description: FASTA file pattern: "*.{fa,fasta}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - fai: + type: file + description: FASTA index file + pattern: "*.{fai}" output: - meta: type: map diff --git a/modules/nf-core/samtools/index/main.nf b/modules/nf-core/samtools/index/main.nf index 8b95687a..0b20aa4b 100644 --- a/modules/nf-core/samtools/index/main.nf +++ b/modules/nf-core/samtools/index/main.nf @@ -2,10 +2,10 @@ process SAMTOOLS_INDEX { tag "$meta.id" label 'process_low' - conda "bioconda::samtools=1.16.1" + conda "bioconda::samtools=1.17" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/samtools:1.16.1--h6899075_1' : - 'quay.io/biocontainers/samtools:1.16.1--h6899075_1' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : + 'biocontainers/samtools:1.17--h00cdaf9_0' }" input: tuple val(meta), path(input) diff --git a/modules/nf-core/samtools/merge/main.nf b/modules/nf-core/samtools/merge/main.nf index a80ff3a2..b73b7cb2 100644 --- a/modules/nf-core/samtools/merge/main.nf +++ b/modules/nf-core/samtools/merge/main.nf @@ -2,15 +2,15 @@ process SAMTOOLS_MERGE { tag "$meta.id" label 'process_low' - conda "bioconda::samtools=1.16.1" + conda "bioconda::samtools=1.17" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.16.1--h6899075_1' : - 'quay.io/biocontainers/samtools:1.16.1--h6899075_1' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : + 'biocontainers/samtools:1.17--h00cdaf9_0' }" input: tuple val(meta), path(input_files, stageAs: "?/*") - path fasta - path fai + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) output: tuple val(meta), path("${prefix}.bam") , optional:true, emit: bam diff --git a/modules/nf-core/samtools/merge/meta.yml b/modules/nf-core/samtools/merge/meta.yml index 644b768b..3a815f74 100644 --- a/modules/nf-core/samtools/merge/meta.yml +++ b/modules/nf-core/samtools/merge/meta.yml @@ -25,13 +25,23 @@ input: type: file description: BAM/CRAM file pattern: "*.{bam,cram,sam}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] - fasta: - type: optional file - description: Reference file the CRAM was created with + type: file + description: Reference file the CRAM was created with (optional) pattern: "*.{fasta,fa}" + - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] - fai: - type: optional file - description: Index of the reference file the CRAM was created with + type: file + description: Index of the reference file the CRAM was created with (optional) pattern: "*.fai" output: - meta: @@ -60,3 +70,4 @@ authors: - "@yuukiiwa " - "@maxulysse" - "@FriederikeHanssen" + - "@ramprasadn" diff --git a/modules/nf-core/samtools/sort/main.nf b/modules/nf-core/samtools/sort/main.nf index 84c167cd..2b7753fd 100644 --- a/modules/nf-core/samtools/sort/main.nf +++ b/modules/nf-core/samtools/sort/main.nf @@ -2,10 +2,10 @@ process SAMTOOLS_SORT { tag "$meta.id" label 'process_medium' - conda "bioconda::samtools=1.16.1" + conda "bioconda::samtools=1.17" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.16.1--h6899075_1' : - 'quay.io/biocontainers/samtools:1.16.1--h6899075_1' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : + 'biocontainers/samtools:1.17--h00cdaf9_0' }" input: tuple val(meta), path(bam) @@ -23,7 +23,13 @@ process SAMTOOLS_SORT { def prefix = task.ext.prefix ?: "${meta.id}" if ("$bam" == "${prefix}.bam") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" 
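+    // Editor's note (descriptive comment, not upstream code): the check above keeps
+    // samtools sort from overwriting its input in place when ext.prefix resolves to
+    // the input file's basename.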
""" - samtools sort $args -@ $task.cpus -o ${prefix}.bam -T $prefix $bam + samtools sort \\ + $args \\ + -@ $task.cpus \\ + -o ${prefix}.bam \\ + -T $prefix \\ + $bam + cat <<-END_VERSIONS > versions.yml "${task.process}": samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') diff --git a/modules/nf-core/samtools/stats/main.nf b/modules/nf-core/samtools/stats/main.nf index 0a2a3640..4a2607de 100644 --- a/modules/nf-core/samtools/stats/main.nf +++ b/modules/nf-core/samtools/stats/main.nf @@ -2,14 +2,14 @@ process SAMTOOLS_STATS { tag "$meta.id" label 'process_single' - conda "bioconda::samtools=1.16.1" + conda "bioconda::samtools=1.17" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.16.1--h6899075_1' : - 'quay.io/biocontainers/samtools:1.16.1--h6899075_1' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : + 'biocontainers/samtools:1.17--h00cdaf9_0' }" input: tuple val(meta), path(input), path(input_index) - path fasta + tuple val(meta2), path(fasta) output: tuple val(meta), path("*.stats"), emit: stats diff --git a/modules/nf-core/samtools/stats/meta.yml b/modules/nf-core/samtools/stats/meta.yml index 1d68a5d8..90e6345f 100644 --- a/modules/nf-core/samtools/stats/meta.yml +++ b/modules/nf-core/samtools/stats/meta.yml @@ -30,9 +30,14 @@ input: type: file description: BAI/CRAI file from alignment pattern: "*.{bai,crai}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] - fasta: - type: optional file - description: Reference file the CRAM was created with + type: file + description: Reference file the CRAM was created with (optional) pattern: "*.{fasta,fa}" output: - meta: @@ -51,3 +56,4 @@ output: authors: - "@drpatelh" - "@FriederikeHanssen" + - "@ramprasadn" diff --git a/modules/nf-core/samtools/view/main.nf b/modules/nf-core/samtools/view/main.nf new file mode 100644 index 00000000..cb91facf --- /dev/null +++ b/modules/nf-core/samtools/view/main.nf @@ -0,0 +1,66 @@ +process SAMTOOLS_VIEW { + tag "$meta.id" + label 'process_low' + + conda "bioconda::samtools=1.17" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : + 'biocontainers/samtools:1.17--h00cdaf9_0' }" + + input: + tuple val(meta), path(input), path(index) + tuple val(meta2), path(fasta) + path qname + + output: + tuple val(meta), path("*.bam"), emit: bam, optional: true + tuple val(meta), path("*.cram"), emit: cram, optional: true + tuple val(meta), path("*.sam"), emit: sam, optional: true + tuple val(meta), path("*.bai"), emit: bai, optional: true + tuple val(meta), path("*.csi"), emit: csi, optional: true + tuple val(meta), path("*.crai"), emit: crai, optional: true + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def reference = fasta ? "--reference ${fasta}" : "" + def readnames = qname ? "--qname-file ${qname}": "" + def file_type = args.contains("--output-fmt sam") ? "sam" : + args.contains("--output-fmt bam") ? "bam" : + args.contains("--output-fmt cram") ? 
"cram" : + input.getExtension() + if ("$input" == "${prefix}.${file_type}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + """ + samtools \\ + view \\ + --threads ${task.cpus-1} \\ + ${reference} \\ + ${readnames} \\ + $args \\ + -o ${prefix}.${file_type} \\ + $input \\ + $args2 + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.bam + touch ${prefix}.cram + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/view/meta.yml b/modules/nf-core/samtools/view/meta.yml new file mode 100644 index 00000000..3b05450b --- /dev/null +++ b/modules/nf-core/samtools/view/meta.yml @@ -0,0 +1,84 @@ +name: samtools_view +description: filter/convert SAM/BAM/CRAM file +keywords: + - view + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - index: + type: file + description: BAM.BAI/BAM.CSI/CRAM.CRAI file (optional) + pattern: "*.{.bai,.csi,.crai}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - fasta: + type: file + description: Reference file the CRAM was created with (optional) + pattern: "*.{fasta,fa}" + - qname: + type: file + description: Optional file with read names to output only select alignments + pattern: "*.{txt,list}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - bam: + type: file + description: optional filtered/converted BAM file + pattern: "*.{bam}" + - cram: + type: file + description: optional filtered/converted CRAM file + pattern: "*.{cram}" + - sam: + type: file + description: optional filtered/converted SAM file + pattern: "*.{sam}" + # bai, csi, and crai are created with `--write-index` + - bai: + type: file + description: optional BAM file index + pattern: "*.{bai}" + - csi: + type: file + description: optional tabix BAM file index + pattern: "*.{csi}" + - crai: + type: file + description: optional CRAM file index + pattern: "*.{crai}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@joseespinosa" + - "@FriederikeHanssen" + - "@priyanka-surana" diff --git a/modules/nf-core/smncopynumbercaller/main.nf b/modules/nf-core/smncopynumbercaller/main.nf index e746f9fc..15abea2c 100644 --- a/modules/nf-core/smncopynumbercaller/main.nf +++ b/modules/nf-core/smncopynumbercaller/main.nf @@ -5,7 +5,7 @@ process SMNCOPYNUMBERCALLER { conda "bioconda::smncopynumbercaller=1.1.2" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/smncopynumbercaller:1.1.2--py310h7cba7a3_0' : - 'quay.io/biocontainers/smncopynumbercaller:1.1.2--py310h7cba7a3_0' }" + 'biocontainers/smncopynumbercaller:1.1.2--py310h7cba7a3_0' }" input: tuple val(meta), path(bam), path(bai) diff --git a/modules/nf-core/stranger/main.nf b/modules/nf-core/stranger/main.nf index 582e389e..0c8a3412 100644 --- a/modules/nf-core/stranger/main.nf +++ b/modules/nf-core/stranger/main.nf @@ -5,11 +5,11 @@ process STRANGER { conda "bioconda::stranger=0.8.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/stranger:0.8.1--pyh5e36f6f_0': - 'quay.io/biocontainers/stranger:0.8.1--pyh5e36f6f_0' }" + 'biocontainers/stranger:0.8.1--pyh5e36f6f_0' }" input: tuple val(meta), path(vcf) - path variant_catalog + tuple val(meta2), path(variant_catalog) output: tuple val(meta), path("*.gz"), emit: vcf diff --git a/modules/nf-core/stranger/meta.yml b/modules/nf-core/stranger/meta.yml index 0707d806..eefad398 100644 --- a/modules/nf-core/stranger/meta.yml +++ b/modules/nf-core/stranger/meta.yml @@ -24,6 +24,11 @@ input: type: file description: VCF with repeat expansions pattern: "*.{vcf.gz,vcf}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] - variant_catalog: type: file description: json file with repeat expansion sites to genotype diff --git a/modules/nf-core/svdb/merge/main.nf b/modules/nf-core/svdb/merge/main.nf index 8544daff..0bd94499 100644 --- a/modules/nf-core/svdb/merge/main.nf +++ b/modules/nf-core/svdb/merge/main.nf @@ -4,7 +4,7 @@ process SVDB_MERGE { conda "bioconda::svdb=2.8.1 bioconda::samtools=1.16.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/mulled-v2-c8daa8f9d69d3c5a1a4ff08283a166c18edb0000:af6f8534cd538a85ff43a2eae1b52b143e7abd05-0': - 'quay.io/biocontainers/mulled-v2-c8daa8f9d69d3c5a1a4ff08283a166c18edb0000:af6f8534cd538a85ff43a2eae1b52b143e7abd05-0' }" + 'biocontainers/mulled-v2-c8daa8f9d69d3c5a1a4ff08283a166c18edb0000:af6f8534cd538a85ff43a2eae1b52b143e7abd05-0' }" input: tuple val(meta), path(vcfs) diff --git a/modules/nf-core/svdb/merge/meta.yml b/modules/nf-core/svdb/merge/meta.yml index c71279e8..92a5a128 100644 --- a/modules/nf-core/svdb/merge/meta.yml +++ b/modules/nf-core/svdb/merge/meta.yml @@ -2,6 +2,8 @@ name: svdb_merge description: The merge module merges structural variants within one or more vcf files. keywords: - structural variants + - vcf + - merge tools: - svdb: description: structural variant database software diff --git a/modules/nf-core/svdb/query/main.nf b/modules/nf-core/svdb/query/main.nf index f5e7da0f..5dbd42ec 100644 --- a/modules/nf-core/svdb/query/main.nf +++ b/modules/nf-core/svdb/query/main.nf @@ -5,7 +5,7 @@ process SVDB_QUERY { conda "bioconda::svdb=2.8.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/svdb:2.8.1--py39h5371cbf_0': - 'quay.io/biocontainers/svdb:2.8.1--py39h5371cbf_0' }" + 'biocontainers/svdb:2.8.1--py39h5371cbf_0' }" input: tuple val(meta), path(vcf) diff --git a/modules/nf-core/tabix/bgziptabix/main.nf b/modules/nf-core/tabix/bgziptabix/main.nf index d3a3bbff..d6c5a760 100644 --- a/modules/nf-core/tabix/bgziptabix/main.nf +++ b/modules/nf-core/tabix/bgziptabix/main.nf @@ -5,13 +5,14 @@ process TABIX_BGZIPTABIX { conda "bioconda::tabix=1.11" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
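A note on the recurring container change in these modules ('quay.io/biocontainers/...' shortened to 'biocontainers/...'): the registry prefix is no longer hardcoded per module. As the nextflow.config hunk later in this diff shows, the template now sets a default registry that Docker, Podman, Singularity and Apptainer prepend to short image names, so a single setting can redirect every module at once. A minimal sketch of the mechanism (the mirror URL is hypothetical):

    // nextflow.config
    docker.registry = 'quay.io'
    // With this default, the short name 'biocontainers/tabix:1.11--hdfd78af_0'
    // is pulled as 'quay.io/biocontainers/tabix:1.11--hdfd78af_0'.
    // docker.registry = 'registry.example.org'   // hypothetical mirror: same short names, different source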
'https://depot.galaxyproject.org/singularity/tabix:1.11--hdfd78af_0' : - 'quay.io/biocontainers/tabix:1.11--hdfd78af_0' }" + 'biocontainers/tabix:1.11--hdfd78af_0' }" input: tuple val(meta), path(input) output: - tuple val(meta), path("*.gz"), path("*.tbi"), emit: gz_tbi + tuple val(meta), path("*.gz"), path("*.tbi"), optional: true, emit: gz_tbi + tuple val(meta), path("*.gz"), path("*.csi"), optional: true, emit: gz_csi path "versions.yml" , emit: versions when: @@ -34,8 +35,9 @@ process TABIX_BGZIPTABIX { stub: def prefix = task.ext.prefix ?: "${meta.id}" """ - touch ${prefix}.gz - touch ${prefix}.gz.tbi + touch ${prefix}.${input.getExtension()}.gz + touch ${prefix}.${input.getExtension()}.gz.tbi + touch ${prefix}.${input.getExtension()}.gz.csi cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/tabix/bgziptabix/meta.yml b/modules/nf-core/tabix/bgziptabix/meta.yml index 49c03289..2761e271 100644 --- a/modules/nf-core/tabix/bgziptabix/meta.yml +++ b/modules/nf-core/tabix/bgziptabix/meta.yml @@ -37,9 +37,14 @@ output: type: file description: tabix index file pattern: "*.{gz.tbi}" + - csi: + type: file + description: tabix alternate index file + pattern: "*.{gz.csi}" - versions: type: file description: File containing software versions pattern: "versions.yml" authors: - "@maxulysse" + - "@DLBPointon" diff --git a/modules/nf-core/tabix/tabix/main.nf b/modules/nf-core/tabix/tabix/main.nf index 9a404db9..5bf332ef 100644 --- a/modules/nf-core/tabix/tabix/main.nf +++ b/modules/nf-core/tabix/tabix/main.nf @@ -5,7 +5,7 @@ process TABIX_TABIX { conda "bioconda::tabix=1.11" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/tabix:1.11--hdfd78af_0' : - 'quay.io/biocontainers/tabix:1.11--hdfd78af_0' }" + 'biocontainers/tabix:1.11--hdfd78af_0' }" input: tuple val(meta), path(tab) diff --git a/modules/nf-core/tiddit/cov/main.nf b/modules/nf-core/tiddit/cov/main.nf index dec79332..647f35a2 100644 --- a/modules/nf-core/tiddit/cov/main.nf +++ b/modules/nf-core/tiddit/cov/main.nf @@ -5,11 +5,11 @@ process TIDDIT_COV { conda "bioconda::tiddit=3.3.2" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/tiddit:3.3.2--py310hc2b7f4b_0' : - 'quay.io/biocontainers/tiddit:3.3.2--py310hc2b7f4b_0' }" + 'biocontainers/tiddit:3.3.2--py310hc2b7f4b_0' }" input: tuple val(meta), path(input) - path fasta + tuple val(meta2), path(fasta) output: tuple val(meta), path("*.bed"), optional: true, emit: cov diff --git a/modules/nf-core/tiddit/cov/meta.yml b/modules/nf-core/tiddit/cov/meta.yml index 98ea27c2..10171166 100644 --- a/modules/nf-core/tiddit/cov/meta.yml +++ b/modules/nf-core/tiddit/cov/meta.yml @@ -23,6 +23,11 @@ input: type: file description: BAM/CRAM file pattern: "*.{bam,cram}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] - fasta: type: file description: | @@ -50,3 +55,4 @@ output: pattern: "versions.yml" authors: - "@projectoriented" + - "@ramprasadn" diff --git a/modules/nf-core/tiddit/sv/main.nf b/modules/nf-core/tiddit/sv/main.nf index 27b6450a..1ebc8565 100644 --- a/modules/nf-core/tiddit/sv/main.nf +++ b/modules/nf-core/tiddit/sv/main.nf @@ -5,7 +5,7 @@ process TIDDIT_SV { conda "bioconda::tiddit=3.3.2" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
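TABIX_BGZIPTABIX above now declares gz_tbi and gz_csi as optional outputs, so only the index flavour the tabix flags actually produce is emitted (the stub now touches both, hence the getExtension()-based names). Assuming the module forwards task.ext.args2 to tabix, as the upstream nf-core module does, a CSI index can be requested from configuration alone (sketch; the selector is illustrative):

    process {
        withName: 'TABIX_BGZIPTABIX' {
            ext.args2 = '--csi'   // tabix writes *.gz.csi, populating gz_csi; gz_tbi stays empty
        }
    }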
'https://depot.galaxyproject.org/singularity/tiddit:3.3.2--py310hc2b7f4b_0' : - 'quay.io/biocontainers/tiddit:3.3.2--py310hc2b7f4b_0' }" + 'biocontainers/tiddit:3.3.2--py310hc2b7f4b_0' }" input: tuple val(meta), path(input), path(input_index) diff --git a/modules/nf-core/ucsc/wigtobigwig/main.nf b/modules/nf-core/ucsc/wigtobigwig/main.nf index 7626159e..c5f215b1 100644 --- a/modules/nf-core/ucsc/wigtobigwig/main.nf +++ b/modules/nf-core/ucsc/wigtobigwig/main.nf @@ -3,10 +3,10 @@ process UCSC_WIGTOBIGWIG { label 'process_single' // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. - conda "bioconda::ucsc-wigtobigwig=377" + conda "bioconda::ucsc-wigtobigwig=447" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/ucsc-wigtobigwig:377--h0b8a92a_2' : - 'quay.io/biocontainers/ucsc-wigtobigwig:377--h0b8a92a_2' }" + 'https://depot.galaxyproject.org/singularity/ucsc-wigtobigwig:447--h2a80c09_1' : + 'biocontainers/ucsc-wigtobigwig:447--h2a80c09_1' }" input: tuple val(meta), path(wig) @@ -22,7 +22,7 @@ process UCSC_WIGTOBIGWIG { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def VERSION = '377' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + def VERSION = '447' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. """ wigToBigWig \\ $args \\ @@ -38,7 +38,7 @@ process UCSC_WIGTOBIGWIG { stub: def prefix = task.ext.prefix ?: "${meta.id}" - def VERSION = '377' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + def VERSION = '447' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. """ touch ${prefix}.bw diff --git a/modules/nf-core/ucsc/wigtobigwig/meta.yml b/modules/nf-core/ucsc/wigtobigwig/meta.yml index a597fde0..470967db 100644 --- a/modules/nf-core/ucsc/wigtobigwig/meta.yml +++ b/modules/nf-core/ucsc/wigtobigwig/meta.yml @@ -3,6 +3,7 @@ description: Convert ascii format wig file to binary big wig format keywords: - wig - bigwig + - ucsc tools: - ucsc: description: | diff --git a/modules/nf-core/untar/main.nf b/modules/nf-core/untar/main.nf index 67f497ee..8cd1856c 100644 --- a/modules/nf-core/untar/main.nf +++ b/modules/nf-core/untar/main.nf @@ -5,7 +5,7 @@ process UNTAR { conda "conda-forge::sed=4.7 bioconda::grep=3.4 conda-forge::tar=1.34" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'docker.io/ubuntu:20.04' }" + 'nf-core/ubuntu:20.04' }" input: tuple val(meta), path(archive) diff --git a/modules/nf-core/upd/main.nf b/modules/nf-core/upd/main.nf new file mode 100644 index 00000000..a73fcd45 --- /dev/null +++ b/modules/nf-core/upd/main.nf @@ -0,0 +1,47 @@ + +process UPD { + tag "$meta.id" + label 'process_single' + + conda "bioconda::upd=0.1.1" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/upd:0.1.1--pyhdfd78af_0': + 'biocontainers/upd:0.1.1--pyhdfd78af_0' }" + + input: + tuple val(meta), path(vcf) + + output: + tuple val(meta), path("*.bed"), emit: bed + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + upd \\ + --vcf $vcf \\ + $args \\ + | sort -k 1,1 -k 2,2n >${prefix}.bed + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + upd: \$( upd --version 2>&1 | sed 's/upd, version //' ) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.bed + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + upd: \$( upd --version 2>&1 | sed 's/upd, version //' ) + END_VERSIONS + """ + +} diff --git a/modules/nf-core/upd/meta.yml b/modules/nf-core/upd/meta.yml new file mode 100644 index 00000000..f99ca01f --- /dev/null +++ b/modules/nf-core/upd/meta.yml @@ -0,0 +1,42 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json +name: "upd" +description: Simple software to call UPD regions from germline exome/wgs trios. +keywords: + - upd + - uniparental + - disomy +tools: + - "upd": + description: "Simple software to call UPD regions from germline exome/wgs trios." + homepage: https://github.com/bjhall/upd + documentation: https://github.com/bjhall/upd + tool_dev_url: https://github.com/bjhall/upd + licence: "['MIT']" +input: + # Only when we have meta + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcf: + type: file + description: VCF file + pattern: "*.{vcf,vcf.gz}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - bed: + type: file + description: BED file + pattern: "*.{bed}" +authors: + - "@hrydbeck" diff --git a/modules/nf-core/vcfanno/main.nf b/modules/nf-core/vcfanno/main.nf index 2d5d162a..a2a078da 100644 --- a/modules/nf-core/vcfanno/main.nf +++ b/modules/nf-core/vcfanno/main.nf @@ -5,10 +5,10 @@ process VCFANNO { conda "bioconda::vcfanno=0.3.3" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/vcfanno:0.3.3--h9ee0642_0': - 'quay.io/biocontainers/vcfanno:0.3.3--h9ee0642_0' }" + 'biocontainers/vcfanno:0.3.3--h9ee0642_0' }" input: - tuple val(meta), path(vcf), path(tbi) + tuple val(meta), path(vcf), path(tbi), path(specific_resources) path toml path lua path resources @@ -26,11 +26,11 @@ process VCFANNO { def lua_cmd = lua ? "--lua ${lua}" : "" """ vcfanno \\ - -p $task.cpus \\ - $args \\ - $lua_cmd \\ - $toml \\ - $vcf \\ + -p ${task.cpus} \\ + ${args} \\ + ${lua_cmd} \\ + ${toml} \\ + ${vcf} \\ > ${prefix}.vcf cat <<-END_VERSIONS > versions.yml diff --git a/modules/nf-core/vcfanno/meta.yml b/modules/nf-core/vcfanno/meta.yml index 9e6c1d72..86fea0c4 100644 --- a/modules/nf-core/vcfanno/meta.yml +++ b/modules/nf-core/vcfanno/meta.yml @@ -30,6 +30,9 @@ input: type: file description: tabix index of query VCF - only needed if vcf is compressed pattern: "*.vcf.gz.tbi" + - specific_resources: + type: map + description: A list of sample specific reference files defined in toml config, must also include indices if bgzipped. 
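The new upd module above leaves everything between '--vcf' and the output redirect to task.ext.args, and pipes the result through 'sort -k 1,1 -k 2,2n' so the BED arrives coordinate-sorted for downstream tools. A hedged sketch of the per-case configuration this implies (the option names and the 'regions' subcommand follow the upstream bjhall/upd CLI as documented; the selector and sample IDs are placeholders, and a real config would derive the IDs from case metadata):

    process {
        withName: 'UPD' {
            // trio member IDs must match the sample names in the multisample VCF
            ext.args = { "--proband PROBAND_ID --mother MOTHER_ID --father FATHER_ID regions" }
        }
    }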
- toml: type: file description: configuration file with reference file basenames @@ -39,8 +42,8 @@ input: description: Lua file for custom annotations pattern: "*.lua" - resources: - type: list - description: List of reference files defined in toml config, must also include indices. + type: map + description: List of reference files defined in toml config, must also include indices if bgzipped. output: - meta: diff --git a/nextflow.config b/nextflow.config index a653f2e8..9faa8055 100644 --- a/nextflow.config +++ b/nextflow.config @@ -23,10 +23,13 @@ params { // Main options analysis_type = 'wgs' bait_padding = 100 + save_mapped_as_cram = false + skip_cnv_calling = false skip_snv_annotation = false skip_sv_annotation = false skip_mt_analysis = false gens_switch = false + cadd_resources = null platform = 'illumina' // Alignment @@ -52,7 +55,6 @@ params { // Boilerplate options outdir = 'results' - tracedir = "${params.outdir}/pipeline_info" publish_dir_mode = 'copy' email = null email_on_fail = null @@ -61,20 +63,14 @@ params { hook_url = null help = false version = false - validate_params = true - show_hidden_params = false - schema_ignore_params = 'genomes' - // Config options + config_profile_name = null + config_profile_description = null custom_config_version = 'master' custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" - config_profile_description = null config_profile_contact = null config_profile_url = null - config_profile_name = null - - local_config_path = null // Max resource options // Defaults only, expecting to be overwritten @@ -82,6 +78,13 @@ params { max_cpus = 16 max_time = '240.h' + // Schema validation default options + validationFailUnrecognisedParams = false + validationLenientMode = false + validationSchemaIgnoreParams = 'genomes' + validationShowHiddenParams = false + validate_params = true + } // Load base.config by default for all pipelines @@ -101,13 +104,11 @@ try { // } catch (Exception e) { // System.err.println("WARNING: Could not load nf-core/config/raredisease profiles: ${params.custom_config_base}/pipeline/raredisease.config") // } - - profiles { debug { dumpHashes = true process.beforeScript = 'echo $HOSTNAME' - cleanup = false + cleanup = false } conda { conda.enabled = true @@ -130,7 +131,6 @@ profiles { } docker { docker.enabled = true - docker.registry = 'quay.io' docker.userEmulation = true conda.enabled = false singularity.enabled = false @@ -154,7 +154,6 @@ profiles { } podman { podman.enabled = true - podman.registry = 'quay.io' conda.enabled = false docker.enabled = false singularity.enabled = false @@ -199,7 +198,20 @@ profiles { test_one_sample { includeConfig 'conf/test_one_sample.config' } } -// Load igenomes.config if required, else load custom genomes.config +// Set default registry for Apptainer, Docker, Podman and Singularity independent of -profile +// Will not be used unless Apptainer / Docker / Podman / Singularity are enabled +// Set to your registry if you have a mirror of containers +apptainer.registry = 'quay.io' +docker.registry = 'quay.io' +podman.registry = 'quay.io' +singularity.registry = 'quay.io' + +// Nextflow plugins +plugins { + id 'nf-validation' // Validation of pipeline parameters and creation of an input channel from a sample sheet +} + +// Load igenomes.config if required if (!params.igenomes_ignore) { includeConfig 'conf/igenomes.config' } else { @@ -213,10 +225,6 @@ try { System.err.println("WARNING: Could not load nf-core/config/raredisease profiles: 
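The vcfanno input tuple above now carries path(specific_resources) next to the VCF, so per-sample annotation files are staged into the task directory together with the shared resources, and the toml can reference either set by basename. A hedged sketch of assembling that tuple in a caller (channel names are illustrative; an empty list means no sample-specific files):

    ch_vcf_tbi
        .map { meta, vcf, tbi -> [ meta, vcf, tbi, [] ] }   // or a per-sample [ bed, bed.tbi ] list
        .set { ch_vcfanno_in }

    VCFANNO ( ch_vcfanno_in, ch_vcfanno_toml, [], ch_vcfanno_resources )  // [] = no custom lua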
${params.custom_config_base}/pipeline/raredisease.config") } -// Load in-house config -if (params.local_config_path) { - includeConfig "${params.local_config_path}" -} // Load wes.config if --analysis_type='wes' if (params.analysis_type == 'wes') { @@ -241,19 +249,19 @@ process.shell = ['/bin/bash', '-euo', 'pipefail'] def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') timeline { enabled = true - file = "${params.tracedir}/execution_timeline_${trace_timestamp}.html" + file = "${params.outdir}/pipeline_info/execution_timeline_${trace_timestamp}.html" } report { enabled = true - file = "${params.tracedir}/execution_report_${trace_timestamp}.html" + file = "${params.outdir}/pipeline_info/execution_report_${trace_timestamp}.html" } trace { enabled = true - file = "${params.tracedir}/execution_trace_${trace_timestamp}.txt" + file = "${params.outdir}/pipeline_info/execution_trace_${trace_timestamp}.txt" } dag { enabled = true - file = "${params.tracedir}/pipeline_dag_${trace_timestamp}.html" + file = "${params.outdir}/pipeline_info/pipeline_dag_${trace_timestamp}.html" } manifest { @@ -262,8 +270,8 @@ manifest { homePage = 'https://github.com/nf-core/raredisease' description = """call and score variants from WGS/WES of rare disease patients""" mainScript = 'main.nf' - nextflowVersion = '!>=22.10.1' - version = '1.0.0' + nextflowVersion = '!>=23.04.0' + version = '1.1.0' doi = '' } @@ -292,7 +300,9 @@ includeConfig 'conf/modules/prepare_references.config' includeConfig 'conf/modules/qc_bam.config' includeConfig 'conf/modules/rank_variants.config' includeConfig 'conf/modules/scatter_genome.config' +includeConfig 'conf/modules/annotate_cadd.config' includeConfig 'conf/modules/peddy_check.config' +includeConfig 'conf/modules/call_sv_germlinecnvcaller.config' // Function to ensure that resource requirements don't go beyond // a maximum limit diff --git a/nextflow_schema.json b/nextflow_schema.json index 3da8d7be..5367ada4 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -15,9 +15,9 @@ "input": { "type": "string", "format": "file-path", + "exists": true, "mimetype": "text/csv", "pattern": "^\\S+\\.csv$", - "schema": "assets/schema_input.json", "description": "Path to comma-separated file containing information about the samples in the experiment.", "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. See [usage docs](https://nf-co.re/raredisease/usage#samplesheet-input).", "fa_icon": "fas fa-file-csv" @@ -33,17 +33,19 @@ "description": "Email address for completion summary.", "fa_icon": "fas fa-envelope", "help_text": "Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. If set in your user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run.", - "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$" + "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$", + "hidden": true }, "multiqc_title": { "type": "string", "description": "MultiQC report title. 
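The nextflow_schema.json changes that follow lean on the nf-validation plugin registered above: a property marked "exists": true makes the launch fail immediately when the declared file or directory is missing, instead of erroring mid-run. A minimal sketch of the plugin's documented entry points as a workflow would call them (hedged; the exact wiring in this pipeline may differ):

    include { validateParameters; paramsSummaryLog } from 'plugin/nf-validation'

    // abort early on type/pattern/exists violations, then log the resolved parameters
    validateParameters()
    log.info paramsSummaryLog(workflow)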
Printed as page header, used for filename if not otherwise specified.", - "fa_icon": "fas fa-file-signature" + "fa_icon": "fas fa-file-signature", + "hidden": true } } }, - "reference_genome_options": { - "title": "Reference genome options", + "reference_file_options": { + "title": "Reference file options", "type": "object", "fa_icon": "fas fa-dna", "description": "Reference genome related files and options required for the workflow.", @@ -51,68 +53,98 @@ "bait_padding": { "type": "number", "default": 100, - "fa_icon": "fas fa-greater-than", + "fa_icon": "fas fa-align-center", "pattern": "^\\S+\\.bed(\\.gz)?$", - "description": "The amount to pad each end of the target intervals to create bait intervals.", - "hidden": true + "description": "The amount to pad each end of the target intervals to create bait intervals." }, - "genome": { + "bwa": { "type": "string", - "description": "Name of iGenomes reference.", - "fa_icon": "fas fa-book", - "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`. \n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details." + "exists": true, + "format": "directory-path", + "description": "Directory for pre-built bwa index.", + "help_text": "If none provided, will be generated automatically from the FASTA reference.", + "fa_icon": "fas fa-folder-open" + }, + "bwamem2": { + "type": "string", + "exists": true, + "format": "directory-path", + "description": "Directory for pre-built bwamem2 index.", + "help_text": "If none provided, will be generated automatically from the FASTA reference.", + "fa_icon": "fas fa-folder-open" + }, + "cadd_resources": { + "type": "string", + "exists": true, + "format": "directory-path", + "fa_icon": "fas fa-file", + "description": "Path to the directory containing CADD annotations.", + "help_text": "This folder contains the uncompressed files that would otherwise be in the data/annotation folder as described in https://github.com/kircherlab/CADD-scripts/#manual-installation." + }, + "fai": { + "type": "string", + "exists": true, + "format": "file-path", + "help_text": "If none provided, will be generated automatically from the FASTA reference", + "description": "Path to FASTA genome index file.", + "pattern": "^\\S+\\.fn?a(sta)?\\.fai$", + "fa_icon": "fas fa-file" }, "fasta": { "type": "string", "format": "file-path", + "exists": true, "mimetype": "text/plain", "pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$", "description": "Path to FASTA genome file.", "help_text": "This parameter is *mandatory* if `--genome` is not specified. If you don't have a BWA index available this will be generated for you automatically. Combine with `--save_reference` to save BWA index for future runs.", - "fa_icon": "far fa-file-code" + "fa_icon": "fas fa-file" }, - "fai": { + "gcnvcaller_model": { "type": "string", + "exists": true, + "fa_icon": "fas fa-file", + "description": "A file containing the path to models produced by GATK4 GermlineCNVCaller in cohort mode.", "format": "file-path", - "help_text": "If none provided, will be generated automatically from the FASTA reference", - "description": "Path to FASTA genome index file.", - "pattern": "^\\S+\\.fn?a(sta)?\\.fai$", - "fa_icon": "far fa-file-code", - "hidden": true + "help_text": "This model is required for generating CNV calls when using GermlineCNVCaller."
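On --cadd_resources just above: CADD scoring of indels only runs when this directory is provided (its default in nextflow.config is null), and it must hold the uncompressed annotation set that CADD-scripts normally keeps under its data/annotation folder, matched to the genome build. A hedged params sketch (the path is hypothetical):

    params {
        cadd_resources = '/references/cadd/GRCh38/annotation'   // hypothetical local path
    }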
}, - "igenomes_base": { + "genome": { "type": "string", - "format": "directory-path", - "description": "Directory / URL base for iGenomes references.", - "default": "s3://ngi-igenomes/igenomes", - "fa_icon": "fas fa-cloud-download-alt", - "hidden": true + "description": "Name of iGenomes reference.", + "fa_icon": "fas fa-align-center", + "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`. \n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details.", + "default": "GRCh38" }, - "igenomes_ignore": { - "type": "boolean", - "description": "Do not load the iGenomes reference config.", - "fa_icon": "fas fa-ban", - "hidden": true, - "help_text": "Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`." + "gens_gnomad_pos": { + "type": "string", + "exists": true, + "format": "file-path", + "fa_icon": "fas fa-file", + "description": "Path to a list of common SNP locations for Gens.", + "help_text": "Locations of gnomad SNPs with a high enough BAF.", + "hidden": true }, - "bwa": { + "gens_interval_list": { "type": "string", - "format": "directory-path", - "description": "Directory for pre-built bwa index.", - "help_text": "If none provided, will be generated automatically from the FASTA reference.", - "fa_icon": "fas fa-folder-open", + "exists": true, + "format": "file-path", + "fa_icon": "fas fa-file", + "description": "Path to interval list for Gens.", + "help_text": "This file contains the binning intervals used for CollectReadCounts.", "hidden": true }, - "bwamem2": { + "gens_pon": { "type": "string", - "format": "directory-path", - "description": "Directory for pre-built bwamem2 index.", - "help_text": "If none provided, will be generated automatically from the FASTA reference.", - "fa_icon": "fas fa-folder-open", + "exists": true, + "format": "file-path", + "fa_icon": "fas fa-file", + "description": "Path to panel of normals for Gens.", + "help_text": "The panel used to run DenoiseReadCounts.", "hidden": true }, "gnomad_af": { "type": "string", + "exists": true, "format": "path", "fa_icon": "fas fa-file", "pattern": "^\\S+\\.tab(\\.gz)?$", @@ -121,14 +153,29 @@ }, "gnomad_af_idx": { "type": "string", + "exists": true, "format": "path", "fa_icon": "fas fa-file", "pattern": "^\\S+\\.bed(\\.gz)?\\.idx$", "description": "Path to the index file for the gnomad tab file with allele frequencies.", "help_text": "Path to the index of gnomad tab file with CHR/START/REF,ALT/AF" }, + "igenomes_base": { + "type": "string", + "format": "directory-path", + "description": "Directory / URL base for iGenomes references.", + "default": "s3://ngi-igenomes/igenomes", + "fa_icon": "fas fa-cloud-download-alt" + }, + "igenomes_ignore": { + "type": "boolean", + "description": "Do not load the iGenomes reference config.", + "fa_icon": "fas fa-ban", + "help_text": "Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`." 
+ }, "intervals_wgs": { "type": "string", + "exists": true, "format": "path", "fa_icon": "fas fa-file", "pattern": "^\\S+\\.intervals?(_list)?$", @@ -137,251 +184,173 @@ }, "intervals_y": { "type": "string", + "exists": true, "format": "path", "fa_icon": "fas fa-file", "pattern": "^\\S+\\.intervals?(_list)?$", "description": "Path to the interval list of the Y chromosome.", "help_text": "Path to the interval list of the Y chromosome. This is used to calculate coverage statistics for the Y chromosome." }, - "mito_name": { - "type": "string", - "format": "path", - "description": "Name of the mitochondrial contig in the reference fasta file", - "help_text": "Used to extract relevant information from the references to analyse mitochondria" - }, - "mt_intervals": { - "type": "string", - "format": "path", - "fa_icon": "fas fa-file", - "pattern": "^\\S+\\.intervals?(_list)?$", - "description": "Path to the interval list of the non control mitochondral region.", - "help_text": "Path to the interval list of the non control mitochondral regions for Mutect2" - }, - "mt_intervals_shift": { - "type": "string", - "format": "path", - "fa_icon": "fas fa-file", - "pattern": "^\\S+\\.intervals?(\\_list)?$", - "description": "Path to the interval list of the non control mitochondral region in shifted fasta.", - "help_text": "Path to the interval list of the non control mitochondral regions in shifted fasta for Mutect2" - }, "known_dbsnp": { "type": "string", + "exists": true, "format": "path", - "fa_icon": "fas fa-chart-bar", + "fa_icon": "fas fa-file", "pattern": "^\\S+\\.vcf(\\.gz)?$", - "description": "Path to known dbSNP file.", - "hidden": true + "description": "Path to known dbSNP file." }, "known_dbsnp_tbi": { "type": "string", + "exists": true, "format": "path", - "fa_icon": "fas fa-chart-bar", + "fa_icon": "fas fa-file", "pattern": "^\\S+\\.vcf(\\.gz)?\\.tbi$", - "description": "Path to known dbSNP file index.", - "hidden": true + "description": "Path to known dbSNP file index." }, - "known_indels": { + "local_genomes": { "type": "string", - "format": "path", - "fa_icon": "fas fa-chart-bar", - "pattern": "^\\S+\\.vcf(\\.gz)?$", - "description": "Path to known indels file.", - "hidden": true + "format": "directory-path", + "fa_icon": "fas fa-map-marker-alt", + "description": "Local directory base for genome references that map to the config.", + "help_text": "This folder is a flat structure with file names that map to the config." }, - "known_mills": { + "mito_name": { "type": "string", - "format": "path", - "fa_icon": "fas fa-chart-bar", - "pattern": "^\\S+\\.vcf(\\.gz)?$", - "description": "Path to known Mills file.", - "hidden": true + "description": "Name of the mitochondrial contig in the reference fasta file", + "help_text": "Used to extract relevant information from the references to analyse mitochondria", + "fa_icon": "fas fa-align-center" }, "ml_model": { "type": "string", + "exists": true, "format": "path", - "fa_icon": "fas fa-chart-bar", - "description": "Path to sentieon machine learning model file.", - "hidden": true + "fa_icon": "fas fa-file", + "description": "Path to sentieon machine learning model file." 
}, - "mt_fasta_shift": { + "mt_fasta": { "type": "string", + "exists": true, "format": "file-path", "mimetype": "text/plain", "pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$", "description": "Path to mitochondrial FASTA genome file.", - "fa_icon": "far fa-file-code" - }, - "mt_fai_shift": { - "type": "string", - "format": "file-path", - "mimetype": "text/plain", - "pattern": "^\\S+\\.fn?a(sta)?\\.fai?$", - "description": "Path to mitochondrial FASTA genome index file.", - "fa_icon": "far fa-file-code" + "fa_icon": "fas fa-file" }, - "mt_bwa_index_shift": { + "ploidy_model": { "type": "string", - "format": "directory-path", - "description": "Directory for pre-built bwa index for shifted mitochondrial fasta (used for alignment with sentioen)", - "help_text": "If none provided, will be generated automatically from the FASTA reference.", + "exists": true, "fa_icon": "fas fa-folder-open", - "hidden": true - }, - "mt_bwamem2_index_shift": { - "type": "string", + "description": "Directory containing the ploidy model files", "format": "directory-path", - "description": "Directory for pre-built bwamem2 index for shifted mitochondrial fasta.", - "help_text": "If none provided, will be generated automatically from the FASTA reference.", - "fa_icon": "fas fa-folder-open", - "hidden": true + "help_text": "Produced by GATK4 DetermineGermlineContigPloidy in cohort mode, this model is required for generating a CNV model when using GermlineCNVCaller." }, - "mt_backchain_shift": { + "readcount_intervals": { "type": "string", - "format": "directory-path", - "description": "Chain file describing the alignment between the mitochondrial shifted fasta and typical mitochondrial fasta", - "help_text": "For more information, check https://genome.ucsc.edu/goldenPath/help/chain.html", - "fa_icon": "fas fa-folder-open", - "hidden": true + "exists": true, + "fa_icon": "fas fa-file", + "description": "Interval list file containing the intervals over which read counts are tabulated for CNV calling", + "format": "file-path", + "help_text": "Generated by GATK4 PreprocessIntervals. If absent, the pipeline can generate this file." }, "reduced_penetrance": { "type": "string", + "exists": true, "format": "path", - "fa_icon": "fas fa-chart-bar", - "description": "File with gene ids that have reduced penetrance. For use with genmod", - "hidden": true + "fa_icon": "fas fa-file-csv", + "description": "File with gene ids that have reduced penetrance. For use with genmod" + }, + "save_reference": { + "type": "boolean", + "description": "If generated by the pipeline save the required indices/references in the results directory.", + "help_text": "The saved references can be used for future pipeline runs, reducing processing times.", + "fa_icon": "fas fa-save" }, "score_config_snv": { "type": "string", + "exists": true, "format": "path", - "fa_icon": "fas fa-chart-bar", - "description": "SNV rank model config file for genmod.", - "hidden": true + "fa_icon": "fas fa-file", + "description": "SNV rank model config file for genmod." }, "score_config_sv": { "type": "string", + "exists": true, "format": "path", - "fa_icon": "fas fa-chart-bar", - "description": "SV rank model config file for genmod.", - "hidden": true + "fa_icon": "fas fa-file", + "description": "SV rank model config file for genmod."
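The ploidy_model and readcount_intervals options above work as a set with gcnvcaller_model: CollectReadCounts tabulates coverage over readcount_intervals, DetermineGermlineContigPloidy applies the cohort ploidy model, and GermlineCNVCaller then consumes the CNV models. A hedged params sketch supplying all three (paths are hypothetical):

    params {
        skip_cnv_calling    = false
        readcount_intervals = '/references/gcnv/readcounts.interval_list'
        ploidy_model        = '/references/gcnv/ploidy_model'       // directory
        gcnvcaller_model    = '/references/gcnv/model_paths.txt'    // file listing model directories
    }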
}, "sequence_dictionary": { "type": "string", + "exists": true, "format": "path", - "fa_icon": "fas fa-chart-bar", - "pattern": "^\\S+\\.dict$", - "description": "Genome dictionary file", - "hidden": true - }, - "mt_sequence_dictionary_shift": { - "type": "string", - "format": "path", - "fa_icon": "fas fa-chart-bar", + "fa_icon": "fas fa-file", "pattern": "^\\S+\\.dict$", - "description": "Shifted mitochondrial genome dictionary file", - "hidden": true + "description": "Path to the genome dictionary file" }, - "vep_filters": { + "svdb_query_dbs": { "type": "string", - "format": "path", - "fa_icon": "fas fa-chart-bar", - "description": "File containing HGNC_IDs of interest on separate lines.", - "hidden": true + "exists": true, + "format": "file-path", + "description": "Databases used for structural variant annotation.", + "fa_icon": "fas fa-file-csv", + "help_text": "Path to comma-separated file containing information about the databases used for structural variant annotation." }, "target_bed": { "type": "string", + "exists": true, "format": "path", - "fa_icon": "fas fa-bezier-curve", + "fa_icon": "fas fa-file", "pattern": "^\\S+\\.bed(\\.gz)?$", "description": "Path to directory for target bed file.", - "help_text": "If you would like to limit your analysis to specific regions of the genome, you can pass those regions in a bed file using this option", - "hidden": true - }, - "svdb_query_dbs": { - "type": "string", - "format": "file-path", - "description": "Databases used for structural variant annotation.", - "fa_icon": "far fa-file-code", - "help_text": "Path to comma-separated file containing information about the databases used for structural variant annotation." + "help_text": "If you would like to limit your analysis to specific regions of the genome, you can pass those regions in a bed file using this option" }, "variant_catalog": { "type": "string", + "exists": true, "format": "file-path", "description": "Path to variant catalog file", "help_text": "Used with ExpansionHunter and if no catalogue is passed, then a default will be used.", - "fa_icon": "far fa-file-code", - "hidden": true + "fa_icon": "fas fa-file" }, "vcfanno_resources": { "type": "string", - "description": "Text file containing the absolute paths to resources defined within the vcfanno toml file. One line per resource.", + "exists": true, + "description": "Path to a file containing the absolute paths to resources defined within the vcfanno toml file. One line per resource.", "help_text": "If no file is passed, default configurations will be used according to genome build within the context of the pipeline.", - "fa_icon": "fas fa-folder-open", - "hidden": true + "fa_icon": "fas fa-file" }, "vcfanno_toml": { "type": "string", + "exists": true, "description": "Path to the vcfanno toml file.", "pattern": "^\\S+\\.toml$", "help_text": "If no toml is passed, default configurations will be used according to genome build within the context of the pipeline.", - "fa_icon": "fas fa-file-csv", - "hidden": true + "fa_icon": "fas fa-file" }, "vcfanno_lua": { "type": "string", + "exists": true, "description": "Path to the vcfanno lua file.", "pattern": "^\\S+\\.lua$", "help_text": "Custom operations file (lua). 
For use when the built-in ops don't supply the needed reduction.", - "fa_icon": "fas fa-file-code", - "hidden": true + "fa_icon": "fas fa-file" }, "vep_cache": { "type": "string", + "exists": true, "description": "Path to vep's cache directory.", "help_text": "If no directory path is passed, vcf files will not be annotated by vep.", "fa_icon": "fas fa-folder-open" }, - "save_reference": { - "type": "boolean", - "description": "If generated by the pipeline save the required indices/references in the results directory.", - "help_text": "The saved references can be used for future pipeline runs, reducing processing times.", - "fa_icon": "fas fa-save" - }, - "local_genomes": { - "type": "string", - "format": "directory-path", - "fa_icon": "fas fa-map-marker-alt", - "description": "Local directory base for genome references that map to the config.", - "help_text": "This folder is a flat structure with file names that map to the config." - }, - "gens_interval_list": { - "type": "string", - "format": "file-path", - "default": null, - "fa_icon": "fas fa-bezier-curve", - "description": "Path to interval list for Gens.", - "help_text": "This file contains the binning intervals used for CollectReadCounts.", - "hidden": true - }, - "gens_pon": { - "type": "string", - "format": "file-path", - "default": null, - "fa_icon": "far fa-file-code", - "description": "Path to panel of normals for Gens.", - "help_text": "The panel used to run DenoiseReadCounts.", - "hidden": true - }, - "gens_gnomad_pos": { + "vep_filters": { "type": "string", - "format": "file-path", - "default": null, - "fa_icon": "far fa-bezier-curve", - "description": "Path to a list of common SNP locations for Gens.", - "help_text": "Locations of gnomad SNPs with a high enough BAF.", - "hidden": true + "exists": true, + "format": "path", + "fa_icon": "fas fa-file-csv", + "description": "Path to the file containing HGNC_IDs of interest on separate lines." } - } + }, + "required": ["fasta", "intervals_wgs", "intervals_y"] }, "analysis_options": { "title": "Analysis options", @@ -393,39 +362,45 @@ "type": "string", "default": "wgs", "description": "Specifies which analysis type for the pipeline- either 'wgs','wes','mito'. This changes resources consumed and tools used.", - "fa_icon": "fas fa-book", + "fa_icon": "fas fa-align-center", "enum": ["wgs", "wes", "mito"] }, - "skip_snv_annotation": { + "gens_switch": { "type": "boolean", - "default": false, - "description": "Specifies whether or not to skip annotate SNV subworkflow.", - "fa_icon": "fas fa-book" + "description": "Specifies whether or not to run gens preprocessing subworkflow.", + "fa_icon": "fas fa-toggle-on" }, - "skip_sv_annotation": { + "platform": { + "type": "string", + "default": "illumina", + "description": "Specifies the sequencing platform. Currently only 'illumina' is supported.",
+ "fa_icon": "fas fa-align-center", + "enum": ["illumina"] + }, + "save_mapped_as_cram": { "type": "boolean", - "default": false, - "description": "Specifies whether or not to skip annotate structural variant subworkflow.", + "description": "Specifies whether to generate and publish alignment files as cram instead of bam", + "fa_icon": "fas fa-toggle-on" + }, + "skip_cnv_calling": { + "type": "boolean", + "description": "Specifies whether or not to skip CNV calling.", "fa_icon": "fas fa-book" }, "skip_mt_analysis": { "type": "boolean", - "default": false, "description": "Specifies whether or not to skip the subworkflow that analyses mitochondrial genome separate from the nuclear genome.", - "fa_icon": "fas fa-book" + "fa_icon": "fas fa-toggle-on" }, - "gens_switch": { + "skip_snv_annotation": { "type": "boolean", - "default": false, - "description": "Specifies whether or not to run gens preprocessing subworkflow.", - "fa_icon": "fas fa-book" + "description": "Specifies whether or not to skip annotate SNV subworkflow.", + "fa_icon": "fas fa-toggle-on" }, - "platform": { - "type": "string", - "default": "illumina", - "description": "Specifies which analysis type for the pipeline- either 'wgs','wes','mito'. This changes resources consumed and tools used.", - "fa_icon": "fas fa-book", - "enum": ["illumina"] + "skip_sv_annotation": { + "type": "boolean", + "description": "Specifies whether or not to skip annotate structural variant subworkflow.", + "fa_icon": "fas fa-toggle-on" } } }, @@ -439,15 +414,13 @@ "type": "string", "default": "bwamem2", "description": "Specifies the alignment algorithm to use - available options are 'bwamem2' and 'sentieon'.", - "fa_icon": "fas fa-map-signs", + "fa_icon": "fas fa-align-center", "enum": ["bwamem2", "sentieon"] }, "rmdup": { "type": "boolean", - "default": false, "description": "Specifies whether duplicates reads should be removed prior to variant calling.", - "fa_icon": "fas fa-map-signs", - "hidden": true + "fa_icon": "fas fa-toggle-on" } } }, @@ -460,29 +433,27 @@ "call_interval": { "type": "string", "format": "path", - "fa_icon": "fas fa-chart-bar", - "description": "Interval in the reference that will be used in the software", - "hidden": true + "fa_icon": "fas fa-align-center", + "description": "Interval in the reference that will be used in the software" }, "pcr_amplification": { "type": "boolean", - "default": false, "description": "Indicates whether the sample library is amplified using PCR or not.
Set to false for PCR Free samples.", - "fa_icon": "fas fa-map-signs" - }, - "variant_type": { - "type": "string", - "default": "snp", - "description": "Specifies the variant types for sentieon variant caller.", - "fa_icon": "fas fa-map-signs", - "enum": ["snp", "indel", "snp,indel"] + "fa_icon": "fas fa-toggle-on" }, "variant_caller": { "type": "string", "default": "deepvariant", "description": "Specifies the variant caller to use - available options are 'deepvariant' and 'sentieon'.", - "fa_icon": "fas fa-map-signs", + "fa_icon": "fas fa-random", "enum": ["deepvariant", "sentieon"] + }, + "variant_type": { + "type": "string", + "default": "snp", + "description": "Specifies the variant types for sentieon variant caller.", + "fa_icon": "fas fa-align-center", + "enum": ["snp", "indel", "snp,indel"] } } }, @@ -494,9 +465,9 @@ "properties": { "vep_cache_version": { "type": "integer", - "default": "107", + "default": 107, "description": "Specify the version of the VEP cache provided to the `--vep_cache` option.", - "fa_icon": "fas fa-book", + "fa_icon": "fas fa-align-center", "enum": [107] } } @@ -546,11 +517,6 @@ "description": "Institutional config URL link.", "hidden": true, "fa_icon": "fas fa-users-cog" - }, - "local_config_path": { - "type": "string", - "description": "Path to local config", - "fa_icon": "fas fa-users-cog" } } }, @@ -583,7 +549,7 @@ "description": "Maximum amount of time that can be requested for any single job.", "default": "240.h", "fa_icon": "far fa-clock", - "pattern": "^(\\d+\\.?\\s*(s|m|h|day)\\s*)+$", + "pattern": "^(\\d+\\.?\\s*(s|m|h|d|day)\\s*)+$", "hidden": true, "help_text": "Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. `--max_time '2.h'`" } @@ -654,6 +620,7 @@ }, "multiqc_config": { "type": "string", + "format": "file-path", "description": "Custom config file to supply to MultiQC.", "fa_icon": "fas fa-cog", "hidden": true @@ -667,13 +634,7 @@ "multiqc_methods_description": { "type": "string", "description": "Custom MultiQC yaml file containing HTML including a methods description.", - "fa_icon": "fas fa-cog" - }, - "tracedir": { - "type": "string", - "description": "Directory to keep pipeline Nextflow logs and reports.", - "default": "${params.outdir}/pipeline_info", - "fa_icon": "fas fa-cogs", + "fa_icon": "fas fa-cog", + "hidden": true }, "validate_params": { @@ -683,12 +644,26 @@ "fa_icon": "fas fa-check-square", "hidden": true }, - "show_hidden_params": { + "validationShowHiddenParams": { "type": "boolean", "fa_icon": "far fa-eye-slash", "description": "Show all params when using `--help`", "hidden": true, "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters." + }, + "validationFailUnrecognisedParams": { + "type": "boolean", + "fa_icon": "far fa-check-circle", + "description": "Validation of parameters fails when an unrecognised parameter is found.", + "hidden": true, + "help_text": "By default, when an unrecognised parameter is found, it returns a warning." + }, + "validationLenientMode": { + "type": "boolean", + "fa_icon": "far fa-check-circle", + "description": "Validation of parameters in lenient mode.", + "hidden": true, + "help_text": "Allows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode)."
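The three validation* options above map directly onto nf-validation behaviour: failing on unrecognised parameters rather than warning, lenient coercion of string values to numbers and booleans, and exposing hidden parameters in --help. A hedged sketch of a stricter launch configuration (the profile name is illustrative):

    profiles {
        strict {
            params.validationFailUnrecognisedParams = true    // mistyped params abort the run
            params.validationLenientMode            = false   // keep strict type checking
        }
    }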
} } } @@ -698,7 +673,7 @@ "$ref": "#/definitions/input_output_options" }, { - "$ref": "#/definitions/reference_genome_options" + "$ref": "#/definitions/reference_file_options" }, { "$ref": "#/definitions/analysis_options" diff --git a/subworkflows/local/align.nf b/subworkflows/local/align.nf index 44d86c8b..bab66216 100644 --- a/subworkflows/local/align.nf +++ b/subworkflows/local/align.nf @@ -4,14 +4,15 @@ include { ALIGN_BWAMEM2 } from './alignment/align_bwamem2' include { ALIGN_SENTIEON } from './alignment/align_sentieon' +include { SAMTOOLS_VIEW } from '../../modules/nf-core/samtools/view/main' workflow ALIGN { take: ch_reads_input // channel: [mandatory] [ val(meta), [path(reads)] ] - ch_fasta // channel: [mandatory] [ path(fasta) ] - ch_fai // channel: [mandatory] [ path(fai) ] - ch_index_bwa // channel: [mandatory] [ val(meta), path(index) ] - ch_index_bwamem2 // channel: [mandatory] [ val(meta), path(index) ] + ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ] + ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ] + ch_bwa_index // channel: [mandatory] [ val(meta), path(index) ] + ch_bwamem2_index // channel: [mandatory] [ val(meta), path(index) ] ch_known_dbsnp // channel: [optional; used by sentieon] [ path(known_dbsnp) ] ch_known_dbsnp_tbi // channel: [optional; used by sentieon] [ path(known_dbsnp_tbi) ] val_platform // string: [mandatory] illumina or a different technology @@ -21,17 +22,17 @@ workflow ALIGN { ALIGN_BWAMEM2 ( // Triggered when params.aligner is set as bwamem2 ch_reads_input, - ch_index_bwamem2, - ch_fasta, - ch_fai, + ch_bwamem2_index, + ch_genome_fasta, + ch_genome_fai, val_platform ) ALIGN_SENTIEON ( // Triggered when params.aligner is set as sentieon ch_reads_input, - ch_fasta, - ch_fai, - ch_index_bwa, + ch_genome_fasta, + ch_genome_fai, + ch_bwa_index, ch_known_dbsnp, ch_known_dbsnp_tbi, val_platform @@ -40,6 +41,9 @@ workflow ALIGN { ch_marked_bam = Channel.empty().mix(ALIGN_BWAMEM2.out.marked_bam, ALIGN_SENTIEON.out.marked_bam) ch_marked_bai = Channel.empty().mix(ALIGN_BWAMEM2.out.marked_bai, ALIGN_SENTIEON.out.marked_bai) ch_bam_bai = ch_marked_bam.join(ch_marked_bai, failOnMismatch:true, failOnDuplicate:true) + + SAMTOOLS_VIEW( ch_bam_bai, ch_genome_fasta, [] ) + ch_versions = Channel.empty().mix(ALIGN_BWAMEM2.out.versions, ALIGN_SENTIEON.out.versions) emit: diff --git a/subworkflows/local/alignment/align_bwamem2.nf b/subworkflows/local/alignment/align_bwamem2.nf index 3857c8e0..d35fd9de 100644 --- a/subworkflows/local/alignment/align_bwamem2.nf +++ b/subworkflows/local/alignment/align_bwamem2.nf @@ -12,23 +12,23 @@ include { PICARD_MARKDUPLICATES as MARKDUPLICATES } from '../../../modules/nf-c workflow ALIGN_BWAMEM2 { take: - ch_reads_input // channel: [mandatory] [ val(meta), path(reads_input) ] - ch_index // channel: [mandatory] [ val(meta), path(bwamem2_index) ] - ch_fasta // channel: [mandatory] [ path(fasta) ] - ch_fai // channel: [mandatory] [ path(fai) ] - val_platform // string: [mandatory] default: illumina + ch_reads_input // channel: [mandatory] [ val(meta), path(reads_input) ] + ch_bwamem2_index // channel: [mandatory] [ val(meta), path(bwamem2_index) ] + ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ] + ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ] + val_platform // string: [mandatory] default: illumina main: ch_versions = Channel.empty() // Map, sort, and index - BWAMEM2_MEM ( ch_reads_input, ch_index, true ) + BWAMEM2_MEM ( ch_reads_input, ch_bwamem2_index, true ) 
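In the ALIGN subworkflow above, the added SAMTOOLS_VIEW( ch_bam_bai, ch_genome_fasta, [] ) call (the trailing [] is the empty qname input, i.e. no read-name filtering) is what backs the new --save_mapped_as_cram parameter. A hedged sketch of module configuration that converts to CRAM and publishes only on request (the selector and output path are illustrative):

    process {
        withName: '.*:ALIGN:SAMTOOLS_VIEW' {
            ext.args   = '--output-fmt cram --write-index'
            publishDir = [
                path:    { "${params.outdir}/alignment" },
                mode:    params.publish_dir_mode,
                enabled: params.save_mapped_as_cram
            ]
        }
    }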
SAMTOOLS_INDEX_ALIGN ( BWAMEM2_MEM.out.bam ) // Get stats for each demultiplexed read pair. bam_sorted_indexed = BWAMEM2_MEM.out.bam.join(SAMTOOLS_INDEX_ALIGN.out.bai, failOnMismatch:true, failOnDuplicate:true) - SAMTOOLS_STATS ( bam_sorted_indexed, [] ) + SAMTOOLS_STATS ( bam_sorted_indexed, [[],[]] ) // Merge multiple lane samples and index BWAMEM2_MEM.out.bam @@ -45,11 +45,11 @@ workflow ALIGN_BWAMEM2 { .set{ bams } // If there are no samples to merge, skip the process - SAMTOOLS_MERGE ( bams.multiple, ch_fasta, ch_fai ) + SAMTOOLS_MERGE ( bams.multiple, ch_genome_fasta, ch_genome_fai ) prepared_bam = bams.single.mix(SAMTOOLS_MERGE.out.bam) // Marking duplicates - MARKDUPLICATES ( prepared_bam , ch_fasta, ch_fai ) + MARKDUPLICATES ( prepared_bam , ch_genome_fasta, ch_genome_fai ) SAMTOOLS_INDEX_MARKDUP ( MARKDUPLICATES.out.bam ) ch_versions = ch_versions.mix(BWAMEM2_MEM.out.versions.first()) diff --git a/subworkflows/local/alignment/align_sentieon.nf b/subworkflows/local/alignment/align_sentieon.nf index a161cb1d..e4094edf 100644 --- a/subworkflows/local/alignment/align_sentieon.nf +++ b/subworkflows/local/alignment/align_sentieon.nf @@ -12,9 +12,9 @@ include { SENTIEON_READWRITER } from '../../../modules/local/sentieon/readwr workflow ALIGN_SENTIEON { take: ch_reads_input // channel: [mandatory] [ val(meta), path(reads_input) ] - ch_fasta // channel: [mandatory] [ path(fasta) ] - ch_fai // channel: [mandatory] [ path(fai) ] - ch_index // channel: [mandatory] [ val(meta), path(bwamem2_index) ] + ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ] + ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ] + ch_bwa_index // channel: [mandatory] [ val(meta), path(bwa_index) ] ch_known_dbsnp // channel: [optional] [ path(known_dbsnp) ] ch_known_dbsnp_tbi // channel: [optional] [ path(known_dbsnp_tbi) ] val_platform // string: [mandatory] default: illumina @@ -25,7 +25,7 @@ workflow ALIGN_SENTIEON { ch_bqsr_bai = Channel.empty() ch_bqsr_csv = Channel.empty() - SENTIEON_BWAMEM ( ch_reads_input, ch_fasta, ch_fai, ch_index ) + SENTIEON_BWAMEM ( ch_reads_input, ch_genome_fasta, ch_genome_fai, ch_bwa_index ) SENTIEON_BWAMEM.out .bam @@ -45,7 +45,7 @@ workflow ALIGN_SENTIEON { SENTIEON_READWRITER (merge_bams_in.multiple) ch_bam_bai = merge_bams_in.single.mix(SENTIEON_READWRITER.out.bam_bai) - SENTIEON_DATAMETRICS (ch_bam_bai, ch_fasta, ch_fai ) + SENTIEON_DATAMETRICS (ch_bam_bai, ch_genome_fasta, ch_genome_fai ) SENTIEON_LOCUSCOLLECTOR ( ch_bam_bai ) @@ -54,13 +54,13 @@ workflow ALIGN_SENTIEON { .join(SENTIEON_LOCUSCOLLECTOR.out.score_idx, failOnMismatch:true, failOnDuplicate:true) .set { ch_bam_bai_score } - SENTIEON_DEDUP ( ch_bam_bai_score, ch_fasta, ch_fai ) + SENTIEON_DEDUP ( ch_bam_bai_score, ch_genome_fasta, ch_genome_fai ) if (params.variant_caller == "sentieon") { SENTIEON_DEDUP.out.bam .join(SENTIEON_DEDUP.out.bai, failOnMismatch:true, failOnDuplicate:true) .set { ch_dedup_bam_bai } - SENTIEON_BQSR ( ch_dedup_bam_bai, ch_fasta, ch_fai, ch_known_dbsnp, ch_known_dbsnp_tbi ) + SENTIEON_BQSR ( ch_dedup_bam_bai, ch_genome_fasta, ch_genome_fai, ch_known_dbsnp, ch_known_dbsnp_tbi ) ch_bqsr_bam = SENTIEON_BQSR.out.bam ch_bqsr_bai = SENTIEON_BQSR.out.bai ch_bqsr_csv = SENTIEON_BQSR.out.recal_csv diff --git a/subworkflows/local/analyse_MT.nf b/subworkflows/local/analyse_MT.nf index 0ad82f1a..af5cebff 100644 --- a/subworkflows/local/analyse_MT.nf +++ b/subworkflows/local/analyse_MT.nf @@ -9,38 +9,38 @@ include { MERGE_ANNOTATE_MT } from './mitochondria/me workflow ANALYSE_MT { 
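Note the SAMTOOLS_STATS ( bam_sorted_indexed, [[],[]] ) change above: since the stats module now expects its reference as a [ meta, fasta ] tuple, the old bare [] placeholder becomes [[],[]], the conventional nf-core stand-in for "no reference supplied" (safe here because the input is BAM, so no CRAM decoding is needed). Both call shapes, sketched:

    SAMTOOLS_STATS ( bam_sorted_indexed, ch_genome_fasta )   // [ [id:'genome'], fasta ] for CRAM input
    SAMTOOLS_STATS ( bam_sorted_indexed, [[],[]] )           // empty meta + empty path for BAM input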
diff --git a/subworkflows/local/analyse_MT.nf b/subworkflows/local/analyse_MT.nf
index 0ad82f1a..af5cebff 100644
--- a/subworkflows/local/analyse_MT.nf
+++ b/subworkflows/local/analyse_MT.nf
@@ -9,38 +9,38 @@ include { MERGE_ANNOTATE_MT } from './mitochondria/me
 workflow ANALYSE_MT {
    take:
-       ch_bam                    // channel: [mandatory] [ val(meta), file(bam), file(bai) ]
-       ch_genome_bwa_index       // channel: [mandatory] [ path(index) ]
-       ch_genome_bwamem2_index   // channel: [mandatory] [ path(index) ]
-       ch_genome_fasta_meta      // channel: [mandatory] [ val(meta), path(fasta) ]
-       ch_genome_fasta_no_meta   // channel: [mandatory] [ path(fasta) ]
-       ch_genome_dict_meta       // channel: [mandatory] [ val(meta), path(dict) ]
-       ch_genome_dict_no_meta    // channel: [mandatory] [ path(dict) ]
-       ch_genome_fai             // channel: [mandatory] [ path(fai) ]
-       ch_mt_intervals           // channel: [mandatory] [ path(interval_list) ]
-       ch_shift_mt_bwa_index     // channel: [mandatory] [ path(index) ]
-       ch_shift_mt_bwamem2_index // channel: [mandatory] [ path(index) ]
-       ch_shift_mt_fasta         // channel: [mandatory] [ path(fasta) ]
-       ch_shift_mt_dict          // channel: [mandatory] [ path(dict) ]
-       ch_shift_mt_fai           // channel: [mandatory] [ path(fai) ]
-       ch_shift_mt_intervals     // channel: [mandatory] [ path(interval_list) ]
-       ch_shift_mt_backchain     // channel: [mandatory] [ path(back_chain) ]
-       ch_vcfanno_resources      // channel: [mandatory] [ path(resources) ]
-       ch_vcfanno_toml           // channel: [mandatory] [ path(toml) ]
-       val_vep_genome            // string: [mandatory] GRCh37 or GRCh38
-       val_vep_cache_version     // string: [mandatory] 107
-       ch_vep_cache              // channel: [mandatory] [ path(cache) ]
-       ch_case_info              // channel: [mandatory] [ val(case_info) ]
+       ch_bam_bai                // channel: [mandatory] [ val(meta), file(bam), file(bai) ]
+       ch_cadd_header            // channel: [mandatory] [ path(txt) ]
+       ch_cadd_resources         // channel: [mandatory] [ path(annotation) ]
+       ch_genome_bwa_index       // channel: [mandatory] [ val(meta), path(index) ]
+       ch_genome_bwamem2_index   // channel: [mandatory] [ val(meta), path(index) ]
+       ch_genome_fasta           // channel: [mandatory] [ val(meta), path(fasta) ]
+       ch_genome_fai             // channel: [mandatory] [ val(meta), path(fai) ]
+       ch_genome_dict            // channel: [mandatory] [ val(meta), path(dict) ]
+       ch_mt_intervals           // channel: [mandatory] [ path(interval_list) ]
+       ch_mtshift_bwaindex       // channel: [mandatory] [ val(meta), path(index) ]
+       ch_mtshift_bwamem2index   // channel: [mandatory] [ val(meta), path(index) ]
+       ch_mtshift_fasta          // channel: [mandatory] [ val(meta), path(fasta) ]
+       ch_mtshift_dict           // channel: [mandatory] [ val(meta), path(dict) ]
+       ch_mtshift_fai            // channel: [mandatory] [ val(meta), path(fai) ]
+       ch_mtshift_intervals      // channel: [mandatory] [ path(interval_list) ]
+       ch_mtshift_backchain      // channel: [mandatory] [ val(meta), path(back_chain) ]
+       ch_vcfanno_resources      // channel: [mandatory] [ path(resources) ]
+       ch_vcfanno_toml           // channel: [mandatory] [ path(toml) ]
+       val_vep_genome            // string: [mandatory] GRCh37 or GRCh38
+       val_vep_cache_version     // string: [mandatory] 107
+       ch_vep_cache              // channel: [mandatory] [ path(cache) ]
+       ch_case_info              // channel: [mandatory] [ val(case_info) ]

    main:
        ch_versions = Channel.empty()

        // PREPARING READS FOR MT ALIGNMENT
        CONVERT_MT_BAM_TO_FASTQ (
-           ch_bam,
-           ch_genome_fasta_meta,
+           ch_bam_bai,
+           ch_genome_fasta,
            ch_genome_fai,
-           ch_genome_dict_no_meta
+           ch_genome_dict
        )

        // MT ALIGNMENT AND VARIANT CALLING
@@ -49,8 +49,8 @@ workflow ANALYSE_MT {
            CONVERT_MT_BAM_TO_FASTQ.out.bam,
            ch_genome_bwa_index,
            ch_genome_bwamem2_index,
-           ch_genome_fasta_no_meta,
-           ch_genome_dict_no_meta,
+           ch_genome_fasta,
+           ch_genome_dict,
            ch_genome_fai,
            ch_mt_intervals
        )

@@ -58,29 +58,30 @@
        ALIGN_AND_CALL_MT_SHIFT (
            CONVERT_MT_BAM_TO_FASTQ.out.fastq,
            CONVERT_MT_BAM_TO_FASTQ.out.bam,
-           ch_shift_mt_bwa_index,
-           ch_shift_mt_bwamem2_index,
-           ch_shift_mt_fasta,
-           ch_shift_mt_dict,
-           ch_shift_mt_fai,
-           ch_shift_mt_intervals
+           ch_mtshift_bwaindex,
+           ch_mtshift_bwamem2index,
+           ch_mtshift_fasta,
+           ch_mtshift_dict,
+           ch_mtshift_fai,
+           ch_mtshift_intervals
        )

        // LIFTOVER VCF FROM REFERENCE MT TO SHIFTED MT
        PICARD_LIFTOVERVCF (
            ALIGN_AND_CALL_MT_SHIFT.out.vcf,
-           ch_genome_dict_no_meta,
-           ch_shift_mt_backchain,
-           ch_genome_fasta_no_meta
+           ch_genome_dict,
+           ch_genome_fasta,
+           ch_mtshift_backchain,
        )

        // MT MERGE AND ANNOTATE VARIANTS
        MERGE_ANNOTATE_MT(
            ALIGN_AND_CALL_MT.out.vcf,
            PICARD_LIFTOVERVCF.out.vcf_lifted,
-           ch_genome_fasta_no_meta,
-           ch_genome_dict_meta,
-           ch_genome_dict_no_meta,
+           ch_cadd_header,
+           ch_cadd_resources,
+           ch_genome_fasta,
+           ch_genome_dict,
            ch_genome_fai,
            ch_vcfanno_resources,
            ch_vcfanno_toml,
@@ -97,17 +98,21 @@ workflow ANALYSE_MT {
        ch_versions = ch_versions.mix(MERGE_ANNOTATE_MT.out.versions)

    emit:
-       vcf           = MERGE_ANNOTATE_MT.out.vcf                 // channel: [ val(meta), path(vcf) ]
-       tbi           = MERGE_ANNOTATE_MT.out.tbi                 // channel: [ val(meta), path(tbi) ]
-       stats         = ALIGN_AND_CALL_MT.out.stats               // channel: [ val(meta), path(stats) ]
-       filt_stats    = ALIGN_AND_CALL_MT.out.filt_stats          // channel: [ val(meta), path(tsv) ]
-       stats_sh      = ALIGN_AND_CALL_MT_SHIFT.out.stats         // channel: [ val(meta), path(stats) ]
-       filt_stats_sh = ALIGN_AND_CALL_MT_SHIFT.out.filt_stats    // channel: [ val(meta), path(tsv) ]
-       haplog        = MERGE_ANNOTATE_MT.out.haplog              // channel: [ val(meta), path(txt) ]
-       report        = MERGE_ANNOTATE_MT.out.report              // channel: [ path(html) ]
-       txt           = ALIGN_AND_CALL_MT.out.txt                 // channel: [ val(meta), path(txt) ]
-       html          = ALIGN_AND_CALL_MT.out.html                // channel: [ val(meta), path(html) ]
-       txt_sh        = ALIGN_AND_CALL_MT_SHIFT.out.txt           // channel: [ val(meta), path(txt) ]
-       html_sh       = ALIGN_AND_CALL_MT_SHIFT.out.html          // channel: [ val(meta), path(html) ]
-       versions      = ch_versions                               // channel: [ path(versions.yml) ]
+       vcf            = MERGE_ANNOTATE_MT.out.vcf                // channel: [ val(meta), path(vcf) ]
+       tbi            = MERGE_ANNOTATE_MT.out.tbi                // channel: [ val(meta), path(tbi) ]
+       stats          = ALIGN_AND_CALL_MT.out.stats              // channel: [ val(meta), path(stats) ]
+       filt_stats     = ALIGN_AND_CALL_MT.out.filt_stats         // channel: [ val(meta), path(tsv) ]
+       mt_del_result  = ALIGN_AND_CALL_MT.out.mt_del_result      // channel: [ val(meta), path(txt) ]
+       stats_sh       = ALIGN_AND_CALL_MT_SHIFT.out.stats        // channel: [ val(meta), path(stats) ]
+       filt_stats_sh  = ALIGN_AND_CALL_MT_SHIFT.out.filt_stats   // channel: [ val(meta), path(tsv) ]
+       eklipse_del    = ALIGN_AND_CALL_MT.out.eklipse_del        // channel: [ val(meta), path(csv) ]
+       eklipse_genes  = ALIGN_AND_CALL_MT.out.eklipse_genes      // channel: [ val(meta), path(csv) ]
+       eklipse_circos = ALIGN_AND_CALL_MT.out.eklipse_circos     // channel: [ val(meta), path(png) ]
+       haplog         = MERGE_ANNOTATE_MT.out.haplog             // channel: [ val(meta), path(txt) ]
+       report         = MERGE_ANNOTATE_MT.out.report             // channel: [ path(html) ]
+       txt            = ALIGN_AND_CALL_MT.out.txt                // channel: [ val(meta), path(txt) ]
+       html           = ALIGN_AND_CALL_MT.out.html               // channel: [ val(meta), path(html) ]
+       txt_sh         = ALIGN_AND_CALL_MT_SHIFT.out.txt          // channel: [ val(meta), path(txt) ]
+       html_sh        = ALIGN_AND_CALL_MT_SHIFT.out.html         // channel: [ val(meta), path(html) ]
+       versions       = ch_versions                              // channel: [ path(versions.yml) ]
 }
diff --git a/subworkflows/local/annotate_snvs.nf b/subworkflows/local/annotate_snvs.nf
index 5bc27d26..bd5fdfb8 100644
--- a/subworkflows/local/annotate_snvs.nf
+++ b/subworkflows/local/annotate_snvs.nf
@@ -7,6 +7,10 @@ include { BCFTOOLS_CONCAT } from '../../modules/nf-core/bc
 include { BCFTOOLS_ROH } from '../../modules/nf-core/bcftools/roh/main'
 include { BCFTOOLS_VIEW } from '../../modules/nf-core/bcftools/view/main'
 include { RHOCALL_ANNOTATE } from '../../modules/nf-core/rhocall/annotate/main'
+include { UPD as UPD_SITES } from '../../modules/nf-core/upd/main'
+include { UPD as UPD_REGIONS } from '../../modules/nf-core/upd/main'
+include { CHROMOGRAPH as CHROMOGRAPH_SITES } from '../../modules/nf-core/chromograph/main'
+include { CHROMOGRAPH as CHROMOGRAPH_REGIONS } from '../../modules/nf-core/chromograph/main'
 include { ENSEMBLVEP as ENSEMBLVEP_SNV } from '../../modules/local/ensemblvep/main'
 include { TABIX_BGZIPTABIX as ZIP_TABIX_ROHCALL } from '../../modules/nf-core/tabix/bgziptabix/main'
 include { TABIX_BGZIPTABIX as ZIP_TABIX_VCFANNO } from '../../modules/nf-core/tabix/bgziptabix/main'
@@ -14,51 +18,61 @@ include { TABIX_TABIX as TABIX_VEP } from '../../modules/nf-core/ta
 include { TABIX_TABIX as TABIX_BCFTOOLS_CONCAT } from '../../modules/nf-core/tabix/tabix/main'
 include { TABIX_TABIX as TABIX_BCFTOOLS_VIEW } from '../../modules/nf-core/tabix/tabix/main'
 include { GATK4_SELECTVARIANTS } from '../../modules/nf-core/gatk4/selectvariants/main'
+include { ANNOTATE_CADD } from './annotation/annotate_cadd'

 workflow ANNOTATE_SNVS {
    take:
        ch_vcf                // channel: [mandatory] [ val(meta), path(vcf), path(tbi) ]
        analysis_type         // string: [mandatory] 'wgs' or 'wes'
+       ch_cadd_header        // channel: [mandatory] [ path(txt) ]
+       ch_cadd_resources     // channel: [mandatory] [ path(annotation) ]
        ch_vcfanno_resources  // channel: [mandatory] [ path(resources) ]
        ch_vcfanno_lua        // channel: [mandatory] [ path(lua) ]
        ch_vcfanno_toml       // channel: [mandatory] [ path(toml) ]
        val_vep_genome        // string: [mandatory] GRCh37 or GRCh38
        val_vep_cache_version // string: [mandatory] default: 107
        ch_vep_cache          // channel: [mandatory] [ path(cache) ]
-       ch_fasta              // channel: [mandatory] [ path(fasta) ]
+       ch_genome_fasta       // channel: [mandatory] [ val(meta), path(fasta) ]
        ch_gnomad_af          // channel: [optional] [ path(tab), path(tbi) ]
        ch_split_intervals    // channel: [mandatory] [ path(intervals) ]
-       ch_samples            // channel: [mandatory] [ val(sample_id), val(sex), val(phenotype), val(paternal_id), val(maternal_id), val(case_id) ]

    main:
        ch_versions       = Channel.empty()
        ch_vcf_scatter_in = Channel.empty()
        ch_vep_in         = Channel.empty()

-       ch_vcf.map { meta, vcf, idx -> return [vcf, idx] }.set { ch_roh_vcfs }
-       ch_samples
-           .branch { it ->
-               affected: it.phenotype == "2"
-               unaffected: it.phenotype == "1"
-           }.set { ch_phenotype }
-       ch_phenotype.affected.combine(ch_roh_vcfs).set { ch_roh_input }
-
-       BCFTOOLS_ROH (ch_roh_input, ch_gnomad_af, [], [], [], [])
-
-       BCFTOOLS_ROH.out.roh
-           .map { meta, roh ->
-               new_meta = [:]
-               new_meta.id = meta.case_id
-               return [new_meta, roh]
-           }
-           .set { ch_roh_rhocall }
+       BCFTOOLS_ROH (ch_vcf, ch_gnomad_af, [], [], [], [])
+
+       RHOCALL_ANNOTATE (ch_vcf, BCFTOOLS_ROH.out.roh, [])

-       RHOCALL_ANNOTATE (ch_vcf, ch_roh_rhocall, [])
        ZIP_TABIX_ROHCALL (RHOCALL_ANNOTATE.out.vcf)

-       VCFANNO (ZIP_TABIX_ROHCALL.out.gz_tbi, ch_vcfanno_toml, ch_vcfanno_lua, ch_vcfanno_resources)
+       ZIP_TABIX_ROHCALL.out.gz_tbi
+           .map { meta, vcf, tbi -> return [meta, vcf, tbi, []]}
+           .set { ch_vcf_in }
+
+       VCFANNO (ch_vcf_in, ch_vcfanno_toml, ch_vcfanno_lua, ch_vcfanno_resources)
+
+       VCFANNO.out.vcf
+           .map {meta, vcf ->
+               def splitchannels = []
+               for (int i=0; i< meta.upd_children.size(); i++) {
+                   upd_sample = meta.upd_children[i]
+                   new_meta = meta + [upd_child:upd_sample]
+                   splitchannels.add([new_meta,vcf])
+               }
+               return splitchannels
+           }
+           .flatten()
+           .buffer (size: 2)
+           .set { ch_upd_in }
+
+       UPD_SITES(ch_upd_in)
+       UPD_REGIONS(ch_upd_in)
+       CHROMOGRAPH_SITES([[],[]], [[],[]], [[],[]], [[],[]], [[],[]], [[],[]], UPD_SITES.out.bed)
+       CHROMOGRAPH_REGIONS([[],[]], [[],[]], [[],[]], [[],[]], [[],[]], UPD_REGIONS.out.bed, [[],[]])

        ZIP_TABIX_VCFANNO (VCFANNO.out.vcf)

@@ -69,57 +83,87 @@ workflow ANNOTATE_SNVS {
        BCFTOOLS_VIEW.out.vcf
            .join(TABIX_BCFTOOLS_VIEW.out.tbi, failOnMismatch:true, failOnDuplicate:true)
            .collect()
+           .combine(ch_split_intervals)
+           .map {
+               meta, vcf, tbi, interval ->
+                   return [meta + [scatterid:interval.baseName], vcf, tbi, interval]
+           }
            .set { ch_vcf_scatter_in }

-       GATK4_SELECTVARIANTS (ch_vcf_scatter_in.combine(ch_split_intervals)).vcf.set { ch_vep_in }
+       GATK4_SELECTVARIANTS (ch_vcf_scatter_in)
+
+       // Annotating with CADD
+       ANNOTATE_CADD (
+           GATK4_SELECTVARIANTS.out.vcf,
+           GATK4_SELECTVARIANTS.out.tbi,
+           ch_cadd_header,
+           ch_cadd_resources
+       )
+
+       // If CADD is run, pick the CADD output as input for VEP; otherwise pass the selectvariants output to VEP.
+       GATK4_SELECTVARIANTS.out.vcf
+           .combine(ANNOTATE_CADD.out.vcf.ifEmpty("null")) // If CADD is not run then this channel will be empty, so assign a default value to allow filtering with the branch operator
+           .branch { it -> // If CADD is run, then "it" will be [[meta],selvar.vcf,[meta],cadd.vcf], else [[meta],selvar.vcf,null]
+               selvar: it[2].equals("null")
+                   return [it[0], it[1]]
+               cadd: !(it[2].equals("null"))
+                   return [it[2], it[3]]
+           }
+           .set { ch_for_mix }
+       ch_vep_in = ch_for_mix.selvar.mix(ch_for_mix.cadd)
+
+       // Annotating with ensembl Vep
        ENSEMBLVEP_SNV(
            ch_vep_in,
+           ch_genome_fasta,
            val_vep_genome,
            "homo_sapiens",
            val_vep_cache_version,
            ch_vep_cache,
-           ch_fasta,
            []
        )

-       TABIX_VEP (ENSEMBLVEP_SNV.out.vcf_gz)
-
-       ch_vep_ann   = ENSEMBLVEP_SNV.out.vcf_gz
-       ch_vep_index = TABIX_VEP.out.tbi
+       ENSEMBLVEP_SNV.out.vcf_gz
+           .map { meta, vcf -> [meta - meta.subMap('scatterid'), vcf] }
+           .set { ch_vep_out }

-       if (params.analysis_type == 'wgs') {
+       TABIX_VEP (ch_vep_out)

-           ENSEMBLVEP_SNV.out.vcf_gz
-               .join(TABIX_VEP.out.tbi, failOnMismatch:true)
-               .groupTuple()
-               .map { meta, vcfs, tbis ->
-                   def sortedvcfs = vcfs.sort { it.baseName }
-                   def sortedtbis = tbis.sort { it.baseName }
-                   return [ meta, sortedvcfs, sortedtbis ]
-               }
-               .set { ch_concat_in }
+       ch_vep_out
+           .join(TABIX_VEP.out.tbi, failOnMismatch:true)
+           .groupTuple()
+           .map { meta, vcfs, tbis ->
+               def sortedvcfs = vcfs.sort { it.baseName }
+               def sortedtbis = tbis.sort { it.baseName }
+               return [ meta, sortedvcfs, sortedtbis ]
+           }
+           .set { ch_concat_in }

-           BCFTOOLS_CONCAT (ch_concat_in)
+       BCFTOOLS_CONCAT (ch_concat_in)

-           TABIX_BCFTOOLS_CONCAT (BCFTOOLS_CONCAT.out.vcf)
+       TABIX_BCFTOOLS_CONCAT (BCFTOOLS_CONCAT.out.vcf)

-           ch_vep_ann   = BCFTOOLS_CONCAT.out.vcf
-           ch_vep_index = TABIX_BCFTOOLS_CONCAT.out.tbi
-           ch_versions  = ch_versions.mix(BCFTOOLS_CONCAT.out.versions)
-           ch_versions  = ch_versions.mix(TABIX_BCFTOOLS_CONCAT.out.versions)
-       }
+       ch_vep_ann   = BCFTOOLS_CONCAT.out.vcf
+       ch_vep_index = TABIX_BCFTOOLS_CONCAT.out.tbi

        ch_versions = ch_versions.mix(BCFTOOLS_ROH.out.versions)
        ch_versions = ch_versions.mix(RHOCALL_ANNOTATE.out.versions)
        ch_versions = ch_versions.mix(ZIP_TABIX_ROHCALL.out.versions)
        ch_versions = ch_versions.mix(VCFANNO.out.versions)
+       ch_versions = ch_versions.mix(UPD_SITES.out.versions)
+       ch_versions = ch_versions.mix(UPD_REGIONS.out.versions)
+       ch_versions = ch_versions.mix(CHROMOGRAPH_SITES.out.versions)
+       ch_versions = ch_versions.mix(CHROMOGRAPH_REGIONS.out.versions)
        ch_versions = ch_versions.mix(ZIP_TABIX_VCFANNO.out.versions)
        ch_versions = ch_versions.mix(BCFTOOLS_VIEW.out.versions)
        ch_versions = ch_versions.mix(TABIX_BCFTOOLS_VIEW.out.versions)
        ch_versions = ch_versions.mix(GATK4_SELECTVARIANTS.out.versions.first())
+       ch_versions = ch_versions.mix(ANNOTATE_CADD.out.versions)
        ch_versions = ch_versions.mix(ENSEMBLVEP_SNV.out.versions.first())
        ch_versions = ch_versions.mix(TABIX_VEP.out.versions.first())
+       ch_versions = ch_versions.mix(BCFTOOLS_CONCAT.out.versions)
+       ch_versions = ch_versions.mix(TABIX_BCFTOOLS_CONCAT.out.versions)

    emit:
        vcf_ann = ch_vep_ann // channel: [ val(meta), path(vcf) ]
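The map/flatten/buffer chain in ANNOTATE_SNVS above fans a single case VCF out into one element per UPD-eligible child. A self-contained sketch of the same pattern, with hypothetical values:

    Channel.of( [ [id:'fam1', upd_children:['child_a','child_b']], file('fam1.vcf.gz') ] )
        .map { meta, vcf ->
            def splitchannels = []
            for (child in meta.upd_children) {
                splitchannels.add([ meta + [upd_child:child], vcf ])
            }
            return splitchannels
        }
        .flatten()        // emits meta_a, vcf, meta_b, vcf (maps are not flattened further)
        .buffer (size: 2) // re-pairs them: [meta_a, vcf], [meta_b, vcf]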
diff --git a/subworkflows/local/annotate_structural_variants.nf b/subworkflows/local/annotate_structural_variants.nf
index 6af0519f..be777ee2 100644
--- a/subworkflows/local/annotate_structural_variants.nf
+++ b/subworkflows/local/annotate_structural_variants.nf
@@ -16,8 +16,8 @@ workflow ANNOTATE_STRUCTURAL_VARIANTS {
        val_vep_genome        // string: [mandatory] GRCh37 or GRCh38
        val_vep_cache_version // string: [mandatory] default: 107
        ch_vep_cache          // channel: [mandatory] [ path(cache) ]
-       ch_fasta              // channel: [mandatory] [ path(fasta) ]
-       ch_seq_dict           // channel: [mandatory] [ path(dict) ]
+       ch_genome_fasta       // channel: [mandatory] [ val(meta), path(fasta) ]
+       ch_genome_dictionary  // channel: [mandatory] [ val(meta), path(dict) ]

    main:
        ch_versions = Channel.empty()
@@ -42,7 +42,7 @@ workflow ANNOTATE_STRUCTURAL_VARIANTS {
            ch_svdb_dbs.vcf_dbs.toList()
        )

-       PICARD_SORTVCF(SVDB_QUERY.out.vcf, ch_fasta, ch_seq_dict)
+       PICARD_SORTVCF(SVDB_QUERY.out.vcf, ch_genome_fasta, ch_genome_dictionary)

        PICARD_SORTVCF.out.vcf.map { meta, vcf -> return [meta,vcf,[]] }.set { ch_sortvcf }

@@ -50,11 +50,11 @@ workflow ANNOTATE_STRUCTURAL_VARIANTS {

        ENSEMBLVEP_SV(
            BCFTOOLS_VIEW.out.vcf,
+           ch_genome_fasta,
            val_vep_genome,
            "homo_sapiens",
            val_vep_cache_version,
            ch_vep_cache,
-           ch_fasta,
            []
        )

diff --git a/subworkflows/local/annotation/annotate_cadd.nf b/subworkflows/local/annotation/annotate_cadd.nf
new file mode 100644
index 00000000..e471dd21
--- /dev/null
+++ b/subworkflows/local/annotation/annotate_cadd.nf
@@ -0,0 +1,53 @@
+//
+// A subworkflow to annotate variants with CADD scores
+//
+
+include { BCFTOOLS_ANNOTATE             } from '../../../modules/nf-core/bcftools/annotate/main'
+include { BCFTOOLS_VIEW                 } from '../../../modules/nf-core/bcftools/view/main'
+include { CADD                          } from '../../../modules/nf-core/cadd/main'
+include { TABIX_TABIX as TABIX_ANNOTATE } from '../../../modules/nf-core/tabix/tabix/main'
+include { TABIX_TABIX as TABIX_CADD     } from '../../../modules/nf-core/tabix/tabix/main'
+include { TABIX_TABIX as TABIX_VIEW     } from '../../../modules/nf-core/tabix/tabix/main'
+
+workflow ANNOTATE_CADD {
+
+    take:
+        ch_vcf            // channel: [mandatory] [ val(meta), path(vcfs) ]
+        ch_index          // channel: [mandatory] [ val(meta), path(tbis) ]
+        ch_header         // channel: [mandatory] [ path(txt) ]
+        ch_cadd_resources // channel: [mandatory] [ path(dir) ]
+
+    main:
+        ch_versions = Channel.empty()
+
+        BCFTOOLS_VIEW(ch_vcf.join(ch_index), [], [], [])
+
+        TABIX_VIEW(BCFTOOLS_VIEW.out.vcf)
+
+        CADD(BCFTOOLS_VIEW.out.vcf, ch_cadd_resources)
+
+        TABIX_CADD(CADD.out.tsv)
+
+        ch_vcf
+            .join(ch_index)
+            .join(CADD.out.tsv)
+            .join(TABIX_CADD.out.tbi)
+            .combine(ch_header)
+            .set { ch_annotate_in }
+
+        BCFTOOLS_ANNOTATE(ch_annotate_in)
+
+        TABIX_ANNOTATE (BCFTOOLS_ANNOTATE.out.vcf)
+
+        ch_versions = ch_versions.mix(BCFTOOLS_VIEW.out.versions.first())
+        ch_versions = ch_versions.mix(TABIX_VIEW.out.versions.first())
+        ch_versions = ch_versions.mix(CADD.out.versions.first())
+        ch_versions = ch_versions.mix(TABIX_CADD.out.versions.first())
+        ch_versions = ch_versions.mix(BCFTOOLS_ANNOTATE.out.versions.first())
+        ch_versions = ch_versions.mix(TABIX_ANNOTATE.out.versions.first())
+
+    emit:
+        vcf      = BCFTOOLS_ANNOTATE.out.vcf // channel: [ val(meta), path(vcf) ]
+        tbi      = TABIX_ANNOTATE.out.tbi    // channel: [ val(meta), path(tbi) ]
+        versions = ch_versions               // channel: [ path(versions.yml) ]
+}
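ANNOTATE_CADD emits nothing when CADD is not run, so its callers (ANNOTATE_SNVS above, and MERGE_ANNOTATE_MT further down) select VEP's input with a sentinel-and-branch pattern rather than an if/else on channels. A minimal sketch of that pattern, using hypothetical channel names:

    // If ch_cadd_vcf is empty, ifEmpty("null") injects a sentinel so that
    // combine still produces one element and branch can route it.
    ch_selvar_vcf                             // [ [meta], selvar.vcf ]
        .combine(ch_cadd_vcf.ifEmpty("null")) // [ [meta], selvar.vcf, "null" ] or [ [meta], selvar.vcf, [meta], cadd.vcf ]
        .branch { it ->
            selvar: it[2].equals("null")
                return [it[0], it[1]]
            cadd:   !(it[2].equals("null"))
                return [it[2], it[3]]
        }
        .set { ch_for_mix }
    ch_vep_in = ch_for_mix.selvar.mix(ch_for_mix.cadd)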
diff --git a/subworkflows/local/call_repeat_expansions.nf b/subworkflows/local/call_repeat_expansions.nf
index 52343806..8e527eab 100644
--- a/subworkflows/local/call_repeat_expansions.nf
+++ b/subworkflows/local/call_repeat_expansions.nf
@@ -18,25 +18,23 @@ workflow CALL_REPEAT_EXPANSIONS {
        ch_bam             // channel: [mandatory] [ val(meta), path(bam), path(bai) ]
        ch_variant_catalog // channel: [mandatory] [ path(variant_catalog.json) ]
        ch_case_info       // channel: [mandatory] [ val(case_id) ]
-       ch_fasta           // channel: [mandatory] [ path(fasta) ]
-       ch_fai             // channel: [mandatory] [ path(fai) ]
-       ch_fasta_meta      // channel: [mandatory] [ val(meta), path(fasta) ]
-       ch_fai_meta        // channel: [mandatory] [ val(meta), path(fai) ]
+       ch_genome_fasta    // channel: [mandatory] [ val(meta), path(fasta) ]
+       ch_genome_fai      // channel: [mandatory] [ val(meta), path(fai) ]

    main:
        ch_versions = Channel.empty()

        EXPANSIONHUNTER (
            ch_bam,
-           ch_fasta_meta,
-           ch_fai_meta,
-           ch_variant_catalog.map { it -> [[id:it[0].simpleName],it]}
+           ch_genome_fasta,
+           ch_genome_fai,
+           ch_variant_catalog
        )

        // Fix header and rename sample
        BCFTOOLS_REHEADER_EXP (
            EXPANSIONHUNTER.out.vcf.map{ meta, vcf -> [ meta, vcf, [] ]},
-           ch_fai
+           ch_genome_fai
        )
        RENAMESAMPLE_EXP ( BCFTOOLS_REHEADER_EXP.out.vcf )
        TABIX_EXP_RENAME ( RENAMESAMPLE_EXP.out.vcf )
@@ -44,7 +42,7 @@ workflow CALL_REPEAT_EXPANSIONS {
        // Split multi allelelic
        SPLIT_MULTIALLELICS_EXP (
            RENAMESAMPLE_EXP.out.vcf.join(TABIX_EXP_RENAME.out.tbi, failOnMismatch:true, failOnDuplicate:true),
-           ch_fasta
+           ch_genome_fasta
        )

        // Merge indiviual repeat expansions
diff --git a/subworkflows/local/call_snv.nf b/subworkflows/local/call_snv.nf
index 28eeee0b..7b63825e 100644
--- a/subworkflows/local/call_snv.nf
+++ b/subworkflows/local/call_snv.nf
@@ -9,9 +9,9 @@ include { GATK4_SELECTVARIANTS } from '../../modules/nf-core/gatk4/selectvariant
 workflow CALL_SNV {
    take:
-       ch_input           // channel: [mandatory] [ val(meta), path(bam), path(bai) ]
-       ch_fasta           // channel: [mandatory] [ path(fasta) ]
-       ch_fai             // channel: [mandatory] [ path(fai) ]
+       ch_bam_bai         // channel: [mandatory] [ val(meta), path(bam), path(bai) ]
+       ch_genome_fasta    // channel: [mandatory] [ val(meta), path(fasta) ]
+       ch_genome_fai      // channel: [mandatory] [ val(meta), path(fai) ]
        ch_known_dbsnp     // channel: [optional] [ val(meta), path(vcf) ]
        ch_known_dbsnp_tbi // channel: [optional] [ val(meta), path(tbi) ]
        ch_call_interval   // channel: [mandatory] [ path(intervals) ]
@@ -24,16 +24,16 @@ workflow CALL_SNV {
        ch_tabix    = Channel.empty()

        CALL_SNV_DEEPVARIANT (    // triggered only when params.variant_caller is set as deepvariant
-           ch_input,
-           ch_fasta,
-           ch_fai,
+           ch_bam_bai,
+           ch_genome_fasta,
+           ch_genome_fai,
            ch_case_info
        )

-       CALL_SNV_SENTIEON(        // triggered only when params.variant_caller is set as sentieon
-           ch_input,
-           ch_fasta,
-           ch_fai,
+       CALL_SNV_SENTIEON(        // triggered only when params.variant_caller is set as sentieon
+           ch_bam_bai,
+           ch_genome_fasta,
+           ch_genome_fai,
            ch_known_dbsnp,
            ch_known_dbsnp_tbi,
            ch_call_interval,
diff --git a/subworkflows/local/call_structural_variants.nf b/subworkflows/local/call_structural_variants.nf
index e5708a65..65016260 100644
--- a/subworkflows/local/call_structural_variants.nf
+++ b/subworkflows/local/call_structural_variants.nf
@@ -2,53 +2,72 @@
 // A nested subworkflow to call structural variants.
 //

-include { CALL_SV_MANTA  } from './variant_calling/call_sv_manta'
-include { CALL_SV_TIDDIT } from './variant_calling/call_sv_tiddit'
-include { SVDB_MERGE     } from '../../modules/nf-core/svdb/merge/main'
-include { TABIX_TABIX    } from '../../modules/nf-core/tabix/tabix/main'
+include { CALL_SV_MANTA             } from './variant_calling/call_sv_manta'
+include { CALL_SV_TIDDIT            } from './variant_calling/call_sv_tiddit'
+include { SVDB_MERGE                } from '../../modules/nf-core/svdb/merge/main'
+include { CALL_SV_GERMLINECNVCALLER } from './variant_calling/call_sv_germlinecnvcaller'
+include { TABIX_TABIX               } from '../../modules/nf-core/tabix/tabix/main'

 workflow CALL_STRUCTURAL_VARIANTS {
    take:
-       ch_bam           // channel: [mandatory] [ val(meta), path(bam) ]
-       ch_bai           // channel: [mandatory] [ val(meta), path(bai) ]
-       ch_bam_bai       // channel: [mandatory] [ val(meta), path(bam), path(bai) ]
-       ch_bwa_index     // channel: [mandatory] [ val(meta), path(index)]
-       ch_fasta_no_meta // channel: [mandatory] [ path(fasta) ]
-       ch_fasta_meta    // channel: [mandatory] [ val(meta), path(fasta) ]
-       ch_fai           // channel: [mandatory] [ path(fai) ]
-       ch_case_info     // channel: [mandatory] [ val(case_info) ]
-       ch_target_bed    // channel: [mandatory for WES] [ val(meta), path(bed), path(tbi) ]
+       ch_bam                 // channel: [mandatory] [ val(meta), path(bam) ]
+       ch_bai                 // channel: [mandatory] [ val(meta), path(bai) ]
+       ch_bam_bai             // channel: [mandatory] [ val(meta), path(bam), path(bai) ]
+       ch_bwa_index           // channel: [mandatory] [ val(meta), path(index)]
+       ch_genome_fasta        // channel: [mandatory] [ val(meta), path(fasta) ]
+       ch_genome_fai          // channel: [mandatory] [ val(meta), path(fai) ]
+       ch_case_info           // channel: [mandatory] [ val(case_info) ]
+       ch_target_bed          // channel: [mandatory for WES] [ val(meta), path(bed), path(tbi) ]
+       ch_genome_dictionary   // channel: [optional; mandatory for GATK's cnvcaller] [ val(meta), path(dict) ]
+       ch_svcaller_priority   // channel: [mandatory] [ val(["var caller tag 1", ...]) ]
+       ch_readcount_intervals // channel: [optional; mandatory for GATK's cnvcaller] [ path(intervals) ]
+       ch_ploidy_model        // channel: [optional; mandatory for GATK's cnvcaller] [ path(ploidy_model) ]
+       ch_gcnvcaller_model    // channel: [optional; mandatory for GATK's cnvcaller] [ path(gcnvcaller_model) ]

    main:
        ch_versions = Channel.empty()

-       CALL_SV_MANTA (ch_bam, ch_bai, ch_fasta_no_meta, ch_fai, ch_case_info, ch_target_bed)
+       CALL_SV_MANTA (ch_bam, ch_bai, ch_genome_fasta, ch_genome_fai, ch_case_info, ch_target_bed)
            .diploid_sv_vcf
            .collect{it[1]}
            .set{ manta_vcf }

-       CALL_SV_TIDDIT (ch_bam_bai, ch_fasta_meta, ch_bwa_index, ch_case_info)
+       CALL_SV_TIDDIT (ch_bam_bai, ch_genome_fasta, ch_bwa_index, ch_case_info)
            .vcf
            .collect{it[1]}
            .set { tiddit_vcf }

+       CALL_SV_GERMLINECNVCALLER (ch_bam_bai, ch_genome_fasta, ch_genome_fai, ch_readcount_intervals, ch_genome_dictionary, ch_ploidy_model, ch_gcnvcaller_model)
+           .genotyped_intervals_vcf
+           .collect{it[1]}
+           .set { gcnvcaller_vcf }
+
        //merge
-       tiddit_vcf
-           .combine(manta_vcf)
-           .toList()
-           .set { vcf_list }
+       if (params.skip_cnv_calling) {
+           tiddit_vcf
+               .combine(manta_vcf)
+               .toList()
+               .set { vcf_list }
+       } else {
+           tiddit_vcf
+               .combine(manta_vcf)
+               .combine(gcnvcaller_vcf)
+               .toList()
+               .set { vcf_list }
+       }

        ch_case_info
            .combine(vcf_list)
            .set { merge_input_vcfs }

-       SVDB_MERGE (merge_input_vcfs, ["tiddit","manta"])
+       SVDB_MERGE (merge_input_vcfs, ch_svcaller_priority)

        TABIX_TABIX (SVDB_MERGE.out.vcf)

        ch_versions = ch_versions.mix(CALL_SV_MANTA.out.versions)
        ch_versions = ch_versions.mix(CALL_SV_TIDDIT.out.versions)
+       ch_versions = ch_versions.mix(CALL_SV_GERMLINECNVCALLER.out.versions)
        ch_versions = ch_versions.mix(TABIX_TABIX.out.versions)

    emit:
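SVDB_MERGE's caller-priority list is now supplied by the caller so it can match whichever callers actually ran. That wiring lives in the main workflow, which is outside this excerpt; presumably it looks something like:

    // Hypothetical sketch: priority mirrors the composition of vcf_list above.
    ch_svcaller_priority = params.skip_cnv_calling
        ? Channel.value(["tiddit", "manta"])
        : Channel.value(["tiddit", "manta", "gcnvcaller"])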
diff --git a/subworkflows/local/check_input.nf b/subworkflows/local/check_input.nf
index c7a490dc..3c862df2 100644
--- a/subworkflows/local/check_input.nf
+++ b/subworkflows/local/check_input.nf
@@ -14,8 +14,11 @@ workflow CHECK_INPUT {
        .splitCsv ( header:true, sep:',' )
        .set { sheet }

-   case_info = sheet.first()
-       .map { create_case_channel(it) }
+   case_info = sheet
+       .toList()
+       .map {create_case_channel(it)}
+
+
    reads = sheet.map { row -> [[row.sample.split('_')[0]], row] }
        .groupTuple()
        .map { meta, rows ->
@@ -25,6 +28,7 @@ workflow CHECK_INPUT {
        .map { row, numLanes ->
            create_fastq_channel(row + [num_lanes:numLanes])
        }
+
    samples = sheet.map { create_samples_channel(it) }

    emit:
@@ -79,9 +83,34 @@ def create_samples_channel(LinkedHashMap row) {
 }

 // Function to get a list of metadata (e.g. case id) for the case [ meta ]
-def create_case_channel(LinkedHashMap row) {
-    def case_info = [:]
-    case_info.id  = row.case_id
+def create_case_channel(List rows) {
+    def case_info    = [:]
+    def probands     = []
+    def upd_children = []
+    def father       = ""
+    def mother       = ""
+
+    for (item in rows) {
+        if (item.phenotype == "2") {
+            probands.add(item.sample.split("_T")[0])
+        }
+        if ( (item.paternal_id!="0") && (item.paternal_id!="") && (item.maternal_id!="0") && (item.maternal_id!="") ) {
+            upd_children.add(item.sample.split("_T")[0])
+        }
+        if ( (item.paternal_id!="0") && (item.paternal_id!="") ) {
+            father = item.paternal_id
+        }
+        if ( (item.maternal_id!="0") && (item.maternal_id!="") ) {
+            mother = item.maternal_id
+        }
+    }
+
+    case_info.father       = father
+    case_info.mother       = mother
+    case_info.probands     = probands
+    case_info.upd_children = upd_children
+    case_info.id           = rows[0].case_id

    return case_info
 }
+
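Concretely, the rewritten create_case_channel aggregates over all samplesheet rows at once. For a hypothetical two-sample case:

    // Rows as produced by splitCsv(header:true); all values are hypothetical.
    def rows = [
        [sample:'proband_T1', phenotype:'2', paternal_id:'dad', maternal_id:'mom', case_id:'fam1'],
        [sample:'dad_T1',     phenotype:'1', paternal_id:'0',   maternal_id:'0',   case_id:'fam1'],
    ]
    assert create_case_channel(rows) == [
        father:'dad', mother:'mom', probands:['proband'], upd_children:['proband'], id:'fam1'
    ]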
diff --git a/subworkflows/local/gens.nf b/subworkflows/local/gens.nf
index 6cc87a12..8e47cac7 100644
--- a/subworkflows/local/gens.nf
+++ b/subworkflows/local/gens.nf
@@ -8,20 +8,20 @@ include { GENS as GENS_GENERATE } from '../../modules/loc
 workflow GENS {
    take:
-       ch_bam           // channel: [mandatory] [ val(meta), path(bam), path(bai) ]
-       ch_vcf           // channel: [mandatory] [ val(meta), path(vcf) ]
-       ch_fasta         // channel: [mandatory] [ val(meta), path(fasta) ]
-       ch_fai           // channel: [mandatory] [ path(fai) ]
-       ch_interval_list // channel: [mandatory] [ path(interval_list) ]
-       ch_pon           // channel: [mandatory] [ path(pon) ]
-       ch_gnomad_pos    // channel: [mandatory] [ path(gnomad_pos) ]
-       ch_case_info     // channel: [mandatory] [ val(case_info) ]
-       ch_seq_dict      // channel: [mandatory] [ path(dict) ]
+       ch_bam_bai           // channel: [mandatory] [ val(meta), path(bam), path(bai) ]
+       ch_vcf               // channel: [mandatory] [ val(meta), path(vcf) ]
+       ch_genome_fasta      // channel: [mandatory] [ val(meta), path(fasta) ]
+       ch_genome_fai        // channel: [mandatory] [ val(meta), path(fai) ]
+       ch_interval_list     // channel: [mandatory] [ path(interval_list) ]
+       ch_pon               // channel: [mandatory] [ path(pon) ]
+       ch_gnomad_pos        // channel: [mandatory] [ path(gnomad_pos) ]
+       ch_case_info         // channel: [mandatory] [ val(case_info) ]
+       ch_genome_dictionary // channel: [mandatory] [ val(meta), path(dict) ]

    main:
        ch_versions = Channel.empty()

-       COLLECTREADCOUNTS (ch_bam, ch_fasta, ch_fai, ch_seq_dict, ch_interval_list)
+       COLLECTREADCOUNTS (ch_bam_bai, ch_genome_fasta, ch_genome_fai, ch_genome_dictionary, ch_interval_list)

        DENOISEREADCOUNTS (COLLECTREADCOUNTS.out.read_counts, ch_pon)
diff --git a/subworkflows/local/mitochondria/align_and_call_MT.nf b/subworkflows/local/mitochondria/align_and_call_MT.nf
index 7feb58bb..be060a7d 100644
--- a/subworkflows/local/mitochondria/align_and_call_MT.nf
+++ b/subworkflows/local/mitochondria/align_and_call_MT.nf
@@ -13,29 +13,33 @@ include { HAPLOCHECK as HAPLOCHECK_MT } fr
 include { GATK4_MUTECT2 as GATK4_MUTECT2_MT } from '../../../modules/nf-core/gatk4/mutect2/main'
 include { GATK4_FILTERMUTECTCALLS as GATK4_FILTERMUTECTCALLS_MT } from '../../../modules/nf-core/gatk4/filtermutectcalls/main'
 include { TABIX_TABIX as TABIX_TABIX_MT } from '../../../modules/nf-core/tabix/tabix/main'
+include { MT_DELETION } from '../../../modules/local/mt_deletion_script'
+include { EKLIPSE as EKLIPSE_MT } from '../../../modules/nf-core/eklipse/main'

 workflow ALIGN_AND_CALL_MT {
    take:
-       ch_fastq         // channel: [mandatory] [ val(meta), [ path(reads) ] ]
-       ch_ubam          // channel: [mandatory] [ val(meta), path(bam) ]
-       ch_index_bwa     // channel: [mandatory for sentieon] [ val(meta), path(index) ]
-       ch_index_bwamem2 // channel: [mandatory for bwamem2] [ val(meta), path(index) ]
-       ch_fasta         // channel: [mandatory] [ path(fasta) ]
-       ch_dict          // channel: [mandatory] [ path(dict) ]
-       ch_fai           // channel: [mandatory] [ path(fai) ]
-       ch_intervals_mt  // channel: [mandatory] [ path(interval_list) ]
+       ch_fastq        // channel: [mandatory] [ val(meta), [ path(reads) ] ]
+       ch_ubam         // channel: [mandatory] [ val(meta), path(bam) ]
+       ch_bwaindex     // channel: [mandatory for sentieon] [ val(meta), path(index) ]
+       ch_bwamem2index // channel: [mandatory for bwamem2] [ val(meta), path(index) ]
+       ch_fasta        // channel: [mandatory] [ val(meta), path(fasta) ]
+       ch_dict         // channel: [mandatory] [ val(meta), path(dict) ]
+       ch_fai          // channel: [mandatory] [ val(meta), path(fai) ]
+       ch_intervals    // channel: [mandatory] [ path(interval_list) ]

    main:
        ch_versions = Channel.empty()

-       BWAMEM2_MEM_MT (ch_fastq , ch_index_bwamem2, true)
+       BWAMEM2_MEM_MT (ch_fastq, ch_bwamem2index, true)

-       SENTIEON_BWAMEM_MT ( ch_fastq, ch_fasta, ch_fai, ch_index_bwa )
+       SENTIEON_BWAMEM_MT ( ch_fastq, ch_fasta, ch_fai, ch_bwaindex )

-       ch_mt_bam     = Channel.empty().mix(BWAMEM2_MEM_MT.out.bam, SENTIEON_BWAMEM_MT.out.bam)
-       ch_fastq_ubam = ch_mt_bam.join(ch_ubam, failOnMismatch:true, failOnDuplicate:true)
+       Channel.empty()
+           .mix(BWAMEM2_MEM_MT.out.bam, SENTIEON_BWAMEM_MT.out.bam)
+           .join(ch_ubam, failOnMismatch:true, failOnDuplicate:true)
+           .set {ch_bam_ubam}

-       GATK4_MERGEBAMALIGNMENT_MT (ch_fastq_ubam, ch_fasta, ch_dict)
+       GATK4_MERGEBAMALIGNMENT_MT (ch_bam_ubam, ch_fasta, ch_dict)

        PICARD_ADDORREPLACEREADGROUPS_MT (GATK4_MERGEBAMALIGNMENT_MT.out.bam)

@@ -45,7 +49,11 @@ workflow ALIGN_AND_CALL_MT {
        SAMTOOLS_INDEX_MT(SAMTOOLS_SORT_MT.out.bam)
        ch_sort_index_bam        = SAMTOOLS_SORT_MT.out.bam.join(SAMTOOLS_INDEX_MT.out.bai, failOnMismatch:true, failOnDuplicate:true)
-       ch_sort_index_bam_int_mt = ch_sort_index_bam.combine(ch_intervals_mt)
+       ch_sort_index_bam_int_mt = ch_sort_index_bam.combine(ch_intervals)
+
+       EKLIPSE_MT(ch_sort_index_bam,[])
+
+       MT_DELETION(ch_sort_index_bam, ch_fasta)

        GATK4_MUTECT2_MT (ch_sort_index_bam_int_mt, ch_fasta, ch_fai, ch_dict, [], [], [],[])

@@ -67,16 +75,22 @@ workflow ALIGN_AND_CALL_MT {
        ch_versions = ch_versions.mix(PICARD_MARKDUPLICATES_MT.out.versions.first())
        ch_versions = ch_versions.mix(SAMTOOLS_SORT_MT.out.versions.first())
        ch_versions = ch_versions.mix(SAMTOOLS_INDEX_MT.out.versions.first())
+       ch_versions = ch_versions.mix(EKLIPSE_MT.out.versions.first())
+       ch_versions = ch_versions.mix(MT_DELETION.out.versions.first())
        ch_versions = ch_versions.mix(GATK4_MUTECT2_MT.out.versions.first())
        ch_versions = ch_versions.mix(HAPLOCHECK_MT.out.versions.first())
        ch_versions = ch_versions.mix(GATK4_FILTERMUTECTCALLS_MT.out.versions.first())

    emit:
-       vcf        = GATK4_FILTERMUTECTCALLS_MT.out.vcf   // channel: [ val(meta), path(vcf) ]
-       tbi        = GATK4_FILTERMUTECTCALLS_MT.out.tbi   // channel: [ val(meta), path(tbi) ]
-       stats      = GATK4_MUTECT2_MT.out.stats           // channel: [ val(meta), path(stats) ]
-       filt_stats = GATK4_FILTERMUTECTCALLS_MT.out.stats // channel: [ val(meta), path(tsv) ]
-       txt        = HAPLOCHECK_MT.out.txt                // channel: [ val(meta), path(txt) ]
-       html       = HAPLOCHECK_MT.out.html               // channel: [ val(meta), path(html) ]
-       versions   = ch_versions                          // channel: [ path(versions.yml) ]
+       vcf            = GATK4_FILTERMUTECTCALLS_MT.out.vcf   // channel: [ val(meta), path(vcf) ]
+       tbi            = GATK4_FILTERMUTECTCALLS_MT.out.tbi   // channel: [ val(meta), path(tbi) ]
+       stats          = GATK4_MUTECT2_MT.out.stats           // channel: [ val(meta), path(stats) ]
+       filt_stats     = GATK4_FILTERMUTECTCALLS_MT.out.stats // channel: [ val(meta), path(tsv) ]
+       eklipse_del    = EKLIPSE_MT.out.deletions             // channel: [ val(meta), path(csv) ]
+       eklipse_genes  = EKLIPSE_MT.out.genes                 // channel: [ val(meta), path(csv) ]
+       eklipse_circos = EKLIPSE_MT.out.circos                // channel: [ val(meta), path(png) ]
+       txt            = HAPLOCHECK_MT.out.txt                // channel: [ val(meta), path(txt) ]
+       html           = HAPLOCHECK_MT.out.html               // channel: [ val(meta), path(html) ]
+       mt_del_result  = MT_DELETION.out.mt_del_result        // channel: [ val(meta), path(txt) ]
+       versions       = ch_versions                          // channel: [ path(versions.yml) ]
 }
diff --git a/subworkflows/local/mitochondria/convert_mt_bam_to_fastq.nf b/subworkflows/local/mitochondria/convert_mt_bam_to_fastq.nf
index adbee07b..156f71bb 100644
--- a/subworkflows/local/mitochondria/convert_mt_bam_to_fastq.nf
+++ b/subworkflows/local/mitochondria/convert_mt_bam_to_fastq.nf
@@ -8,16 +8,16 @@ include { GATK4_SAMTOFASTQ as GATK4_SAMTOFASTQ_MT } from '../../../modules/nf-co
 workflow CONVERT_MT_BAM_TO_FASTQ {
    take:
-       ch_bam               // channel: [mandatory] [ val(meta), path(bam), path(bai) ]
-       ch_genome_fasta_meta // channel: [mandatory] [ val(meta), path(fasta) ]
-       ch_genome_fai        // channel: [mandatory] [ path(fai) ]
-       ch_genome_dict       // channel: [mandatory] [ path(dict) ]
+       ch_bam_bai      // channel: [mandatory] [ val(meta), path(bam), path(bai) ]
+       ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ]
+       ch_genome_fai   // channel: [mandatory] [ val(meta), path(fai) ]
+       ch_genome_dict  // channel: [mandatory] [ val(meta), path(dict) ]

    main:
        ch_versions = Channel.empty()

        // Outputs bam containing only MT
-       GATK4_PRINTREADS_MT ( ch_bam, ch_genome_fasta_meta, ch_genome_fai, ch_genome_dict )
+       GATK4_PRINTREADS_MT ( ch_bam_bai, ch_genome_fasta, ch_genome_fai, ch_genome_dict )

        // Removes alignment information
        GATK4_REVERTSAM_MT ( GATK4_PRINTREADS_MT.out.bam )
diff --git a/subworkflows/local/mitochondria/merge_annotate_MT.nf b/subworkflows/local/mitochondria/merge_annotate_MT.nf
index 16eb4438..8a78ed27 100644
--- a/subworkflows/local/mitochondria/merge_annotate_MT.nf
+++ b/subworkflows/local/mitochondria/merge_annotate_MT.nf
@@ -9,21 +9,24 @@ include { TABIX_TABIX as TABIX_TABIX_MT } from '../../..
 include { BCFTOOLS_NORM as REMOVE_DUPLICATES_MT } from '../../../modules/nf-core/bcftools/norm/main'
 include { TABIX_TABIX as TABIX_TABIX_MT2 } from '../../../modules/nf-core/tabix/tabix/main'
 include { BCFTOOLS_MERGE as BCFTOOLS_MERGE_MT } from '../../../modules/nf-core/bcftools/merge/main'
+include { TABIX_TABIX as TABIX_TABIX_MERGE } from '../../../modules/nf-core/tabix/tabix/main'
 include { TABIX_TABIX as TABIX_TABIX_MT3 } from '../../../modules/nf-core/tabix/tabix/main'
 include { ENSEMBLVEP as ENSEMBLVEP_MT } from '../../../modules/local/ensemblvep/main'
 include { HAPLOGREP2_CLASSIFY as HAPLOGREP2_CLASSIFY_MT } from '../../../modules/nf-core/haplogrep2/classify/main'
 include { VCFANNO as VCFANNO_MT } from '../../../modules/nf-core/vcfanno/main'
+include { ANNOTATE_CADD } from '../annotation/annotate_cadd'
 include { TABIX_BGZIPTABIX as ZIP_TABIX_HMTNOTE } from '../../../modules/nf-core/tabix/bgziptabix/main'
-include { HMTNOTE_ANNOTATE as HMTNOTE_ANNOTATE } from '../../../modules/nf-core/hmtnote/annotate/main'
+include { HMTNOTE_ANNOTATE } from '../../../modules/nf-core/hmtnote/annotate/main'

 workflow MERGE_ANNOTATE_MT {
    take:
        ch_vcf1              // channel: [mandatory] [ val(meta), path(vcf) ]
        ch_vcf2              // channel: [mandatory] [ val(meta), path(vcf) ]
-       ch_genome_fasta        // channel: [mandatory] [ path(fasta) ]
-       ch_genome_dict_meta    // channel: [mandatory] [ val(meta), path(dict) ]
-       ch_genome_dict_no_meta // channel: [mandatory] [ path(dict) ]
-       ch_genome_fai          // channel: [mandatory] [ path(fai) ]
+       ch_cadd_header       // channel: [mandatory] [ path(txt) ]
+       ch_cadd_resources    // channel: [mandatory] [ path(annotation) ]
+       ch_genome_fasta      // channel: [mandatory] [ val(meta), path(fasta) ]
+       ch_genome_dict       // channel: [mandatory] [ val(meta), path(dict) ]
+       ch_genome_fai        // channel: [mandatory] [ val(meta), path(fai) ]
        ch_vcfanno_resources // channel: [mandatory] [ path(resources) ]
        ch_vcfanno_toml      // channel: [mandatory] [ path(toml) ]
        val_vep_genome       // string: [mandatory] GRCh37 or GRCh38
@@ -39,13 +42,13 @@ workflow MERGE_ANNOTATE_MT {
            .map{ meta, vcf1, vcf2 ->
                [meta, [vcf1, vcf2]]
            }
-       GATK4_MERGEVCFS_LIFT_UNLIFT_MT( ch_vcfs, ch_genome_dict_meta)
+       GATK4_MERGEVCFS_LIFT_UNLIFT_MT( ch_vcfs, ch_genome_dict)

        // Filtering Variants
        GATK4_MERGEVCFS_LIFT_UNLIFT_MT.out.vcf
            .join(GATK4_MERGEVCFS_LIFT_UNLIFT_MT.out.tbi, failOnMismatch:true, failOnDuplicate:true)
            .set { ch_filt_vcf }
-       GATK4_VARIANTFILTRATION_MT (ch_filt_vcf, ch_genome_fasta, ch_genome_fai, ch_genome_dict_no_meta)
+       GATK4_VARIANTFILTRATION_MT (ch_filt_vcf, ch_genome_fasta, ch_genome_fai, ch_genome_dict)

        // Spliting multiallelic calls
        GATK4_VARIANTFILTRATION_MT.out.vcf
@@ -87,25 +90,55 @@ workflow MERGE_ANNOTATE_MT {
            }.set { ch_case_vcf }

        BCFTOOLS_MERGE_MT( ch_case_vcf.multiple,
-           [],
            ch_genome_fasta,
-           ch_genome_fai)
-       ch_merged_vcf = BCFTOOLS_MERGE_MT.out.merged_variants
-
-       ch_in_vep = ch_merged_vcf.mix(ch_case_vcf.single)
+           ch_genome_fai,
+           []
+       )
+
+       BCFTOOLS_MERGE_MT.out.merged_variants
+           .mix(ch_case_vcf.single)
+           .set { ch_annotation_in }
+
+       TABIX_TABIX_MERGE(ch_annotation_in)
+
+       // Annotating with CADD
+       ANNOTATE_CADD (
+           ch_annotation_in,
+           TABIX_TABIX_MERGE.out.tbi,
+           ch_cadd_header,
+           ch_cadd_resources
+       )
+
+       // Pick input for vep
+       ch_annotation_in
+           .combine(ANNOTATE_CADD.out.vcf.ifEmpty("null"))
+           .branch { it ->
+               merged: it[2].equals("null")
+                   return [it[0], it[1]]
+               cadd: !(it[2].equals("null"))
+                   return [it[2], it[3]]
+           }
+           .set { ch_for_mix }
+       ch_vep_in = ch_for_mix.merged.mix(ch_for_mix.cadd)

        // Annotating with ensembl Vep
-       ENSEMBLVEP_MT( ch_in_vep,
+       ENSEMBLVEP_MT(
+           ch_vep_in,
+           ch_genome_fasta,
            val_vep_genome,
            "homo_sapiens",
            val_vep_cache_version,
            ch_vep_cache,
-           ch_genome_fasta,
-           [])
+           []
+       )

        // Running vcfanno
        TABIX_TABIX_MT3(ENSEMBLVEP_MT.out.vcf_gz)
-       ch_in_vcfanno = ENSEMBLVEP_MT.out.vcf_gz.join(TABIX_TABIX_MT3.out.tbi, failOnMismatch:true, failOnDuplicate:true)
+       ENSEMBLVEP_MT.out.vcf_gz
+           .join(TABIX_TABIX_MT3.out.tbi, failOnMismatch:true, failOnDuplicate:true)
+           .map { meta, vcf, tbi -> return [meta, vcf, tbi, []]}
+           .set { ch_in_vcfanno }
+
        VCFANNO_MT(ch_in_vcfanno, ch_vcfanno_toml, [], ch_vcfanno_resources)

        // HMTNOTE ANNOTATE
@@ -117,13 +150,14 @@ workflow MERGE_ANNOTATE_MT {
        ch_tbi_out = ZIP_TABIX_HMTNOTE.out.gz_tbi.map{meta, vcf, tbi -> return [meta, tbi] }

        // Running haplogrep2
-       HAPLOGREP2_CLASSIFY_MT(ch_in_vep, "vcf.gz")
+       HAPLOGREP2_CLASSIFY_MT(ch_vep_in, "vcf.gz")

        ch_versions = ch_versions.mix(GATK4_MERGEVCFS_LIFT_UNLIFT_MT.out.versions.first())
        ch_versions = ch_versions.mix(GATK4_VARIANTFILTRATION_MT.out.versions.first())
        ch_versions = ch_versions.mix(SPLIT_MULTIALLELICS_MT.out.versions.first())
        ch_versions = ch_versions.mix(REMOVE_DUPLICATES_MT.out.versions.first())
        ch_versions = ch_versions.mix(BCFTOOLS_MERGE_MT.out.versions)
+       ch_versions = ch_versions.mix(ANNOTATE_CADD.out.versions)
        ch_versions = ch_versions.mix(ENSEMBLVEP_MT.out.versions)
        ch_versions = ch_versions.mix(VCFANNO_MT.out.versions)
        ch_versions = ch_versions.mix(HMTNOTE_ANNOTATE.out.versions)
diff --git a/subworkflows/local/prepare_references.nf b/subworkflows/local/prepare_references.nf
index 0eb68049..45e20a24 100644
--- a/subworkflows/local/prepare_references.nf
+++ b/subworkflows/local/prepare_references.nf
@@ -4,17 +4,21 @@
 include { BWA_INDEX as BWA_INDEX_GENOME } from '../../modules/nf-core/bwa/index/main'
 include { BWAMEM2_INDEX as BWAMEM2_INDEX_GENOME } from '../../modules/nf-core/bwamem2/index/main'
-include { BWAMEM2_INDEX as BWAMEM2_INDEX_SHIFT_MT } from '../../modules/nf-core/bwamem2/index/main'
+include { BWAMEM2_INDEX as BWAMEM2_INDEX_MT_SHIFT } from '../../modules/nf-core/bwamem2/index/main'
 include { CAT_CAT as CAT_CAT_BAIT } from '../../modules/nf-core/cat/cat/main'
 include { GATK4_BEDTOINTERVALLIST as GATK_BILT } from '../../modules/nf-core/gatk4/bedtointervallist/main'
 include { GATK4_CREATESEQUENCEDICTIONARY as GATK_SD } from '../../modules/nf-core/gatk4/createsequencedictionary/main'
-include { GATK4_CREATESEQUENCEDICTIONARY as GATK_SD_SHIFT_MT } from '../../modules/nf-core/gatk4/createsequencedictionary/main'
+include { GATK4_CREATESEQUENCEDICTIONARY as GATK_SD_MT_SHIFT } from '../../modules/nf-core/gatk4/createsequencedictionary/main'
 include { GATK4_INTERVALLISTTOOLS as GATK_ILT } from '../../modules/nf-core/gatk4/intervallisttools/main'
+include { GATK4_PREPROCESSINTERVALS as GATK_PREPROCESS_WGS } from '../../modules/nf-core/gatk4/preprocessintervals/main.nf'
+include { GATK4_PREPROCESSINTERVALS as GATK_PREPROCESS_WES } from '../../modules/nf-core/gatk4/preprocessintervals/main.nf'
+include { GATK4_SHIFTFASTA as GATK_SHIFTFASTA } from '../../modules/nf-core/gatk4/shiftfasta/main'
 include { GET_CHROM_SIZES } from '../../modules/local/get_chrom_sizes'
+include { SAMTOOLS_FAIDX as SAMTOOLS_EXTRACT_MT } from '../../modules/nf-core/samtools/faidx/main'
 include { SAMTOOLS_FAIDX as SAMTOOLS_FAIDX_GENOME } from '../../modules/nf-core/samtools/faidx/main'
-include { SAMTOOLS_FAIDX as SAMTOOLS_FAIDX_SHIFT_MT } from '../../modules/nf-core/samtools/faidx/main'
+include { SAMTOOLS_FAIDX as SAMTOOLS_FAIDX_MT_SHIFT } from '../../modules/nf-core/samtools/faidx/main'
 include { SENTIEON_BWAINDEX as SENTIEON_BWAINDEX_GENOME } from '../../modules/local/sentieon/bwamemindex'
-include { SENTIEON_BWAINDEX as SENTIEON_BWAINDEX_SHIFT_MT } from '../../modules/local/sentieon/bwamemindex'
+include { SENTIEON_BWAINDEX as SENTIEON_BWAINDEX_MT_SHIFT } from '../../modules/local/sentieon/bwamemindex'
 include { TABIX_BGZIPTABIX as TABIX_PBT } from '../../modules/nf-core/tabix/bgziptabix/main'
 include { TABIX_TABIX as TABIX_DBSNP } from '../../modules/nf-core/tabix/tabix/main'
 include { TABIX_TABIX as TABIX_GNOMAD_AF } from '../../modules/nf-core/tabix/tabix/main'
@@ -23,14 +27,13 @@ include { UNTAR as UNTAR_VEP_CACHE } from '../../modul

 workflow PREPARE_REFERENCES {
    take:
-       ch_fasta_no_meta          // channel: [mandatory] [ path(fasta) ]
-       ch_fasta_meta             // channel: [mandatory] [ val(meta), path(fasta) ]
-       ch_mt_fasta_shift_no_meta // channel: [mandatory for dedicated mt analysis] [ path(fasta) ]
-       ch_mt_fasta_shift_meta    // channel: [mandatory for dedicated mt analysis] [ val(meta), path(fasta) ]
-       ch_gnomad_af_tab          // channel: [optional; used in for snv annotation] [ val(meta), path(tab) ]
-       ch_known_dbsnp            // channel: [optional; used only by sentieon] [ val(meta), path(vcf) ]
-       ch_target_bed             // channel: [mandatory for WES] [ path(bed) ]
-       ch_vep_cache              // channel: [mandatory for annotation] [ path(cache) ]
+       ch_genome_fasta  // channel: [mandatory] [ val(meta), path(fasta) ]
+       ch_genome_fai    // channel: [mandatory] [ val(meta), path(fai) ]
+       ch_mt_fasta      // channel: [mandatory for dedicated mt analysis] [ val(meta), path(fasta) ]
+       ch_gnomad_af_tab // channel: [optional; used for snv annotation] [ val(meta), path(tab) ]
+       ch_known_dbsnp   // channel: [optional; used only by sentieon] [ val(meta), path(vcf) ]
+       ch_target_bed    // channel: [mandatory for WES] [ path(bed) ]
+       ch_vep_cache     // channel: [mandatory for annotation] [ path(cache) ]

    main:
        ch_versions = Channel.empty()
@@ -40,19 +43,36 @@ workflow PREPARE_REFERENCES {
        ch_sentieonbwa = Channel.empty()

        // Genome indices
-       BWA_INDEX_GENOME(ch_fasta_meta).index.set{ch_bwa}
-       BWAMEM2_INDEX_GENOME(ch_fasta_meta)
-       BWAMEM2_INDEX_SHIFT_MT(ch_mt_fasta_shift_meta)
-       SENTIEON_BWAINDEX_GENOME(ch_fasta_meta).index.set{ch_sentieonbwa}
-       SENTIEON_BWAINDEX_SHIFT_MT(ch_mt_fasta_shift_meta)
-       SAMTOOLS_FAIDX_GENOME(ch_fasta_meta)
-       SAMTOOLS_FAIDX_SHIFT_MT(ch_mt_fasta_shift_meta)
-       GATK_SD(ch_fasta_no_meta)
-       GATK_SD_SHIFT_MT(ch_mt_fasta_shift_no_meta)
+       BWA_INDEX_GENOME(ch_genome_fasta).index.set{ch_bwa}
+       BWAMEM2_INDEX_GENOME(ch_genome_fasta)
+       SENTIEON_BWAINDEX_GENOME(ch_genome_fasta).index.set{ch_sentieonbwa}
+       SAMTOOLS_FAIDX_GENOME(ch_genome_fasta, [[],[]])
+       GATK_SD(ch_genome_fasta)
        GET_CHROM_SIZES( SAMTOOLS_FAIDX_GENOME.out.fai )

+       // MT indices
+       ch_fai = Channel.empty().mix(ch_genome_fai, SAMTOOLS_FAIDX_GENOME.out.fai).collect()
+       SAMTOOLS_EXTRACT_MT(ch_genome_fasta, ch_fai)
+       ch_mt_fasta_in = Channel.empty().mix(ch_mt_fasta, SAMTOOLS_EXTRACT_MT.out.fa).collect()
+       SAMTOOLS_FAIDX_MT_SHIFT(ch_mt_fasta_in, [[],[]])
+       GATK_SD_MT_SHIFT(ch_mt_fasta_in)
+       GATK_SHIFTFASTA(ch_mt_fasta_in, SAMTOOLS_FAIDX_MT_SHIFT.out.fai, GATK_SD_MT_SHIFT.out.dict)
+       BWAMEM2_INDEX_MT_SHIFT(GATK_SHIFTFASTA.out.shift_fa)
+       SENTIEON_BWAINDEX_MT_SHIFT(GATK_SHIFTFASTA.out.shift_fa)
+       GATK_SHIFTFASTA.out.intervals
+           .multiMap{ meta, files ->
+               shift_intervals:
+                   ind = files.findIndexValues {it.toString().endsWith("shifted.intervals")}
+                   files[ind]
+               intervals:
+                   ind = files.findIndexValues {!(it.toString().endsWith("shifted.intervals"))}
+                   files[ind]
+           }
+           .set {ch_shiftfasta_mtintervals}
+
        // Vcf, tab and bed indices
        TABIX_DBSNP(ch_known_dbsnp)
+       ch_versions = ch_versions.mix(TABIX_DBSNP.out.versions)
        TABIX_GNOMAD_AF(ch_gnomad_af_tab)
        TABIX_PT(ch_target_bed).tbi.set { ch_tbi }
        TABIX_PBT(ch_target_bed).gz_tbi.set { ch_bgzip_tbi }
@@ -70,44 +90,58 @@ workflow PREPARE_REFERENCES {
        CAT_CAT_BAIT ( ch_bait_intervals_cat_in )

        UNTAR_VEP_CACHE (ch_vep_cache)

+       //cnvcalling intervals
+       GATK_PREPROCESS_WGS (ch_genome_fasta, ch_fai, GATK_SD.out.dict, [[],[]], [[],[]]).set {ch_preprocwgs}
+       GATK_PREPROCESS_WES (ch_genome_fasta, ch_fai, GATK_SD.out.dict, GATK_BILT.out.interval_list, [[],[]]).set {ch_preprocwes}
+
        // Gather versions
        ch_versions = ch_versions.mix(BWA_INDEX_GENOME.out.versions)
        ch_versions = ch_versions.mix(BWAMEM2_INDEX_GENOME.out.versions)
-       ch_versions = ch_versions.mix(BWAMEM2_INDEX_SHIFT_MT.out.versions)
        ch_versions = ch_versions.mix(SENTIEON_BWAINDEX_GENOME.out.versions)
-       ch_versions = ch_versions.mix(SENTIEON_BWAINDEX_SHIFT_MT.out.versions)
        ch_versions = ch_versions.mix(SAMTOOLS_FAIDX_GENOME.out.versions)
-       ch_versions = ch_versions.mix(SAMTOOLS_FAIDX_SHIFT_MT.out.versions)
        ch_versions = ch_versions.mix(GATK_SD.out.versions)
-       ch_versions = ch_versions.mix(GATK_SD_SHIFT_MT.out.versions)
        ch_versions = ch_versions.mix(GET_CHROM_SIZES.out.versions)
-       ch_versions = ch_versions.mix(TABIX_DBSNP.out.versions)
+       ch_versions = ch_versions.mix(SAMTOOLS_EXTRACT_MT.out.versions)
+       ch_versions = ch_versions.mix(SAMTOOLS_FAIDX_MT_SHIFT.out.versions)
+       ch_versions = ch_versions.mix(GATK_SD_MT_SHIFT.out.versions)
+       ch_versions = ch_versions.mix(GATK_SHIFTFASTA.out.versions)
+       ch_versions = ch_versions.mix(BWAMEM2_INDEX_MT_SHIFT.out.versions)
+       ch_versions = ch_versions.mix(SENTIEON_BWAINDEX_MT_SHIFT.out.versions)
        ch_versions = ch_versions.mix(TABIX_GNOMAD_AF.out.versions)
        ch_versions = ch_versions.mix(TABIX_PT.out.versions)
        ch_versions = ch_versions.mix(TABIX_PBT.out.versions)
        ch_versions = ch_versions.mix(GATK_BILT.out.versions)
        ch_versions = ch_versions.mix(GATK_ILT.out.versions)
+       ch_versions = ch_versions.mix(CAT_CAT_BAIT.out.versions)
        ch_versions = ch_versions.mix(UNTAR_VEP_CACHE.out.versions)
+       ch_versions = ch_versions.mix(GATK_PREPROCESS_WGS.out.versions)
+       ch_versions = ch_versions.mix(GATK_PREPROCESS_WES.out.versions)

    emit:
-       bait_intervals         = CAT_CAT_BAIT.out.file_out.map { id, it -> [it] }.collect()         // channel: [ path(intervals) ]
-       bwa_index              = Channel.empty().mix(ch_bwa, ch_sentieonbwa).collect()              // channel: [ val(meta), path(index) ]
-       bwa_index_mt_shift     = SENTIEON_BWAINDEX_SHIFT_MT.out.index.collect()                     // channel: [ val(meta), path(index) ]
-       bwamem2_index          = BWAMEM2_INDEX_GENOME.out.index.collect()                           // channel: [ val(meta), path(index) ]
-       bwamem2_index_mt_shift = BWAMEM2_INDEX_SHIFT_MT.out.index.collect()                         // channel: [ val(meta), path(index) ]
-       chrom_sizes            = GET_CHROM_SIZES.out.sizes.collect()                                // channel: [ path(sizes) ]
-       fasta_fai              = SAMTOOLS_FAIDX_GENOME.out.fai.map{ meta, fai -> [fai] }.collect()  // channel: [ path(fai) ]
-       fasta_fai_meta         = SAMTOOLS_FAIDX_GENOME.out.fai.collect()                            // channel: [ val(meta), path(fai) ]
-       fasta_fai_mt_shift     = SAMTOOLS_FAIDX_SHIFT_MT.out.fai.map{ meta, fai -> [fai] }.collect() // channel: [ path(fai) ]
-       gnomad_af_idx          = TABIX_GNOMAD_AF.out.tbi.collect()                                  // channel: [ val(meta), path(fasta) ]
-       known_dbsnp_tbi        = TABIX_DBSNP.out.tbi.collect()                                      // channel: [ val(meta), path(fasta) ]
-       sequence_dict          = GATK_SD.out.dict.collect()                                         // channel: [ path(dict) ]
-       sequence_dict_meta     = GATK_SD.out.dict.map {it -> [[id:it[0].simpleName], it]}.collect() // channel: [ val(meta), path(fasta) ]
-       sequence_dict_mt_shift = GATK_SD_SHIFT_MT.out.dict.collect()                                // channel: [ path(dict) ]
-       target_bed             = Channel.empty().mix(ch_tbi, ch_bgzip_tbi).collect()                // channel: [ val(meta), path(bed), path(tbi) ]
-       target_intervals       = GATK_BILT.out.interval_list.collect{it[1]}.collect()               // channel: [ path(interval_list) ]
-       vep_resources          = UNTAR_VEP_CACHE.out.untar.map{meta, files -> [files]}.collect()    // channel: [ path(cache) ]
-       versions               = ch_versions                                                        // channel: [ path(versions.yml) ]
+       genome_bwa_index      = Channel.empty().mix(ch_bwa, ch_sentieonbwa).collect()            // channel: [ val(meta), path(index) ]
+       genome_bwamem2_index  = BWAMEM2_INDEX_GENOME.out.index.collect()                         // channel: [ val(meta), path(index) ]
+       genome_chrom_sizes    = GET_CHROM_SIZES.out.sizes.collect()                              // channel: [ path(sizes) ]
+       genome_fai            = ch_fai                                                           // channel: [ val(meta), path(fai) ]
+       genome_dict           = GATK_SD.out.dict.collect()                                       // channel: [ val(meta), path(dict) ]
+       readcount_intervals   = Channel.empty()
+                                   .mix(ch_preprocwgs.interval_list,ch_preprocwes.interval_list) // channel: [ path(intervals) ]
+
+       mt_intervals          = ch_shiftfasta_mtintervals.intervals.collect()                    // channel: [ path(intervals) ]
+       mtshift_intervals     = ch_shiftfasta_mtintervals.shift_intervals.collect()              // channel: [ path(intervals) ]
+       mtshift_backchain     = GATK_SHIFTFASTA.out.shift_back_chain.collect()                   // channel: [ val(meta), path(backchain) ]
+       mtshift_fai           = GATK_SHIFTFASTA.out.shift_fai.collect()                          // channel: [ val(meta), path(fai) ]
+       mtshift_fasta         = GATK_SHIFTFASTA.out.shift_fa.collect()                           // channel: [ val(meta), path(fasta) ]
+       mtshift_dict          = GATK_SHIFTFASTA.out.dict.collect()                               // channel: [ val(meta), path(dict) ]
+       mtshift_bwa_index     = SENTIEON_BWAINDEX_MT_SHIFT.out.index.collect()                   // channel: [ val(meta), path(index) ]
+       mtshift_bwamem2_index = BWAMEM2_INDEX_MT_SHIFT.out.index.collect()                       // channel: [ val(meta), path(index) ]
+
+       gnomad_af_idx         = TABIX_GNOMAD_AF.out.tbi.collect()                                // channel: [ val(meta), path(tbi) ]
+       known_dbsnp_tbi       = TABIX_DBSNP.out.tbi.collect()                                    // channel: [ val(meta), path(tbi) ]
+       target_bed            = Channel.empty().mix(ch_tbi, ch_bgzip_tbi).collect()              // channel: [ val(meta), path(bed), path(tbi) ]
+       bait_intervals        = CAT_CAT_BAIT.out.file_out.map{ meta, inter -> inter}.collect()   // channel: [ path(intervals) ]
+       target_intervals      = GATK_BILT.out.interval_list.map{ meta, inter -> inter}.collect() // channel: [ path(interval_list) ]
+       vep_resources         = UNTAR_VEP_CACHE.out.untar.map{meta, files -> [files]}.collect()  // channel: [ path(cache) ]
+       versions              = ch_versions                                                      // channel: [ path(versions.yml) ]
 }
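GATK_SHIFTFASTA emits its two interval lists in a single element, which the multiMap above splits by file suffix. A standalone sketch with hypothetical file names:

    // Two interval files in one element are routed to two named outputs.
    Channel.of( [ [id:'mt'], [ file('non_control_region.intervals'), file('control_region.shifted.intervals') ] ] )
        .multiMap { meta, files ->
            shift_intervals:
                files[ files.findIndexValues { it.toString().endsWith("shifted.intervals") } ]
            intervals:
                files[ files.findIndexValues { !(it.toString().endsWith("shifted.intervals")) } ]
        }
        .set { ch_split }
    // ch_split.intervals       -> [ non_control_region.intervals ]
    // ch_split.shift_intervals -> [ control_region.shifted.intervals ]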
diff --git a/subworkflows/local/qc_bam.nf b/subworkflows/local/qc_bam.nf
index 5e3f046b..06b55422 100644
--- a/subworkflows/local/qc_bam.nf
+++ b/subworkflows/local/qc_bam.nf
@@ -19,8 +19,8 @@ workflow QC_BAM {
        ch_bam              // channel: [mandatory] [ val(meta), path(bam) ]
        ch_bai              // channel: [mandatory] [ val(meta), path(bai) ]
        ch_bam_bai          // channel: [mandatory] [ val(meta), path(bam), path(bai) ]
-       ch_fasta            // channel: [mandatory] [ val(meta), path(fasta) ]
-       ch_fai              // channel: [mandatory] [ val(meta), path(fai) ]
+       ch_genome_fasta     // channel: [mandatory] [ val(meta), path(fasta) ]
+       ch_genome_fai       // channel: [mandatory] [ val(meta), path(fai) ]
        ch_bait_intervals   // channel: [mandatory] [ path(intervals_list) ]
        ch_target_intervals // channel: [mandatory] [ path(intervals_list) ]
        ch_chrom_sizes      // channel: [mandatory] [ path(sizes) ]
@@ -30,25 +30,30 @@ workflow QC_BAM {
    main:
        ch_versions = Channel.empty()

-       PICARD_COLLECTMULTIPLEMETRICS (ch_bam_bai, ch_fasta, ch_fai)
+       PICARD_COLLECTMULTIPLEMETRICS (ch_bam_bai, ch_genome_fasta, ch_genome_fai)

-       PICARD_COLLECTHSMETRICS (ch_bam_bai, ch_fasta, ch_fai, ch_bait_intervals, ch_target_intervals)
+       ch_bam_bai
+           .combine(ch_bait_intervals)
+           .combine(ch_target_intervals)
+           .set { ch_hsmetrics_in }
+
+       PICARD_COLLECTHSMETRICS (ch_hsmetrics_in, ch_genome_fasta, ch_genome_fai, [[],[]])

        QUALIMAP_BAMQC (ch_bam, [])

-       TIDDIT_COV (ch_bam, []) // 2nd pos. arg is req. only for cram input
+       TIDDIT_COV (ch_bam, [[],[]]) // 2nd pos. arg is req. only for cram input

        UCSC_WIGTOBIGWIG (TIDDIT_COV.out.wig, ch_chrom_sizes)

        ch_bam_bai.map{ meta, bam, bai -> [meta, bam, bai, []]}.set{ch_mosdepth_in}
-       MOSDEPTH (ch_mosdepth_in, ch_fasta)
+       MOSDEPTH (ch_mosdepth_in, ch_genome_fasta)

        // COLLECT WGS METRICS
-       PICARD_COLLECTWGSMETRICS ( ch_bam_bai, ch_fasta, ch_fai, ch_intervals_wgs )
-       PICARD_COLLECTWGSMETRICS_Y ( ch_bam_bai, ch_fasta, ch_fai, ch_intervals_y )
+       PICARD_COLLECTWGSMETRICS ( ch_bam_bai, ch_genome_fasta, ch_genome_fai, ch_intervals_wgs )
+       PICARD_COLLECTWGSMETRICS_Y ( ch_bam_bai, ch_genome_fasta, ch_genome_fai, ch_intervals_y )

-       SENTIEON_WGSMETRICS ( ch_bam_bai, ch_fasta, ch_fai, ch_intervals_wgs )
-       SENTIEON_WGSMETRICS_Y ( ch_bam_bai, ch_fasta, ch_fai, ch_intervals_y )
+       SENTIEON_WGSMETRICS ( ch_bam_bai, ch_genome_fasta, ch_genome_fai, ch_intervals_wgs )
+       SENTIEON_WGSMETRICS_Y ( ch_bam_bai, ch_genome_fasta, ch_genome_fai, ch_intervals_y )

        ch_cov   = Channel.empty().mix(PICARD_COLLECTWGSMETRICS.out.metrics, SENTIEON_WGSMETRICS.out.wgs_metrics)
        ch_cov_y = Channel.empty().mix(PICARD_COLLECTWGSMETRICS_Y.out.metrics, SENTIEON_WGSMETRICS_Y.out.wgs_metrics)
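The two combine calls above widen each BAM element so the updated PICARD_COLLECTHSMETRICS receives the interval lists alongside the reads; the trailing [[],[]] presumably fills an optional meta-tuple input of the module, following the empty-pair convention used throughout this patch. One element of ch_hsmetrics_in would look roughly like:

    // [ [id:'sample_1'], sample_1.bam, sample_1.bam.bai, bait.interval_list, target.interval_list ]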
diff --git a/subworkflows/local/rank_variants.nf b/subworkflows/local/rank_variants.nf
index f1a9c788..e274de33 100644
--- a/subworkflows/local/rank_variants.nf
+++ b/subworkflows/local/rank_variants.nf
@@ -12,7 +12,7 @@ workflow RANK_VARIANTS {

    take:
        ch_vcf                // channel: [mandatory] [ val(meta), path(vcf) ]
-       ch_ped                // channel: [mandatory] [ path(ped) ]
+       ch_pedfile            // channel: [mandatory] [ path(ped) ]
        ch_reduced_penetrance // channel: [mandatory] [ path(pentrance) ]
        ch_score_config       // channel: [mandatory] [ path(ini) ]

@@ -21,9 +21,9 @@ workflow RANK_VARIANTS {

        GENMOD_ANNOTATE(ch_vcf)

-       GENMOD_MODELS(GENMOD_ANNOTATE.out.vcf, ch_ped, ch_reduced_penetrance)
+       GENMOD_MODELS(GENMOD_ANNOTATE.out.vcf, ch_pedfile, ch_reduced_penetrance)

-       GENMOD_SCORE(GENMOD_MODELS.out.vcf, ch_ped, ch_score_config)
+       GENMOD_SCORE(GENMOD_MODELS.out.vcf, ch_pedfile, ch_score_config)

        GENMOD_COMPOUND(GENMOD_SCORE.out.vcf)

diff --git a/subworkflows/local/scatter_genome.nf b/subworkflows/local/scatter_genome.nf
index 6f5dac41..db95dfb2 100644
--- a/subworkflows/local/scatter_genome.nf
+++ b/subworkflows/local/scatter_genome.nf
@@ -8,17 +8,16 @@ include { GATK4_SPLITINTERVALS } from '../../modules/nf-core/gatk4/splitinterval

 workflow SCATTER_GENOME {
    take:
-       ch_dict              // channel: [mandatory] [ path(dict) ]
-       ch_fai_meta          // channel: [mandatory] [ val(meta), path(fai) ]
-       ch_fai_no_meta       // channel: [mandatory] [ path(fai) ]
-       ch_fasta_no_meta     // channel: [mandatory] [ path(fasta) ]
+       ch_genome_dictionary // channel: [mandatory] [ val(meta), path(dict) ]
+       ch_genome_fai        // channel: [mandatory] [ val(meta), path(fai) ]
+       ch_genome_fasta      // channel: [mandatory] [ val(meta), path(fasta) ]

    main:
        ch_versions = Channel.empty()

-       BUILD_BED (ch_fai_meta)
+       BUILD_BED (ch_genome_fai)

-       GATK4_SPLITINTERVALS(BUILD_BED.out.bed, ch_fasta_no_meta, ch_fai_no_meta, ch_dict)
+       GATK4_SPLITINTERVALS(BUILD_BED.out.bed, ch_genome_fasta, ch_genome_fai, ch_genome_dictionary)

        ch_versions = ch_versions.mix(BUILD_BED.out.versions)
        ch_versions = ch_versions.mix(GATK4_SPLITINTERVALS.out.versions)
diff --git a/subworkflows/local/variant_calling/call_snv_deepvariant.nf b/subworkflows/local/variant_calling/call_snv_deepvariant.nf
index fa240b1e..8324aa35 100644
--- a/subworkflows/local/variant_calling/call_snv_deepvariant.nf
+++ b/subworkflows/local/variant_calling/call_snv_deepvariant.nf
@@ -10,20 +10,20 @@ include { TABIX_TABIX as TABIX_GL } from '../../../modules/nf-co

 workflow CALL_SNV_DEEPVARIANT {
    take:
-       ch_bam          // channel: [mandatory] [ val(meta), path(bam), path(bai) ]
-       ch_fasta        // channel: [mandatory] [ path(fasta) ]
-       ch_fai          // channel: [mandatory] [ path(fai) ]
-       ch_case_info    // channel: [mandatory] [ val(case_info) ]
+       ch_bam_bai      // channel: [mandatory] [ val(meta), path(bam), path(bai) ]
+       ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ]
+       ch_genome_fai   // channel: [mandatory] [ val(meta), path(fai) ]
+       ch_case_info    // channel: [mandatory] [ val(case_info) ]

    main:
        ch_versions = Channel.empty()

-       ch_bam.map { meta, bam, bai ->
+       ch_bam_bai.map { meta, bam, bai ->
                return [meta, bam, bai, []]
            }
            .set { ch_deepvar_in }

-       DEEPVARIANT ( ch_deepvar_in, ch_fasta, ch_fai, [] )
+       DEEPVARIANT ( ch_deepvar_in, ch_genome_fasta, ch_genome_fai, [[],[]] )

        DEEPVARIANT.out.gvcf
            .collect{it[1]}
            .toList()
@@ -39,12 +39,12 @@ workflow CALL_SNV_DEEPVARIANT {
        ch_split_multi_in = GLNEXUS.out.bcf
            .map{ meta, bcf ->
                return [meta, bcf, []] }
-       SPLIT_MULTIALLELICS_GL (ch_split_multi_in, ch_fasta)
+       SPLIT_MULTIALLELICS_GL (ch_split_multi_in, ch_genome_fasta)

        ch_remove_dup_in = SPLIT_MULTIALLELICS_GL.out.vcf
            .map{ meta, vcf ->
                return [meta, vcf, []] }
-       REMOVE_DUPLICATES_GL (ch_remove_dup_in, ch_fasta)
+       REMOVE_DUPLICATES_GL (ch_remove_dup_in, ch_genome_fasta)

        TABIX_GL (REMOVE_DUPLICATES_GL.out.vcf)
@@ -49,7 +49,7 @@ workflow CALL_SNV_SENTIEON {
             }
             .set{ ch_vcf_idx_merge_in }

-        BCFTOOLS_MERGE(ch_vcf_idx_merge_in.multiple, [], ch_fasta, ch_fai)
+        BCFTOOLS_MERGE(ch_vcf_idx_merge_in.multiple, ch_genome_fasta, ch_genome_fai, [])

         ch_split_multi_in = BCFTOOLS_MERGE.out.merged_variants
                             .map{meta, bcf ->
@@ -57,13 +57,13 @@ workflow CALL_SNV_SENTIEON {

         ch_vcf_idx_case = ch_vcf_idx_merge_in.single.mix(ch_split_multi_in)

-        SPLIT_MULTIALLELICS_SEN(ch_vcf_idx_case, ch_fasta)
+        SPLIT_MULTIALLELICS_SEN(ch_vcf_idx_case, ch_genome_fasta)

         ch_remove_dup_in = SPLIT_MULTIALLELICS_SEN.out.vcf
                             .map{meta, vcf ->
                                 return [meta, vcf, []]}
-        REMOVE_DUPLICATES_SEN(ch_remove_dup_in, ch_fasta)
+        REMOVE_DUPLICATES_SEN(ch_remove_dup_in, ch_genome_fasta)

         TABIX_SEN(REMOVE_DUPLICATES_SEN.out.vcf)
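The new CALL_SV_GERMLINECNVCALLER subworkflow added below chains four GATK4 modules. Its least obvious step is shaping the caller input by joining read counts with ploidy calls and attaching a trained model. In isolation, with hypothetical paths (join matches items whose meta maps are identical):

    workflow {
        ch_tsv   = Channel.of( [ [id:'sample_1'], file('sample_1.counts.tsv') ] )
        ch_calls = Channel.of( [ [id:'sample_1'], file('ploidy-calls') ] )
        ch_model = Channel.of( [ [id:'cnv_model'], file('gcnv-model') ] )

        ch_tsv
            .join(ch_calls)     // [meta, tsv, calls], keyed on the meta map
            .combine(ch_model)  // [meta, tsv, calls, meta2, model]
            .map { meta, tsv, calls, meta2, model -> [ meta, tsv, [], calls, model ] }
            .view()
    }

The empty list inserted by the map is a placeholder for an optional input slot, the same padding idiom used elsewhere in this patch.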
diff --git a/subworkflows/local/variant_calling/call_sv_germlinecnvcaller.nf b/subworkflows/local/variant_calling/call_sv_germlinecnvcaller.nf
new file mode 100644
index 00000000..029c0db5
--- /dev/null
+++ b/subworkflows/local/variant_calling/call_sv_germlinecnvcaller.nf
@@ -0,0 +1,81 @@
+//
+// A variant caller workflow for GATK's GermlineCNVCaller
+//
+
+include { GATK4_COLLECTREADCOUNTS } from '../../../modules/nf-core/gatk4/collectreadcounts/main.nf'
+include { GATK4_DETERMINEGERMLINECONTIGPLOIDY } from '../../../modules/nf-core/gatk4/determinegermlinecontigploidy/main.nf'
+include { GATK4_GERMLINECNVCALLER } from '../../../modules/nf-core/gatk4/germlinecnvcaller/main.nf'
+include { GATK4_POSTPROCESSGERMLINECNVCALLS } from '../../../modules/nf-core/gatk4/postprocessgermlinecnvcalls/main.nf'
+
+workflow CALL_SV_GERMLINECNVCALLER {
+    take:
+        ch_bam_bai             // channel: [mandatory][ val(meta), path(bam), path(bai) ]
+        ch_fasta               // channel: [mandatory][ val(meta), path(fasta) ]
+        ch_fai                 // channel: [mandatory][ val(meta), path(fai) ]
+        ch_readcount_intervals // channel: [mandatory][ val(meta), path(bed), path(tbi) ]
+        ch_genome_dictionary   // channel: [mandatory][ val(meta), path(dict) ]
+        ch_ploidy_model        // channel: [mandatory][ path(ploidy_model) ]
+        ch_gcnvcaller_model    // channel: [mandatory][ path(gcnvcaller_model) ]
+
+    main:
+        ch_versions = Channel.empty()
+
+        input = ch_bam_bai.combine( ch_readcount_intervals.collect{ it[1] } )
+
+        GATK4_COLLECTREADCOUNTS ( input, ch_fasta, ch_fai, ch_genome_dictionary )
+
+        GATK4_COLLECTREADCOUNTS.out.tsv
+            .map({ meta, tsv -> return [meta, tsv, [], [] ]})
+            .set{ch_dgcp_in}
+
+        GATK4_DETERMINEGERMLINECONTIGPLOIDY ( ch_dgcp_in, ch_ploidy_model, [] )
+
+        GATK4_COLLECTREADCOUNTS.out.tsv
+            .join(GATK4_DETERMINEGERMLINECONTIGPLOIDY.out.calls)
+            .combine(ch_gcnvcaller_model)
+            .map({ meta, tsv, calls, meta2, model -> return [meta, tsv, [], calls, model ]})
+            .set{ch_gcnvc_in}
+
+        GATK4_GERMLINECNVCALLER ( ch_gcnvc_in )
+
+        GATK4_GERMLINECNVCALLER.out.calls.toList()
+            .flatMap {reduce_input(it)}
+            .buffer (size: 2)
+            .combine(ch_gcnvcaller_model.collect{it[1]}.toList())
+            .join(GATK4_DETERMINEGERMLINECONTIGPLOIDY.out.calls)
+            .set {ch_postproc_in}
+
+        GATK4_POSTPROCESSGERMLINECNVCALLS ( ch_postproc_in )
+
+        ch_versions = ch_versions.mix(GATK4_COLLECTREADCOUNTS.out.versions)
+        ch_versions = ch_versions.mix(GATK4_DETERMINEGERMLINECONTIGPLOIDY.out.versions)
+        ch_versions = ch_versions.mix(GATK4_GERMLINECNVCALLER.out.versions)
+        ch_versions = ch_versions.mix(GATK4_POSTPROCESSGERMLINECNVCALLS.out.versions)
+
+    emit:
+        genotyped_intervals_vcf = GATK4_POSTPROCESSGERMLINECNVCALLS.out.intervals // channel: [ val(meta), path(*.vcf.gz) ]
+        genotyped_segments_vcf  = GATK4_POSTPROCESSGERMLINECNVCALLS.out.segments  // channel: [ val(meta), path(*.vcf.gz) ]
+        denoised_vcf            = GATK4_POSTPROCESSGERMLINECNVCALLS.out.denoised  // channel: [ val(meta), path(*.vcf.gz) ]
+        versions                = ch_versions                                     // channel: [ versions.yml ]
+}
+
+// This function groups calls with same meta for postprocessing.
+def reduce_input (List gcnvoutput) {
+    def dictionary  = [:]
+    def reducedList = []
+    for (int i = 0; i < gcnvoutput.size(); i++) {
+        def meta  = gcnvoutput[i][0]
+        def calls = gcnvoutput[i][1]
+        if (dictionary.containsKey(meta)) {
+            dictionary[meta] += [calls]
+        } else {
+            dictionary[meta]  = [calls]
+        }
+    }
+    // Emit a flat list of meta and grouped calls; the caller re-pairs them
+    // with buffer(size: 2).
+    for (entry in dictionary) {
+        reducedList.add(entry.key)
+        reducedList.add(entry.value)
+    }
+    return reducedList
+}
diff --git a/subworkflows/local/variant_calling/call_sv_manta.nf b/subworkflows/local/variant_calling/call_sv_manta.nf
--- a/subworkflows/local/variant_calling/call_sv_manta.nf
+++ b/subworkflows/local/variant_calling/call_sv_manta.nf
@@ workflow CALL_SV_MANTA {
         ch_case_info.combine(bam_file_list)
             .combine(bai_file_list)
             .map { it -> it + [ [], [] ] }
             .set { manta_input }
-        MANTA ( manta_input, ch_fasta, ch_fai )
+        MANTA ( manta_input, ch_genome_fasta, ch_genome_fai )
     } else {
         ch_case_info.combine(bam_file_list)
             .combine(bai_file_list)
             .combine(bed_input)
             .set { manta_input }
-        MANTA ( manta_input, ch_fasta, ch_fai )
+        MANTA ( manta_input, ch_genome_fasta, ch_genome_fai )
     }

     ch_versions = MANTA.out.versions
diff --git a/subworkflows/local/variant_calling/call_sv_tiddit.nf b/subworkflows/local/variant_calling/call_sv_tiddit.nf
index 329b11b4..f033c51e 100644
--- a/subworkflows/local/variant_calling/call_sv_tiddit.nf
+++ b/subworkflows/local/variant_calling/call_sv_tiddit.nf
@@ -7,13 +7,13 @@ include { SVDB_MERGE as SVDB_MERGE_TIDDIT } from '../../../modules/nf-core/svdb/

 workflow CALL_SV_TIDDIT {
     take:
-        ch_bam_bai   // channel: [mandatory] [ val(meta), path(bam), path(bai) ]
-        ch_fasta     // channel: [mandatory] [ val(meta), path(fasta) ]
-        ch_index     // channel: [mandatory] [ val(meta), path(index)]
-        ch_case_info // channel: [mandatory] [ val(case_info) ]
+        ch_bam_bai      // channel: [mandatory] [ val(meta), path(bam), path(bai) ]
+        ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ]
+        ch_bwa_index    // channel: [mandatory] [ val(meta), path(index)]
+        ch_case_info    // channel: [mandatory] [ val(case_info) ]

     main:
-        TIDDIT_SV ( ch_bam_bai, ch_fasta, ch_index )
+        TIDDIT_SV ( ch_bam_bai, ch_genome_fasta, ch_bwa_index )

         TIDDIT_SV.out
             .vcf
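In the CALL_SV_MANTA hunk above, the branch without a target BED pads each tuple with two empty lists, presumably so the tuple arity still matches MANTA's optional BED and index slots. The padding idiom on its own (made-up sample):

    workflow {
        Channel.of( [ [id:'case_1'], file('case_1.bam'), file('case_1.bam.bai') ] )
            .map { it -> it + [ [], [] ] }  // append empty bed and bed-index placeholders
            .view()
        // prints: [[id:case_1], case_1.bam, case_1.bam.bai, [], []]
    }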
diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf
index 6ab05e3e..e5497222 100644
--- a/workflows/raredisease.nf
+++ b/workflows/raredisease.nf
@@ -1,58 +1,19 @@
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-    VALIDATE INPUTS
+    PRINT PARAMS SUMMARY
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */

-def summary_params = NfcoreSchema.paramsSummaryMap(workflow, params)
+include { paramsSummaryLog; paramsSummaryMap } from 'plugin/nf-validation'

-// Validate input parameters
-WorkflowRaredisease.initialise(params, log)
+def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs)
+def citation = '\n' + WorkflowMain.citation(workflow) + '\n'
+def summary_params = paramsSummaryMap(workflow)

-// Check input path parameters to see if they exist
-def checkPathParamList = [
-    params.bwa,
-    params.bwamem2,
-    params.call_interval,
-    params.fasta,
-    params.fai,
-    params.gens_gnomad_pos,
-    params.gens_interval_list,
-    params.gens_pon,
-    params.gnomad_af,
-    params.gnomad_af_idx,
-    params.input,
-    params.intervals_wgs,
-    params.intervals_y,
-    params.known_dbsnp,
-    params.known_dbsnp_tbi,
-    params.known_indels,
-    params.known_mills,
-    params.ml_model,
-    params.mt_backchain_shift,
-    params.mt_bwa_index_shift,
-    params.mt_bwamem2_index_shift,
-    params.mt_fasta_shift,
-    params.mt_fai_shift,
-    params.mt_intervals,
-    params.mt_intervals_shift,
-    params.mt_sequence_dictionary_shift,
-    params.multiqc_config,
-    params.reduced_penetrance,
-    params.score_config_snv,
-    params.score_config_sv,
-    params.sequence_dictionary,
-    params.target_bed,
-    params.svdb_query_dbs,
-    params.variant_catalog,
-    params.vep_filters,
-    params.vcfanno_lua,
-    params.vcfanno_resources,
-    params.vcfanno_toml,
-    params.vep_cache
-]

+// Print parameter summary log to screen
+log.info logo + paramsSummaryLog(workflow) + citation

-for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } }

+WorkflowRaredisease.initialise(params, log)

 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -82,8 +43,7 @@ if (!params.skip_sv_annotation) {
 }

 if (!params.skip_mt_analysis) {
-    mandatoryParams += ["genome", "mt_backchain_shift", "mito_name", "mt_fasta_shift", "mt_intervals",
-        "mt_intervals_shift", "vcfanno_resources", "vcfanno_toml", "vep_cache_version", "vep_cache"]
+    mandatoryParams += ["genome", "mito_name", "vcfanno_resources", "vcfanno_toml", "vep_cache_version", "vep_cache"]
 }

@@ -94,6 +54,10 @@ if (params.variant_caller.equals("sentieon")) {
     mandatoryParams += ["ml_model"]
 }

+if (!params.skip_cnv_calling) {
+    mandatoryParams += ["ploidy_model", "gcnvcaller_model"]
+}
+
 def missingParamsCount = 0
 for (param in mandatoryParams.unique()) {
     if (params[param] == null) {
@@ -103,7 +67,7 @@ for (param in mandatoryParams.unique()) {
 }

 if (missingParamsCount>0) {
-    error("\nSet missing parameters and restart the run.")
+    error("\nSet missing parameters and restart the run. For more information, please check the usage documentation on GitHub.")
 }

 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -181,116 +145,119 @@ workflow RAREDISEASE {
     CHECK_INPUT (ch_input)
     ch_versions = ch_versions.mix(CHECK_INPUT.out.versions)

-    // Initialize all file channels including unprocessed vcf, bed and tab files
-    ch_call_interval            = params.call_interval      ? Channel.fromPath(params.call_interval).collect()
-                                                            : Channel.value([])
-    ch_genome_fasta_no_meta     = params.fasta              ? Channel.fromPath(params.fasta).collect()
-                                                            : ( error('Genome fasta not specified!') )
-    ch_genome_fasta_meta        = ch_genome_fasta_no_meta.map { it -> [[id:it[0].simpleName], it] }
-    ch_gnomad_af_tab            = params.gnomad_af          ? Channel.fromPath(params.gnomad_af).map{ it -> [[id:it[0].simpleName], it] }.collect()
-                                                            : Channel.value([[],[]])
-    ch_intervals_wgs            = params.intervals_wgs      ? Channel.fromPath(params.intervals_wgs).collect()
-                                                            : Channel.empty()
-    ch_intervals_y              = params.intervals_y        ? Channel.fromPath(params.intervals_y).collect()
-                                                            : Channel.empty()
-    ch_known_dbsnp              = params.known_dbsnp        ? Channel.fromPath(params.known_dbsnp).map{ it -> [[id:it[0].simpleName], it] }.collect()
-                                                            : Channel.value([[],[]])
-    ch_ml_model                 = (params.variant_caller.equals("sentieon") && params.ml_model) ? Channel.fromPath(params.ml_model).collect()
-                                                            : Channel.value([])
-    ch_mt_backchain_shift       = params.mt_backchain_shift ? Channel.fromPath(params.mt_backchain_shift).collect()
-                                                            : Channel.value([])
-    ch_mt_fasta_shift_no_meta   = params.mt_fasta_shift     ? Channel.fromPath(params.mt_fasta_shift).collect()
-                                                            : Channel.empty()
-    ch_mt_fasta_shift_meta      = params.mt_fasta_shift     ? ch_mt_fasta_shift_no_meta.map { it -> [[id:it[0].simpleName], it] }.collect()
-                                                            : Channel.empty()
-    ch_mt_intervals             = params.mt_intervals       ? Channel.fromPath(params.mt_intervals).collect()
-                                                            : Channel.value([])
-    ch_mt_intervals_shift       = params.mt_intervals_shift ? Channel.fromPath(params.mt_intervals_shift).collect()
-                                                            : Channel.value([])
-    ch_reduced_penetrance       = params.reduced_penetrance ?
Channel.fromPath(params.reduced_penetrance).collect() - : Channel.value([]) - ch_score_config_snv = params.score_config_snv ? Channel.fromPath(params.score_config_snv).collect() - : Channel.value([]) - ch_score_config_sv = params.score_config_sv ? Channel.fromPath(params.score_config_sv).collect() - : Channel.value([]) - ch_target_bed_unprocessed = params.target_bed ? Channel.fromPath(params.target_bed).map{ it -> [[id:it[0].simpleName], it] }.collect() - : Channel.value([]) - ch_variant_catalog = params.variant_catalog ? Channel.fromPath(params.variant_catalog).collect() - : Channel.value([]) - ch_variant_consequences = Channel.fromPath("$projectDir/assets/variant_consequences_v1.txt", checkIfExists: true).collect() - - ch_vcfanno_resources = params.vcfanno_resources ? Channel.fromPath(params.vcfanno_resources).splitText().map{it -> it.trim()}.collect() - : Channel.value([]) - ch_vcfanno_lua = params.vcfanno_lua ? Channel.fromPath(params.vcfanno_lua).collect() - : Channel.value([]) - ch_vcfanno_toml = params.vcfanno_toml ? Channel.fromPath(params.vcfanno_toml).collect() - : Channel.value([]) - ch_vep_cache_unprocessed = params.vep_cache ? Channel.fromPath(params.vep_cache).map { it -> [[id:'vep_cache'], it] }.collect() - : Channel.value([[],[]]) - ch_vep_filters = params.vep_filters ? Channel.fromPath(params.vep_filters).collect() - : Channel.value([]) - - // Generate pedigree file - pedfile = CHECK_INPUT.out.samples.toList().map { makePed(it) } - - // Input QC - FASTQC (CHECK_INPUT.out.reads) - ch_versions = ch_versions.mix(FASTQC.out.versions.first()) + // Initialize file channels for PREPARE_REFERENCES subworkflow + ch_genome_fasta = Channel.fromPath(params.fasta).map { it -> [[id:it[0].simpleName], it] }.collect() + ch_genome_fai = params.fai ? Channel.fromPath(params.fai).map {it -> [[id:it[0].simpleName], it]}.collect() + : Channel.empty() + ch_gnomad_af_tab = params.gnomad_af ? Channel.fromPath(params.gnomad_af).map{ it -> [[id:it[0].simpleName], it] }.collect() + : Channel.value([[],[]]) + ch_dbsnp = params.known_dbsnp ? Channel.fromPath(params.known_dbsnp).map{ it -> [[id:it[0].simpleName], it] }.collect() + : Channel.value([[],[]]) + ch_mt_fasta = params.mt_fasta ? Channel.fromPath(params.mt_fasta).map { it -> [[id:it[0].simpleName], it] }.collect() + : Channel.empty() + ch_target_bed_unprocessed = params.target_bed ? Channel.fromPath(params.target_bed).map{ it -> [[id:it[0].simpleName], it] }.collect() + : Channel.value([[],[]]) + ch_vep_cache_unprocessed = params.vep_cache ? Channel.fromPath(params.vep_cache).map { it -> [[id:'vep_cache'], it] }.collect() + : Channel.value([[],[]]) // Prepare references and indices. PREPARE_REFERENCES ( - ch_genome_fasta_no_meta, - ch_genome_fasta_meta, - ch_mt_fasta_shift_no_meta, - ch_mt_fasta_shift_meta, + ch_genome_fasta, + ch_genome_fai, + ch_mt_fasta, ch_gnomad_af_tab, - ch_known_dbsnp, + ch_dbsnp, ch_target_bed_unprocessed, ch_vep_cache_unprocessed ) .set { ch_references } // Gather built indices or get them from the params - ch_bait_intervals = ch_references.bait_intervals - ch_bwa_index = params.bwa ? Channel.fromPath(params.bwa).map {it -> [[id:it[0].simpleName], it]}.collect() - : ch_references.bwa_index - ch_bwa_index_mt_shift = params.mt_bwa_index_shift ? Channel.fromPath(params.mt_bwa_index_shift).map {it -> [[id:it[0].simpleName], it]}.collect() - : ch_references.bwa_index_mt_shift - ch_bwamem2_index = params.bwamem2 ? 
Channel.fromPath(params.bwamem2).map {it -> [[id:it[0].simpleName], it]}.collect() - : ch_references.bwamem2_index - ch_bwamem2_index_mt_shift = params.mt_bwamem2_index_shift ? Channel.fromPath(params.mt_bwamem2_index_shift).collect() - : ch_references.bwamem2_index_mt_shift - ch_chrom_sizes = ch_references.chrom_sizes - ch_genome_fai_no_meta = params.fai ? Channel.fromPath(params.fai).collect() - : ch_references.fasta_fai - ch_genome_fai_meta = params.fai ? Channel.fromPath(params.fai).map {it -> [[id:it[0].simpleName], it]}.collect() - : ch_references.fasta_fai_meta - ch_mt_shift_fai = params.mt_fai_shift ? Channel.fromPath(params.mt_fai_shift).collect() - : ch_references.fasta_fai_mt_shift - ch_gnomad_af_idx = params.gnomad_af_idx ? Channel.fromPath(params.gnomad_af_idx).collect() + ch_bait_intervals = ch_references.bait_intervals + ch_cadd_header = Channel.fromPath("$projectDir/assets/cadd_to_vcf_header_-1.0-.txt", checkIfExists: true).collect() + ch_cadd_resources = params.cadd_resources ? Channel.fromPath(params.cadd_resources).collect() + : Channel.value([]) + ch_call_interval = params.call_interval ? Channel.fromPath(params.call_interval).collect() + : Channel.value([]) + ch_dbsnp_tbi = params.known_dbsnp_tbi ? Channel.fromPath(params.known_dbsnp_tbi).map {it -> [[id:it[0].simpleName], it]}.collect() + : ch_references.known_dbsnp_tbi.ifEmpty([[],[]]) + ch_gcnvcaller_model = params.gcnvcaller_model ? Channel.fromPath(params.gcnvcaller_model).splitCsv ( header:true ) + .map { row -> + return [[id:file(row.models).simpleName], row.models] + } + : Channel.empty() + ch_genome_bwaindex = params.bwa ? Channel.fromPath(params.bwa).map {it -> [[id:it[0].simpleName], it]}.collect() + : ch_references.genome_bwa_index + ch_genome_bwamem2index = params.bwamem2 ? Channel.fromPath(params.bwamem2).map {it -> [[id:it[0].simpleName], it]}.collect() + : ch_references.genome_bwamem2_index + ch_genome_chrsizes = ch_references.genome_chrom_sizes + ch_genome_fai = ch_references.genome_fai + ch_genome_dictionary = params.sequence_dictionary ? Channel.fromPath(params.sequence_dictionary).map {it -> [[id:it[0].simpleName], it]}.collect() + : ch_references.genome_dict + ch_gnomad_afidx = params.gnomad_af_idx ? Channel.fromPath(params.gnomad_af_idx).collect() : ch_references.gnomad_af_idx - ch_gnomad_af = params.gnomad_af ? ch_gnomad_af_tab.join(ch_gnomad_af_idx).map {meta, tab, idx -> [tab,idx]}.collect() + ch_gnomad_af = params.gnomad_af ? ch_gnomad_af_tab.join(ch_gnomad_afidx).map {meta, tab, idx -> [tab,idx]}.collect() : Channel.empty() - ch_known_dbsnp_tbi = params.known_dbsnp_tbi ? Channel.fromPath(params.known_dbsnp_tbi).map {it -> [[id:it[0].simpleName], it]}.collect() - : ch_references.known_dbsnp_tbi.ifEmpty([[],[]]) - ch_sequence_dictionary_no_meta = params.sequence_dictionary ? Channel.fromPath(params.sequence_dictionary).collect() - : ch_references.sequence_dict - ch_sequence_dictionary_meta = params.sequence_dictionary ? Channel.fromPath(params.sequence_dictionary).map {it -> [[id:it[0].simpleName], it]}.collect() - : ch_references.sequence_dict_meta - ch_sequence_dictionary_mt_shift = params.mt_sequence_dictionary_shift ? Channel.fromPath(params.mt_sequence_dictionary_shift).collect() - : ch_references.sequence_dict_mt_shift - ch_target_bed = ch_references.target_bed - ch_target_intervals = ch_references.target_intervals - ch_vep_cache = ( params.vep_cache && params.vep_cache.endsWith("tar.gz") ) ? ch_references.vep_resources - : ( params.vep_cache ? 
Channel.fromPath(params.vep_cache).collect() : Channel.value([]) ) - ch_versions = ch_versions.mix(ch_references.versions) + ch_intervals_wgs = params.intervals_wgs ? Channel.fromPath(params.intervals_wgs).collect() + : Channel.empty() + ch_intervals_y = params.intervals_y ? Channel.fromPath(params.intervals_y).collect() + : Channel.empty() + ch_ml_model = params.variant_caller.equals("sentieon") ? Channel.fromPath(params.ml_model).collect() + : Channel.value([]) + ch_mt_intervals = ch_references.mt_intervals + ch_mtshift_backchain = ch_references.mtshift_backchain + ch_mtshift_bwaindex = ch_references.mtshift_bwa_index + ch_mtshift_bwamem2index = ch_references.mtshift_bwamem2_index + ch_mtshift_dictionary = ch_references.mtshift_dict + ch_mtshift_fai = ch_references.mtshift_fai + ch_mtshift_fasta = ch_references.mtshift_fasta + ch_mtshift_intervals = ch_references.mtshift_intervals + ch_ploidy_model = params.ploidy_model ? Channel.fromPath(params.ploidy_model).map{ it -> [[id:it[0].simpleName], it] }.collect() + : Channel.empty() + ch_readcount_intervals = params.readcount_intervals ? Channel.fromPath(params.readcount_intervals).collect() + : ( ch_references.readcount_intervals ?: Channel.empty() ) + ch_reduced_penetrance = params.reduced_penetrance ? Channel.fromPath(params.reduced_penetrance).collect() + : Channel.value([]) + ch_score_config_snv = params.score_config_snv ? Channel.fromPath(params.score_config_snv).collect() + : Channel.value([]) + ch_score_config_sv = params.score_config_sv ? Channel.fromPath(params.score_config_sv).collect() + : Channel.value([]) + ch_target_bed = ch_references.target_bed + ch_target_intervals = ch_references.target_intervals + ch_variant_catalog = params.variant_catalog ? Channel.fromPath(params.variant_catalog).map { it -> [[id:it[0].simpleName],it]}.collect() + : Channel.value([[],[]]) + ch_variant_consequences = Channel.fromPath("$projectDir/assets/variant_consequences_v1.txt", checkIfExists: true).collect() + ch_vcfanno_resources = params.vcfanno_resources ? Channel.fromPath(params.vcfanno_resources).splitText().map{it -> it.trim()}.collect() + : Channel.value([]) + ch_vcfanno_lua = params.vcfanno_lua ? Channel.fromPath(params.vcfanno_lua).collect() + : Channel.value([]) + ch_vcfanno_toml = params.vcfanno_toml ? Channel.fromPath(params.vcfanno_toml).collect() + : Channel.value([]) + ch_vep_cache = ( params.vep_cache && params.vep_cache.endsWith("tar.gz") ) ? ch_references.vep_resources + : ( params.vep_cache ? Channel.fromPath(params.vep_cache).collect() : Channel.value([]) ) + ch_vep_filters = params.vep_filters ? Channel.fromPath(params.vep_filters).collect() + : Channel.value([]) + ch_versions = ch_versions.mix(ch_references.versions) + + // Generate pedigree file + ch_pedfile = CHECK_INPUT.out.samples.toList().map { makePed(it) } + + // SV caller priority + if (params.skip_cnv_calling) { + ch_svcaller_priority = Channel.value(["tiddit", "manta"]) + } else { + ch_svcaller_priority = Channel.value(["tiddit", "manta", "gcnvcaller"]) + } + + // Input QC + FASTQC (CHECK_INPUT.out.reads) + ch_versions = ch_versions.mix(FASTQC.out.versions.first()) // CREATE CHROMOSOME BED AND INTERVALS SCATTER_GENOME ( - ch_sequence_dictionary_no_meta, - ch_genome_fai_meta, - ch_genome_fai_no_meta, - ch_genome_fasta_no_meta + ch_genome_dictionary, + ch_genome_fai, + ch_genome_fasta ) .set { ch_scatter } @@ -299,12 +266,12 @@ workflow RAREDISEASE { // ALIGNING READS, FETCH STATS, AND MERGE. 
ALIGN ( CHECK_INPUT.out.reads, - ch_genome_fasta_no_meta, - ch_genome_fai_no_meta, - ch_bwa_index, - ch_bwamem2_index, - ch_known_dbsnp, - ch_known_dbsnp_tbi, + ch_genome_fasta, + ch_genome_fai, + ch_genome_bwaindex, + ch_genome_bwamem2index, + ch_dbsnp, + ch_dbsnp_tbi, params.platform ) .set { ch_mapped } @@ -315,11 +282,11 @@ workflow RAREDISEASE { ch_mapped.marked_bam, ch_mapped.marked_bai, ch_mapped.bam_bai, - ch_genome_fasta_meta, - ch_genome_fai_meta, + ch_genome_fasta, + ch_genome_fai, ch_bait_intervals, ch_target_intervals, - ch_chrom_sizes, + ch_genome_chrsizes, ch_intervals_wgs, ch_intervals_y ) @@ -330,10 +297,8 @@ workflow RAREDISEASE { ch_mapped.bam_bai, ch_variant_catalog, CHECK_INPUT.out.case_info, - ch_genome_fasta_no_meta, - ch_genome_fai_no_meta, - ch_genome_fasta_meta, - ch_genome_fai_meta + ch_genome_fasta, + ch_genome_fai ) ch_versions = ch_versions.mix(CALL_REPEAT_EXPANSIONS.out.versions) @@ -361,10 +326,10 @@ workflow RAREDISEASE { // STEP 2: VARIANT CALLING CALL_SNV ( ch_mapped.bam_bai, - ch_genome_fasta_no_meta, - ch_genome_fai_no_meta, - ch_known_dbsnp, - ch_known_dbsnp_tbi, + ch_genome_fasta, + ch_genome_fai, + ch_dbsnp, + ch_dbsnp_tbi, ch_call_interval, ch_ml_model, CHECK_INPUT.out.case_info @@ -375,19 +340,23 @@ workflow RAREDISEASE { ch_mapped.marked_bam, ch_mapped.marked_bai, ch_mapped.bam_bai, - ch_bwa_index, - ch_genome_fasta_no_meta, - ch_genome_fasta_meta, - ch_genome_fai_no_meta, + ch_genome_bwaindex, + ch_genome_fasta, + ch_genome_fai, CHECK_INPUT.out.case_info, - ch_target_bed + ch_target_bed, + ch_genome_dictionary, + ch_svcaller_priority, + ch_readcount_intervals, + ch_ploidy_model, + ch_gcnvcaller_model ) ch_versions = ch_versions.mix(CALL_STRUCTURAL_VARIANTS.out.versions) // ped correspondence, sex check, ancestry check PEDDY_CHECK ( CALL_SNV.out.vcf.join(CALL_SNV.out.tabix, failOnMismatch:true, failOnDuplicate:true), - pedfile + ch_pedfile ) ch_versions = ch_versions.mix(PEDDY_CHECK.out.versions) @@ -396,13 +365,13 @@ workflow RAREDISEASE { GENS ( ch_mapped.bam_bai, CALL_SNV.out.vcf, - ch_genome_fasta_meta, - ch_genome_fai_no_meta, + ch_genome_fasta, + ch_genome_fai, file(params.gens_interval_list), file(params.gens_pon), file(params.gens_gnomad_pos), CHECK_INPUT.out.case_info, - ch_sequence_dictionary_no_meta + ch_genome_dictionary ) ch_versions = ch_versions.mix(GENS.out.versions) } @@ -414,8 +383,8 @@ workflow RAREDISEASE { params.genome, params.vep_cache_version, ch_vep_cache, - ch_genome_fasta_no_meta, - ch_sequence_dictionary_no_meta + ch_genome_fasta, + ch_genome_dictionary ).set {ch_sv_annotate} ch_versions = ch_versions.mix(ch_sv_annotate.versions) @@ -427,7 +396,7 @@ workflow RAREDISEASE { RANK_VARIANTS_SV ( ANN_CSQ_PLI_SV.out.vcf_ann, - pedfile, + ch_pedfile, ch_reduced_penetrance, ch_score_config_sv ) @@ -444,21 +413,21 @@ workflow RAREDISEASE { if (!params.skip_mt_analysis) { ANALYSE_MT ( ch_mapped.bam_bai, - ch_bwa_index, - ch_bwamem2_index, - ch_genome_fasta_meta, - ch_genome_fasta_no_meta, - ch_sequence_dictionary_meta, - ch_sequence_dictionary_no_meta, - ch_genome_fai_no_meta, + ch_cadd_header, + ch_cadd_resources, + ch_genome_bwaindex, + ch_genome_bwamem2index, + ch_genome_fasta, + ch_genome_fai, + ch_genome_dictionary, ch_mt_intervals, - ch_bwa_index_mt_shift, - ch_bwamem2_index_mt_shift, - ch_mt_fasta_shift_no_meta, - ch_sequence_dictionary_mt_shift, - ch_mt_shift_fai, - ch_mt_intervals_shift, - ch_mt_backchain_shift, + ch_mtshift_bwaindex, + ch_mtshift_bwamem2index, + ch_mtshift_fasta, + ch_mtshift_dictionary, + 
ch_mtshift_fai, + ch_mtshift_intervals, + ch_mtshift_backchain, ch_vcfanno_resources, ch_vcfanno_toml, params.genome, @@ -491,16 +460,17 @@ workflow RAREDISEASE { ANNOTATE_SNVS ( ch_vcf, params.analysis_type, + ch_cadd_header, + ch_cadd_resources, ch_vcfanno_resources, ch_vcfanno_lua, ch_vcfanno_toml, params.genome, params.vep_cache_version, ch_vep_cache, - ch_genome_fasta_no_meta, + ch_genome_fasta, ch_gnomad_af, - ch_scatter_split_intervals, - CHECK_INPUT.out.samples + ch_scatter_split_intervals ).set {ch_snv_annotate} ch_versions = ch_versions.mix(ch_snv_annotate.versions) @@ -533,7 +503,7 @@ workflow RAREDISEASE { RANK_VARIANTS_SNV ( ANN_CSQ_PLI_SNV.out.vcf_ann, - pedfile, + ch_pedfile, ch_reduced_penetrance, ch_score_config_snv ) @@ -562,7 +532,8 @@ workflow RAREDISEASE { // workflow_summary = WorkflowRaredisease.paramsSummaryMultiqc(workflow, summary_params) ch_workflow_summary = Channel.value(workflow_summary) - methods_description = WorkflowRaredisease.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description) + + methods_description = WorkflowRaredisease.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description, params) ch_methods_description = Channel.value(methods_description) ch_multiqc_files = Channel.empty() ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml'))
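A closing note on the RAREDISEASE workflow changes above: ch_gcnvcaller_model is now built by parsing a samplesheet with splitCsv rather than by taking a bare path. A standalone sketch of that parsing, with the CSV content inlined and the paths invented (the real sheet lists one trained GermlineCNVCaller model directory per row under a models header):

    workflow {
        Channel.of( 'models\n/references/gcnv/model_shard_01\n/references/gcnv/model_shard_02' )
            .splitCsv( header:true )                                    // each row becomes a map keyed by the header
            .map { row -> [ [id:file(row.models).simpleName], row.models ] }
            .view()
        // prints: [[id:model_shard_01], /references/gcnv/model_shard_01] ...
    }

Deriving the meta id from the model directory's simpleName keeps each shard individually addressable when the models are later combined with per-sample read counts.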