diff --git a/.cspell.json b/.cspell.json new file mode 100644 index 00000000..2f55e79d --- /dev/null +++ b/.cspell.json @@ -0,0 +1,22 @@ +{ + "version": "0.2", + "language": "en", + "words": [ + "CACHEDIR", + "Charliecloud", + "conda", + "entrez", + "ksumngs", + "MAFFT", + "metagenomic", + "NCBI", + "Nextflow", + "outdir", + "Palinski", + "phylotree", + "taxid", + "Trimmomatic", + "yavsap", + "Zenodo" + ] +} diff --git a/.editorconfig b/.editorconfig index e2d23a44..b6b31907 100644 --- a/.editorconfig +++ b/.editorconfig @@ -1,4 +1,3 @@ - root = true [*] @@ -9,7 +8,7 @@ trim_trailing_whitespace = true indent_size = 4 indent_style = space -[*.{md,yml,yaml,html,css,scss,js,pug,rst,prettierrc}] +[*.{md,yml,yaml,html,css,scss,js}] indent_size = 2 # These files are edited and tested upstream in nf-core/modules diff --git a/.gitattributes b/.gitattributes index 7fe55006..f3ec7a7c 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1 +1,4 @@ *.config linguist-language=nextflow +modules/nf-core/** linguist-generated +subworkflows/nf-core/** linguist-generated +bin/sequence-table linguist-language=julia diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml new file mode 100644 index 00000000..7763cd28 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -0,0 +1,51 @@ +name: Bug report +description: Report something that is broken or incorrect +labels: ["bug"] +title: "[Bug]: " +body: + - type: markdown + attributes: + value: | + Before you post this issue, please check the documentation: + + - [nf-core website: troubleshooting](https://nf-co.re/usage/troubleshooting) + - [yavsap pipeline documentation](https://ksumngs.github.io/yavsap/usage) + + - type: textarea + id: description + attributes: + label: Description of the bug + description: A clear and concise description of what the bug is. + validations: + required: true + + - type: textarea + id: command_used + attributes: + label: Command used and terminal output + description: Steps to reproduce the behaviour. Please paste the command you used to launch the pipeline and the output from your terminal. + render: console + placeholder: | + $ nextflow run ... + + Some output where something broke + + - type: textarea + id: files + attributes: + label: Relevant files + description: | + Please drag and drop the relevant files here. Create a `.zip` archive if the extension is not allowed. + Your verbose log file `.nextflow.log` is often useful _(this is a hidden file in the directory where you launched the pipeline)_ as well as custom Nextflow configuration files. + + - type: textarea + id: system + attributes: + label: System information + description: | + * Nextflow version _(eg. 21.10.3)_ + * Hardware _(eg. HPC, Desktop, Cloud)_ + * Executor _(eg. slurm, local, awsbatch)_ + * Container engine: _(e.g. Docker, Singularity, Conda, Podman, Shifter or Charliecloud)_ + * OS _(eg. CentOS Linux, macOS, Linux Mint)_ + * Version of nf-core/yavsap _(eg. 1.1, 1.5, 1.8.2)_ diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml b/.github/ISSUE_TEMPLATE/feature_request.yml new file mode 100644 index 00000000..9faa05b8 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.yml @@ -0,0 +1,28 @@ +name: Feature request +description: Suggest an idea for the nf-core/yavsap pipeline +labels: ["enhancement"] +title: "[Feature]: " +body: + - type: textarea + id: summary + attributes: + label: Summary of feature + description: Please describe your suggestion for a new feature. 
It might help to describe a problem or use case, plus any alternatives that you have considered.
+    validations:
+      required: true
+  - type: textarea
+    id: processes
+    attributes:
+      label: Additional processes
+      description: Explain what tools would be needed to implement this feature, along with a link to the tool's documentation and a container for the tool.
+  - type: textarea
+    id: parameters
+    attributes:
+      label: Additional parameters
+      description: Give a list of new parameters and which tools/processes they would be passed to.
+  - type: textarea
+    id: visualizer
+    attributes:
+      label: Additional visualizer section
+      description: Describe a new section to add to the visualizer
+      placeholder:
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
index b5a3771b..88ef4c04 100644
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -17,14 +17,11 @@ Learn more about contributing: [CONTRIBUTING.md](https://github.com/ksumngs/yavs
 - [ ] This comment contains a description of changes (with reason).
 - [ ] If you've fixed a bug or added code that should be tested, add tests!
-  - [ ] If you've added a new tool - have you followed the pipeline conventions
-        in the
-        [contribution docs](https://github.com/ksumngs/yavsap/tree/master/.github/CONTRIBUTING.md)
+  - [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/ksumngs/yavsap/tree/master/.github/CONTRIBUTING.md)
+  - [ ] If necessary, also make a PR on the yavsap _branch_ on the [ksumngs/nf-test-datasets](https://github.com/ksumngs/nf-test-datasets) repository.
 - [ ] Make sure your code lints (`nf-core lint`).
-- [ ] Ensure the test suite passes
-      (`nextflow run . -profile test,docker --outdir <OUTDIR>`).
+- [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir <OUTDIR>`).
 - [ ] Usage Documentation in `docs/usage.md` is updated.
 - [ ] Output Documentation in `docs/output.md` is updated.
 - [ ] `CHANGELOG.md` is updated.
-- [ ] `README.md` is updated (including new tool citations and
-      authors/contributors).
+- [ ] `README.md` is updated (including new tool citations and authors/contributors).
diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml
new file mode 100644
index 00000000..0d5193ee
--- /dev/null
+++ b/.github/workflows/awsfulltest.yml
@@ -0,0 +1,33 @@
+name: nf-core AWS full size tests
+# This workflow is triggered on published releases.
+# It can also be triggered manually with the GitHub Actions workflow dispatch button.
+# It runs the pipeline with the `test_full` profile on AWS Batch
+
+on:
+  release:
+    types: [published]
+  workflow_dispatch:
+jobs:
+  run-tower:
+    name: Run AWS full tests
+    if: github.repository == 'ksumngs/yavsap'
+    runs-on: ubuntu-latest
+    steps:
+      - name: Launch workflow via tower
+        uses: nf-core/tower-action@v3
+        # TODO nf-core: You can customise AWS full pipeline tests as required
+        # Add full-size test data (still relatively small datasets for a few samples);
+        # the `test_full.config` test runs with only one set of parameters
+        with:
+          workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }}
+          access_token: ${{ secrets.TOWER_ACCESS_TOKEN }}
+          compute_env: ${{ secrets.TOWER_COMPUTE_ENV }}
+          workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/yavsap/work-${{ github.sha }}
+          parameters: |
+            {
+              "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/yavsap/results-${{ github.sha }}"
+            }
+          profiles: test_full,aws_tower
+          nextflow_config: |
+            process.errorStrategy = 'retry'
+            process.maxRetries = 3
diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml
new file mode 100644
index 00000000..47b3ccbe
--- /dev/null
+++ b/.github/workflows/awstest.yml
@@ -0,0 +1,28 @@
+name: nf-core AWS test
+# This workflow can be triggered manually with the GitHub Actions workflow dispatch button.
+# It runs the pipeline with the `test` profile on AWS Batch
+
+on:
+  workflow_dispatch:
+jobs:
+  run-tower:
+    name: Run AWS tests
+    if: github.repository == 'ksumngs/yavsap'
+    runs-on: ubuntu-latest
+    steps:
+      # Launch workflow using Tower CLI tool action
+      - name: Launch workflow via tower
+        uses: nf-core/tower-action@v3
+        with:
+          workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }}
+          access_token: ${{ secrets.TOWER_ACCESS_TOKEN }}
+          compute_env: ${{ secrets.TOWER_COMPUTE_ENV }}
+          workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/yavsap/work-${{ github.sha }}
+          parameters: |
+            {
+              "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/yavsap/results-test-${{ github.sha }}"
+            }
+          profiles: test,aws_tower
+          nextflow_config: |
+            process.errorStrategy = 'retry'
+            process.maxRetries = 3
diff --git a/.github/workflows/branch.yml b/.github/workflows/branch.yml
index 75e3353d..60ed3a1c 100644
--- a/.github/workflows/branch.yml
+++ b/.github/workflows/branch.yml
@@ -42,3 +42,4 @@ jobs:
             Thanks again for your contribution!
repo-token: ${{ secrets.GITHUB_TOKEN }} allow-repeats: false +# diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6d43eb8f..8de30b74 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,4 +1,4 @@ -name: Testing +name: nf-core CI # This workflow runs the pipeline with the minimal test dataset to check that it completes without any syntax errors on: push: @@ -15,7 +15,7 @@ env: jobs: test: - name: Run workflow tests + name: Run pipeline with test data # Only run on push if this is the develop branch (merged PRs) if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'ksumngs/yavsap') }} runs-on: ubuntu-latest @@ -24,8 +24,9 @@ jobs: # Nextflow versions include: # Test pipeline minimum Nextflow version - - NXF_VER: "21.10.6" + - NXF_VER: "21.10.3" NXF_EDGE: "" + # Test latest edge release of Nextflow - NXF_VER: "" NXF_EDGE: "1" steps: @@ -46,7 +47,7 @@ jobs: env: NCBI_API_KEY: ${{ secrets.NCBI_API_KEY }} run: | - nextflow run ${GITHUB_WORKSPACE} -profile test,gh,docker + nextflow run ${GITHUB_WORKSPACE} -profile test,gh,docker --outdir results parameters: name: Test workflow parameters @@ -84,7 +85,7 @@ jobs: env: NCBI_API_KEY: ${{ secrets.NCBI_API_KEY }} run: | - nextflow run ${GITHUB_WORKSPACE} -profile test,gh,docker ${{ matrix.parameters }} + nextflow run ${GITHUB_WORKSPACE} -profile test,gh,docker ${{ matrix.parameters }} --outdir results test_interleaved: name: Test Interleaved Reads Workflow @@ -104,7 +105,8 @@ jobs: env: NCBI_API_KEY: ${{ secrets.NCBI_API_KEY }} run: | - nextflow run ${GITHUB_WORKSPACE} -profile test_interleaved,gh,docker + nextflow run ${GITHUB_WORKSPACE} -profile test_interleaved,gh,docker --outdir results + test_nanopore: name: Run workflow tests # Only run on push if this is the develop branch (merged PRs) @@ -123,4 +125,4 @@ jobs: env: NCBI_API_KEY: ${{ secrets.NCBI_API_KEY }} run: | - nextflow run ${GITHUB_WORKSPACE} -profile test,gh,docker + nextflow run ${GITHUB_WORKSPACE} -profile test,gh,docker --outdir results diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 673e25e4..3e49bb88 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -17,16 +17,15 @@ jobs: requirements: docs/requirements.txt - name: Install nf-core Tools 🍎 run: | - python -m pip install git+https://github.com/krokicki/tools@dev + python -m pip install nf-core - name: Convert schema to docs 🧑‍⚖️ run: | - echo "# Complete Parameter Reference" > docs/parameters.md - echo "" >> docs/parameters.md - echo "Every single parameter in the entire pipeline is described here. If you need to" >> docs/parameters.md - echo "tweak every little detail of how your JEV samples are analyzed, you've come to" >> docs/parameters.md - echo "the right place! Note that *every* parameter is described here, even those that" >> docs/parameters.md - echo "shouldn't be used, so proceed with caution!" >> docs/parameters.md - nf-core schema docs nextflow_schema.json | tail -n+4 >> docs/parameters.md + nf-core schema docs -f \ + -o docs/parameters.md \ + -x markdown \ + -c 'parameter,description,type,required,default' + sed -i '1s/^.*/# Complete Parameter Reference/' docs/parameters.md + sed -i "3s/^.*/Every single parameter in the entire pipeline is described here. If you need to tweak every little detail of how your samples are analyzed, you've come to the right place! 
Note that *every* parameter is described here, even those that shouldn't be used, so proceed with caution!/" docs/parameters.md - name: Build docs 📝 run: | sphinx-build -b html docs docs/_build diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 7a839ab8..e9cf5de3 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -1,6 +1,7 @@ name: nf-core linting # This workflow is triggered on pushes and PRs to the repository. -# It runs the `nf-core lint` and markdown lint tests to ensure that the code meets the nf-core guidelines +# It runs the `nf-core lint` and markdown lint tests to ensure +# that the code meets the nf-core guidelines. on: push: pull_request: @@ -19,7 +20,7 @@ jobs: run: npm install -g editorconfig-checker - name: Run ECLint check - run: editorconfig-checker -exclude '.*.md' $(git ls-files | grep -v test) + run: editorconfig-checker -exclude README.md $(find .* -type f | grep -v '.git\|.py\|.md\|json\|yml\|yaml\|html\|css\|work\|.nextflow\|build\|nf_core.egg-info\|log.txt\|Makefile') Prettier: runs-on: ubuntu-latest @@ -77,3 +78,5 @@ jobs: lint_log.txt lint_results.md PR_number.txt + +# diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml index 04758f61..91c487a1 100644 --- a/.github/workflows/linting_comment.yml +++ b/.github/workflows/linting_comment.yml @@ -26,3 +26,4 @@ jobs: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} number: ${{ steps.pr_number.outputs.pr_number }} path: linting-logs/lint_results.md +# diff --git a/.gitignore b/.gitignore index 594b23d9..36227c85 100644 --- a/.gitignore +++ b/.gitignore @@ -3,131 +3,10 @@ work/ data/ results/ -.trace/ .DS_Store testing/ testing* *.pyc -### Node gitignore ### -# Logs -logs -*.log -npm-debug.log* -yarn-debug.log* -yarn-error.log* -lerna-debug.log* -.pnpm-debug.log* - -# Diagnostic reports (https://nodejs.org/api/report.html) -report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json - -# Runtime data -pids -*.pid -*.seed -*.pid.lock - -# Directory for instrumented libs generated by jscoverage/JSCover -lib-cov - -# Coverage directory used by tools like istanbul -coverage -*.lcov - -# nyc test coverage -.nyc_output - -# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files) -.grunt - -# Bower dependency directory (https://bower.io/) -bower_components - -# node-waf configuration -.lock-wscript - -# Compiled binary addons (https://nodejs.org/api/addons.html) -build/Release - -# Dependency directories -node_modules/ -jspm_packages/ - -# Snowpack dependency directory (https://snowpack.dev/) -web_modules/ - -# TypeScript cache -*.tsbuildinfo - -# Optional npm cache directory -.npm - -# Optional eslint cache -.eslintcache - -# Microbundle cache -.rpt2_cache/ -.rts2_cache_cjs/ -.rts2_cache_es/ -.rts2_cache_umd/ - -# Optional REPL history -.node_repl_history - -# Output of 'npm pack' -*.tgz - -# Yarn Integrity file -.yarn-integrity - -# dotenv environment variables file -.env -.env.test -.env.production - -# parcel-bundler cache (https://parceljs.org/) -.cache -.parcel-cache - -# Next.js build output -.next -out - -# Nuxt.js build / generate output -.nuxt -dist - -# Gatsby files -.cache/ -# Comment in the public line in if your project uses Gatsby and not Next.js -# https://nextjs.org/blog/next-9-1#public-directory-support -# public - -# vuepress build output -.vuepress/dist - -# Serverless directories -.serverless/ - -# FuseBox cache -.fusebox/ - -# DynamoDB Local files -.dynamodb/ - -# TernJS port file -.tern-port - -# Stores 
VSCode versions used for testing VSCode extensions -.vscode-test - -# yarn v2 -.yarn/cache -.yarn/unplugged -.yarn/build-state.yml -.yarn/install-state.gz -.pnp.* - ### Sphinx gitignore ### docs/_build diff --git a/.gitpod.yml b/.gitpod.yml new file mode 100644 index 00000000..85d95ecc --- /dev/null +++ b/.gitpod.yml @@ -0,0 +1,14 @@ +image: nfcore/gitpod:latest + +vscode: + extensions: # based on nf-core.nf-core-extensionpack + - codezombiech.gitignore # Language support for .gitignore files + # - cssho.vscode-svgviewer # SVG viewer + - esbenp.prettier-vscode # Markdown/CommonMark linting and style checking for Visual Studio Code + - eamodio.gitlens # Quickly glimpse into whom, why, and when a line or code block was changed + - EditorConfig.EditorConfig # override user/workspace settings with settings found in .editorconfig files + - Gruntfuggly.todo-tree # Display TODO and FIXME in a tree view in the activity bar + - mechatroner.rainbow-csv # Highlight columns in csv files in different colors + # - nextflow.nextflow # Nextflow syntax highlighting + - oderwat.indent-rainbow # Highlight indentation level + - streetsidesoftware.code-spell-checker # Spelling checker for source code diff --git a/.markdownlint.yml b/.markdownlint.yml index 5200aa53..535c6602 100644 --- a/.markdownlint.yml +++ b/.markdownlint.yml @@ -1,2 +1,6 @@ no-duplicate-header: siblings_only: true +no-inline-html: + allowed_elements: + - kbd + - span diff --git a/.nf-core.yml b/.nf-core.yml index 9f70950b..12400600 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -1,34 +1,14 @@ repository_type: pipeline lint: files_exist: - # We have markdown templates instead of yaml - - .github/ISSUE_TEMPLATE/bug_report.yml - - .github/ISSUE_TEMPLATE/feature_request.yml # Our pipeline name doesn't have nf-core in it, so the logo doesn't either - assets/nf-core-yavsap_logo_light.png - docs/images/nf-core-yavsap_logo_light.png - docs/images/nf-core-yavsap_logo_dark.png # We don't have 'full' test data yet - conf/test_full.config - # We use RST instead of markdown - - docs/output.md - - docs/README.md - - docs/usage.md - # We spin our own help text and validation - # Maybe someday we'll use the nf-core templates - - lib/nfcore_external_java_deps.jar - - lib/NfcoreSchema.groovy - - lib/NfcoreTemplate.groovy - - lib/Utils.groovy - - lib/WorkflowMain.groovy - - lib/WorkflowYavsap.groovy # We don't use igenomes - conf/igenomes.config - # We don't use AWS - - .github/workflows/awstest.yml - - .github/workflows/awsfulltest.yml - # We use default Prettier settings - - .prettierrc.yml # We hang on to our Markdownlint config file for VSCode to read - .markdownlint.yml nextflow_config: false @@ -42,6 +22,8 @@ lint: - LICENSE - .github/CONTRIBUTING.md - .github/ISSUE_TEMPLATE/config.yml + - .github/ISSUE_TEMPLATE/bug_report.yml + - .github/ISSUE_TEMPLATE/feature_request.yml - .github/PULL_REQUEST_TEMPLATE.md - .github/workflows/branch.yml - .github/workflows/linting_comment.yml @@ -51,9 +33,14 @@ lint: - assets/sendmail_template.txt - .gitignore - .gitattributes + - docs/README.md - assets/multiqc_config.yaml + - .prettierrc.yml + - lib/NfcoreTemplate.groovy actions_ci: false # Can't find minimum NF version and doesn't like the addition of 'dispatch' # to the events multiqc_config: false + # We actually use template strings in the pipeline, so turn this off + template_strings: false diff --git a/.prettierignore b/.prettierignore index ac93bf76..6b1ff273 100644 --- a/.prettierignore +++ b/.prettierignore @@ -1,2 +1,4 @@ modules/ksumngs/ modules/nf-core/ 
+# Ignore Mustache templates +assets/*_mqc.html diff --git a/.prettierrc b/.prettierrc deleted file mode 100644 index e1c1e158..00000000 --- a/.prettierrc +++ /dev/null @@ -1,4 +0,0 @@ -"$schema": http://json.schemastore.org/prettierrc -overrides: - - files: "*.md" - proseWrap: always diff --git a/.prettierrc.yml b/.prettierrc.yml new file mode 100644 index 00000000..c81f9a76 --- /dev/null +++ b/.prettierrc.yml @@ -0,0 +1 @@ +printWidth: 120 diff --git a/.vscode/settings.json b/.vscode/settings.json index 944113e1..615d8bcc 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,5 +1,8 @@ { "files.associations": { - "*.config": "nextflow" + "*.config": "nextflow", + "bin/igvgen": "julia", + "bin/phylotreegen": "julia", + "bin/sequencetable": "julia" } } diff --git a/CHANGELOG.md b/CHANGELOG.md index d00dc17c..106175c4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,15 @@ and this project adheres to ## [Unreleased] +## [0.8.0] - 2022-06-06 + +### Changed + +- Visualizer created entirely in MultiQC ([#46](https://github.com/ksumngs/yavsap/pull/46)) +- nf-core template updated to v1.4.0 ([#46](https://github.com/ksumngs/yavsap/pull/46)) +- Consensus sequences now called using the variant callers for Illumina/Nanopore + reads + ## [0.7.1] - 2022-04-25 ### Fixed @@ -238,7 +247,9 @@ This is a major overhaul of YAVSAP to make it use nf-core's DSL2 modules. - Krona graphs of Kraken2 output - BLAST of assemblies and unclassified reads -[unreleased]: https://github.com/ksumngs/yavsap/compare/v0.7.0...HEAD +[unreleased]: https://github.com/ksumngs/yavsap/compare/v0.8.0...HEAD +[0.8.0]: https://github.com/ksumngs/yavsap/compare/v0.7.1...v0.8.0 +[0.7.1]: https://github.com/ksumngs/yavsap/compare/v0.7.0...v0.7.1 [0.7.0]: https://github.com/ksumngs/yavsap/compare/v0.6.5-alpha...v0.7.0 [0.6.5-alpha]: https://github.com/ksumngs/yavsap/compare/v0.6.4-alpha...v0.6.5-alpha [0.6.4-alpha]: https://github.com/ksumngs/yavsap/compare/v0.6.3-alpha...v0.6.4-alpha diff --git a/README.md b/README.md index 0331d34e..5bc864f3 100644 --- a/README.md +++ b/README.md @@ -1,87 +1,66 @@ - -

- logo -

+ -# YAVSAP (Yet Another Viral Subspecies Analysis Pipeline) +# ![yavsap](docs/images/yavsap_logo.png) YAVSAP (Yet Another Viral Subspecies Analysis Pipeline) -[![Testing](https://github.com/ksumngs/yavsap/actions/workflows/ci.yml/badge.svg?branch=master)](https://github.com/ksumngs/yavsap/actions/workflows/ci.yml) -[![Documentation](https://img.shields.io/badge/docs-stable-blue.svg)](https://ksumngs.github.io/yavsap) -[![Project Status: WIP – Initial development is in progress, but there has not yet been a stable, usable release suitable for the public.](https://www.repostatus.org/badges/latest/wip.svg)](https://www.repostatus.org/#wip) -[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A521.10.6-23aa62.svg?labelColor=000000)](https://www.nextflow.io/) -[![GitHub tag (latest by date)](https://img.shields.io/github/v/tag/ksumngs/yavsap?label=version)](https://github.com/ksumngs/yavsap/blob/master/CHANGELOG.md) -[![GitHub license](https://img.shields.io/github/license/ksumngs/yavsap)](https://github.com/ksumngs/yavsap/blob/master/LICENSE) - - - -A [Nextflow] pipeline for studying viral populations within a single sample, -tuned for [Japanese Encephalitis Virus]. -:dna::computer::chart_with_upwards_trend: Yeah, we're still looking for a better -name. :shrug: - -> This project follows the [semver] _pro forma_ and uses the [git-flow] -> branching model. - -## Installation - -1. Install [Nextflow] (>= 21.10.6) -2. Install [Conda] -3. Install one or more of - - [Singularity] (**Recommended**) - - [Podman] - - [Docker] -4. Download a [Kraken2 database] - -Check out the [Installation] docs for a more nuanced take on the requirements. - -## Usage +[![GitHub Actions CI Status](https://github.com/ksumngs/yavsap/actions/workflows/ci.yml/badge.svg)](https://github.com/ksumngs/yavsap/actions/workflows/ci.yml) +[![GitHub Actions Linting Status](https://github.com/ksumngs/yavsap/actions/workflows/linting.yml/badge.svg)](https://github.com/ksumngs/yavsap/actions/workflows/linting.yml) -### Syntax + + -```bash -nextflow run ksumngs/yavsap \ - -profile \ - --platform \ - --kraken2_db /path/to/kraken2/database \ - [--input /path/to/reads/folder] \ - [--genome accession_number] \ - [--keep_taxid list] \ - [--outdir /path/to/output] -``` - -### Example: Illumina reads with a Kraken2 database containing the host - -```bash -nextflow run ksumngs/yavsap \ - -profile singularity \ - --platform illumina \ - --kraken2_db /databases/kraken2/nt -``` - -### Example: Nanopore reads with a viral-only Kraken2 database +[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A521.10.3-23aa62.svg?labelColor=000000)](https://www.nextflow.io/) +[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) +[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) +[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://apptainer.org/docs/) -```bash -nextflow run ksumngs/yavsap \ - -profile podman \ - --platform nanopore \ - --kraken2_db /databases/kraken2/viral \ - --keep_taxid '10239' -``` + -### Example: Illumina reads aligned against a different reference genome - -```bash -nextflow run ksumngs/yavsap \ - -profile docker \ - --platform illumina \ - --kraken2_db /databases/kraken2/refseq-complete_unmasked \ - --genome 'KT957423.1' -``` - -There are _way_ more parameters than listed here. 
For a more complete -description, please read the docs on [Usage] and [Parameters]. +[![Documentation](https://img.shields.io/badge/docs-stable-blue.svg)](https://ksumngs.github.io/yavsap) +[![Project Status: WIP – Initial development is in progress, but there has not yet been a stable, usable release suitable for the public.](https://www.repostatus.org/badges/latest/wip.svg)](https://www.repostatus.org/#wip) +[![GitHub tag (latest by date)](https://img.shields.io/github/v/tag/ksumngs/yavsap?label=version)](https://github.com/ksumngs/yavsap/blob/master/CHANGELOG.md) +[![GitHub license](https://img.shields.io/github/license/ksumngs/yavsap)](https://github.com/ksumngs/yavsap/blob/master/LICENSE) -## Process Summary +> This project follows the [semver](https://semver.org) _pro forma_ and uses the [git-flow](https://nvie.com/posts/a-successful-git-branching-model) branching model. + +## Introduction + +**yavsap** is a bioinformatics best-practice analysis pipeline for identifying and analyzing viral haplotypes in metagenomic NGS reads. + +The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It uses Docker/Singularity containers making installation trivial and results highly reproducible. The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. Where possible, these processes have been submitted to and installed from [nf-core/modules](https://github.com/nf-core/modules) in order to make them available to all nf-core pipelines, and to everyone within the Nextflow community! + + + + +## Pipeline summary + +1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)/[`NanoStat`](https://github.com/wdecoster/nanostat)) +2. Read trimming ([`Trimmomatic`](http://www.usadellab.org/cms/?page=trimmomatic)/[`NanoFilt`](https://github.com/wdecoster/nanofilt/)) +3. Host read filtering ([`Kraken2`](https://github.com/DerrickWood/kraken2/wiki)+[`krakentools`](https://github.com/jenniferlu717/KrakenTools)) +4. Consensus sequence generation + 1. Reference genome download ([`entrez-direct`](https://www.ncbi.nlm.nih.gov/books/NBK179288/)) + 2. Read alignment ([`minimap2`](https://lh3.github.io/minimap2/)) + 3. Variant calling ([`CliqueSNV`](https://github.com/vtsyvina/CliqueSNV)/[`HapLink.jl`](https://ksumngs.github.io/HapLink.jl)) + 4. Consensus sequence generation ([`CliqueSNV`](https://github.com/vtsyvina/CliqueSNV)/[`HapLink.jl`](https://ksumngs.github.io/HapLink.jl)) +5. Strain identification ([`BLAST+`](https://www.ncbi.nlm.nih.gov/books/NBK569839/)) +6. Variant calling + 1. Read alignment ([`minimap2`](https://lh3.github.io/minimap2/)) + 2. Variant calling ([`CliqueSNV`](https://github.com/vtsyvina/CliqueSNV)/[`HapLink.jl`](https://ksumngs.github.io/HapLink.jl)) +7. Haplotype calling ([`CliqueSNV`](https://github.com/vtsyvina/CliqueSNV)/[`HapLink.jl`](https://ksumngs.github.io/HapLink.jl)) +8. Phylogenetic tree generation + 1. Multiple sequence alignment ([`MAFFT`](https://mafft.cbrc.jp/alignment/software/)) + 2. Maximum-likelihood phylogenetic trees ([`RAxML-ng`](https://github.com/amkozlov/raxml-ng)) +9. 
Output visualization + - Haplotypes table ([`BioJulia`](https://biojulia.net)) + - Read QC results ([`MultiQC`](http://multiqc.info/)) + - Metagenomic classifications ([`Krona`](https://github.com/marbl/Krona/wiki/KronaTools)) + - Alignments ([`IGV`](https://igv.org/)) + - Phylogenetic tree ([`phylotree.js`](https://github.com/veg/phylotree.js)) ```mermaid flowchart TD @@ -111,47 +90,58 @@ flowchart TD L --> M[Phylogenetic tree] ``` -Here's what happens to your reads in the pipeline. - -1. Quality analysis ([FastQC]) -2. Quality trimming ([Trimmomatic]/[NanoFilt]) -3. Read classification ([Kraken2]) -4. Host read removal ([KrakenTools]) -5. Alignment of reads against the reference genome ([minimap2]) -6. Consensus sequence generation ([iVar]) -7. Closest strain matching ([BLAST]) -8. Realignment to closest strain ([minimap2]) -9. Variant calling ([CliqueSNV]/[HapLink.jl]) -10. Haplotype calling ([CliqueSNV]/[HapLink.jl]) -11. Multiple sequence alignment of consensus sequences, strain genomes, and - haplotypes alignment ([MAFFT]) -12. Phylogenetic tree generation ([raxml-ng]) -13. Alignments and phylogenetics output to browser ([IGV]+[phylotree.js]) - -[blast]: https://blast.ncbi.nlm.nih.gov/Blast.cgi -[cliquesnv]: https://github.com/vtsyvina/CliqueSNV -[conda]: https://conda.io/miniconda.html -[docker]: https://docs.docker.com/engine/installation -[fastqc]: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/ -[git-flow]: https://nvie.com/posts/a-successful-git-branching-model -[haplink.jl]: https://ksumngs.github.io/HapLink.jl -[igv]: https://igv.org/ -[installation]: https://ksumngs.github.io/yavsap/install -[ivar]: https://andersen-lab.github.io/ivar/html/manualpage.html -[japanese encephalitis virus]: https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=11072 -[kraken2 database]: https://github.com/DerrickWood/kraken2/wiki/Manual#custom-databases -[kraken2]: https://github.com/DerrickWood/kraken2/wiki -[krakentools]: https://github.com/jenniferlu717/KrakenTools -[mafft]: https://mafft.cbrc.jp/alignment/software/ -[minimap2]: https://lh3.github.io/minimap2/ -[nanofilt]: https://github.com/wdecoster/nanofilt/ -[nextflow]: https://nextflow.io -[parameters]: https://ksumngs.github.io/yavsap/parameters -[phylotree.js]: https://github.com/veg/phylotree.js -[podman]: https://podman.io -[raxml-ng]: https://github.com/amkozlov/raxml-ng -[semver]: https://semver.org -[singularity]: https://www.sylabs.io/guides/3.8/user-guide -[spades]: cab.spbu.ru/spades -[trimmomatic]: www.usadellab.org/cms/?page=trimmomatic -[usage]: https://ksumngs.github.io/yavsap/usage +## Quick Start + +1. Install [`Nextflow`](https://www.nextflow.io/docs/latest/getstarted.html#installation) (`>=21.10.3`) + +2. Install any of [`Docker`](https://docs.docker.com/engine/installation/), [`Singularity`](https://www.sylabs.io/guides/3.0/user-guide/) (you can follow [this tutorial](https://singularity-tutorial.github.io/01-installation/)), [`Podman`](https://podman.io/), [`Shifter`](https://nersc.gitlab.io/development/shifter/how-to-use/) or [`Charliecloud`](https://hpc.github.io/charliecloud/) for full pipeline reproducibility _(you can use [`Conda`](https://conda.io/miniconda.html) both to install Nextflow itself and also to manage software within pipelines. Please only use it within pipelines as a last resort; see [docs](https://nf-co.re/usage/configuration#basic-configuration-profiles))_. + +3. 
Download the pipeline and test it on a minimal dataset with a single command:
+
+   ```console
+   nextflow run ksumngs/yavsap -profile test,YOUR_PROFILE --outdir <OUTDIR>
+   ```
+
+   Note that some form of configuration will be needed so that Nextflow knows how to fetch the required software. This is usually done in the form of a config profile (`YOUR_PROFILE` in the example command above). You can chain multiple config profiles in a comma-separated string.
+
+   > - The pipeline comes with config profiles called `docker`, `singularity`, `podman`, `shifter`, `charliecloud` and `conda` which instruct the pipeline to use the named tool for software management. For example, `-profile test,docker`.
+   > - Please check [nf-core/configs](https://github.com/nf-core/configs#documentation) to see if a custom config file to run nf-core pipelines already exists for your Institute. If so, you can simply use `-profile <institute>` in your command. This will enable either `docker` or `singularity` and set the appropriate execution settings for your local compute environment.
+   > - If you are using `singularity`, please use the [`nf-core download`](https://nf-co.re/tools/#downloading-pipelines-for-offline-use) command to download images first, before running the pipeline. Setting the [`NXF_SINGULARITY_CACHEDIR` or `singularity.cacheDir`](https://www.nextflow.io/docs/latest/singularity.html?#singularity-docker-hub) Nextflow options enables you to store and re-use the images from a central location for future pipeline runs.
+   > - If you are using `conda`, it is highly recommended to use the [`NXF_CONDA_CACHEDIR` or `conda.cacheDir`](https://www.nextflow.io/docs/latest/conda.html) settings to store the environments in a central location for future pipeline runs.
+
+4. Start running your own analysis!
+
+   ```console
+   nextflow run ksumngs/yavsap -profile <docker/singularity/podman/shifter/charliecloud/conda/institute> --input . --outdir <OUTDIR> --platform illumina --kraken2_db https://genome-idx.s3.amazonaws.com/kraken/k2_viral_20210517.tar.gz --keep_taxid classified
+   ```
+
+## Documentation
+
+The yavsap pipeline comes with documentation about the pipeline [usage](https://ksumngs.github.io/yavsap/usage), [parameters](https://ksumngs.github.io/yavsap/parameters) and [output](https://ksumngs.github.io/yavsap/output).
+
+## Credits
+
+yavsap was originally written by [Thomas A. Christensen II](https://millironx.com), under the supervision of [Rachel Palinski](https://www.vet.k-state.edu/academics/dmp/faculty-staff/faculty/palinski/) at the [Kansas State University Veterinary Diagnostic Laboratory](http://www.ksvdl.org/).
+
+## Contributions and Support
+
+If you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).
+
+## Citations
+
+This pipeline uses code and infrastructure developed and maintained by the [nf-core](https://nf-co.re) community, reused here under the [MIT license](https://github.com/nf-core/tools/blob/master/LICENSE).
+
+> The nf-core framework for community-curated bioinformatics pipelines.
+>
+> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.
+>
+> Nat Biotechnol. 2020 Feb 13. doi: 10.1038/s41587-020-0439-x.
+
+In addition, references of tools and data used in this pipeline are as follows:
+
+An extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.
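The Quick Start in the README above relies on site-specific Nextflow configuration for software provisioning and cache locations. A minimal sketch of what such a local configuration could look like follows; the profile choice, Kraken2 database path, and cache directory are illustrative assumptions, not defaults shipped with the pipeline.

```groovy
// local.config — hypothetical site configuration; every value here is an assumption.
// Usage: nextflow run ksumngs/yavsap -profile singularity -c local.config
params {
    platform   = 'illumina'                 // or 'nanopore'
    kraken2_db = '/databases/kraken2/viral' // assumed path to a local Kraken2 database
    outdir     = 'results'
}

singularity {
    enabled  = true
    cacheDir = '/opt/nxf/singularity-cache' // central image cache, per the Quick Start note
}
```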
diff --git a/assets/cliquesnv_info.yml b/assets/cliquesnv_info.yml new file mode 100644 index 00000000..fab8223e --- /dev/null +++ b/assets/cliquesnv_info.yml @@ -0,0 +1,4 @@ +name: CliqueSNV +description: is a tool for accurate assembly of minority viral haplotypes from next-generation sequencing through efficient noise reduction. +doi: 10.1093/nar/gkab576 +url: https://github.com/vtsyvina/CliqueSNV diff --git a/assets/email_template.html b/assets/email_template.html index f47ff389..76e04fee 100644 --- a/assets/email_template.html +++ b/assets/email_template.html @@ -4,21 +4,11 @@ - + YAVSAP Pipeline Report -
+

YAVSAP v${version}

@@ -36,17 +26,10 @@

Run Name: $runName

border-radius: 4px; " > -

- YAVSAP execution completed unsuccessfully! -

-

- The exit status of the task that caused the workflow execution to fail - was: $exitStatus. -

+

YAVSAP execution completed unsuccessfully!

+

The exit status of the task that caused the workflow execution to fail was: $exitStatus.

The full error message was:

-
-${errorReport}
+
${errorReport}
""" } else { out << """
""" } %> -

- The workflow was completed at $dateComplete (duration: - $duration) -

+

The workflow was completed at $dateComplete (duration: $duration)

The command used to launch the workflow was as follows:

https://github.com/ksumng/yavsap
+        https://github.com/ksumngs/yavsap
       

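The placeholders in `assets/email_template.html` above (`${version}`, `$runName`, `${errorReport}`, and the `<% ... %>` scriptlets) are plain Groovy template syntax, the general mechanism Nextflow's mail notifications are built on. Below is a minimal sketch of previewing the template outside the pipeline; the binding keys are assumptions matched to the placeholders visible in this diff, and any placeholder the full template references but the binding omits would need to be added.

```groovy
// Sketch: render the e-mail template with Groovy's GString template engine.
// The binding keys below are assumptions based on the placeholders in the diff.
import groovy.text.GStringTemplateEngine

def binding = [
    version     : '0.8.0',
    runName     : 'example_run',
    success     : false,
    exitStatus  : 1,
    errorReport : '(example) a task exited with a non-zero status',
    dateComplete: new Date().toString(),
    duration    : '12m 34s',
]

def rendered = new GStringTemplateEngine()
    .createTemplate(new File('assets/email_template.html'))
    .make(binding)
    .toString()

new File('email_preview.html').text = rendered
```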
diff --git a/assets/haplink_info.yml b/assets/haplink_info.yml new file mode 100644 index 00000000..177cd499 --- /dev/null +++ b/assets/haplink_info.yml @@ -0,0 +1,4 @@ +name: HapLink.jl +description: is a viral haplotype caller for long NGS reads based on linkage disequilibrium. +doi: ~ +url: https://ksumngs.github.io/HapLink.jl diff --git a/assets/igv_mqc.html b/assets/igv_mqc.html new file mode 100644 index 00000000..325b7f10 --- /dev/null +++ b/assets/igv_mqc.html @@ -0,0 +1,19 @@ + +
+ + + diff --git a/assets/kelpie_mqc.html b/assets/kelpie_mqc.html new file mode 100644 index 00000000..f088737e --- /dev/null +++ b/assets/kelpie_mqc.html @@ -0,0 +1,64 @@ + + +
+
+ {{{kelpieseqtable}}} +
+
+ + diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index ceeb6e67..e958b65f 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,3 +1,37 @@ +report_comment: > + This report has been generated by the yavsap + analysis pipeline. For information about how to interpret these results, please see the + documentation. + +show_analysis_paths: false + +run_modules: + - custom_content + - fastqc + - nanostat + - trimmomatic + - kraken + +module_order: + - fastqc: + - nanostat: + - trimmomatic: + - kraken: + +report_section_order: + minimap2: + order: -1000 + cliquesnv: + order: -1001 + haplink_jl: + order: -1001 + "raxml-ng": + order: -1002 + software_versions: + order: -1100 + "yavsap-summary": + order: -1101 + extra_fn_clean_exts: - "kreport" - type: regex @@ -5,11 +39,3 @@ extra_fn_clean_exts: - type: regex pattern: "_[0-9]" - "_nanostat.log" -top_modules: - - "fastqc" - - "nanostat" -module_order: - - fastqc - - nanostat - - trimmomatic - - kraken diff --git a/assets/phylotree_mqc.html b/assets/phylotree_mqc.html new file mode 100644 index 00000000..9d3a2e18 --- /dev/null +++ b/assets/phylotree_mqc.html @@ -0,0 +1,353 @@ + + +
+ +
+
+
+
+ + + + diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv new file mode 100644 index 00000000..5f653ab7 --- /dev/null +++ b/assets/samplesheet.csv @@ -0,0 +1,3 @@ +sample,fastq_1,fastq_2 +SAMPLE_PAIRED_END,/path/to/fastq/files/AEG588A1_S1_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A1_S1_L002_R2_001.fastq.gz +SAMPLE_SINGLE_END,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz, diff --git a/assets/schema_input.json b/assets/schema_input.json new file mode 100644 index 00000000..5f5c9c9b --- /dev/null +++ b/assets/schema_input.json @@ -0,0 +1,36 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "$id": "https://raw.githubusercontent.com/nf-core/yavsap/master/assets/schema_input.json", + "title": "nf-core/yavsap pipeline - params.input schema", + "description": "Schema for the file provided with params.input", + "type": "array", + "items": { + "type": "object", + "properties": { + "sample": { + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "Sample name must be provided and cannot contain spaces" + }, + "fastq_1": { + "type": "string", + "pattern": "^\\S+\\.f(ast)?q\\.gz$", + "errorMessage": "FastQ file for reads 1 must be provided, cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" + }, + "fastq_2": { + "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'", + "anyOf": [ + { + "type": "string", + "pattern": "^\\S+\\.f(ast)?q\\.gz$" + }, + { + "type": "string", + "maxLength": 0 + } + ] + } + }, + "required": ["sample", "fastq_1"] + } +} diff --git a/assets/yavsap-logo.svg b/assets/yavsap-logo.svg index 4f1b753e..888f47b3 100644 --- a/assets/yavsap-logo.svg +++ b/assets/yavsap-logo.svg @@ -2,641 +2,86 @@ + inkscape:current-layer="layer1"> + + - - - - - + id="defs1317"> - + clipPathUnits="userSpaceOnUse" + id="clipPath864"> + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + diff --git a/bin/haplotype-parser b/bin/haplotype-parser deleted file mode 100755 index f79cdb33..00000000 --- a/bin/haplotype-parser +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env python3 -# Converts a haplotype yaml file into a TSV of the haplotype ids and their -# abundances -import argparse -import yaml -import sys - -parser = argparse.ArgumentParser() -parser.add_argument("id", nargs="?", type=str) -parser.add_argument( - "haplotype_yaml", nargs="?", type=argparse.FileType("r"), default=sys.stdin -) -parser.add_argument( - "haplotype_tsv", nargs="?", type=argparse.FileType("w"), default=sys.stdout -) - -id = parser.parse_args().id -haplotype_yaml = parser.parse_args().haplotype_yaml -haplotype_tsv = parser.parse_args().haplotype_tsv - -for hap in yaml.load(haplotype_yaml, Loader=yaml.BaseLoader)["haplotypes"]: - hap_name = hap["name"].replace(">", "") - hap_freq = hap["frequency"] - haplotype_tsv.write(f"{id}\t{hap_name}\t{hap_freq}\n") diff --git a/bin/haplotypestandardizer b/bin/haplotypestandardizer new file mode 100755 index 00000000..e0e184c7 --- /dev/null +++ b/bin/haplotypestandardizer @@ -0,0 +1,120 @@ +#!/usr/bin/env julia +using BioAlignments +using FASTX +using YAML + +sample_name = ARGS[1] +reference_fasta = ARGS[2] +strain_name = ARGS[3] +strain_ncbi = ARGS[4] +consensus_fasta = ARGS[5] +haplotype_yaml = ARGS[6] 
+haplotype_fasta = ARGS[7] +output_file = ARGS[8] + +# Create an alignment model that hates gaps +const SCORE_MODEL = AffineGapScoreModel(EDNAFULL; gap_open=-10, gap_extend=-5) +function align_sequences(query, reference) + return alignment(pairalign(GlobalAlignment(), query, reference, SCORE_MODEL)) +end + +function aligned_sequence(alignment) + seq = [] + for (s, r) in alignment + push!(seq, s) + end + return string(seq...) +end + +# Read in the first (and hopefully only) record from the reference file +reference_reader = FASTA.Reader(open(reference_fasta, "r")) +reference_record = first(reference_reader) +if !isempty(reference_reader) + @warn "Reference $reference_fasta contains more than one sequence. Only the first will be used." +end #if +close(reference_reader) +reference_sequence = FASTA.sequence(reference_record) + +# Read in the first (and hopefully only) record from the consensus file +consensus_reader = FASTA.Reader(open(consensus_fasta, "r")) +consensus_record = first(consensus_reader) +if !isempty(consensus_reader) + @warn "Consensus $consensus_fasta contains more than one sequence. Only the first will be used." +end #if +close(consensus_reader) +consensus_sequence = FASTA.sequence(consensus_record) + +# Align the consensus sequence against the reference +consensus_alignment = Alignment("1M") +try + global consensus_alignment = align_sequences(consensus_sequence, reference_sequence) +catch + error("There was an error aligning the consensus sequence") +end #try + +haplotypes = Dict[] + +# Only log haplotypes if they are provided +if isfile(haplotype_fasta) && isfile(haplotype_yaml) + # Read in the haplotype yaml data + haplotype_data = YAML.load_file(haplotype_yaml)["haplotypes"] + + # Read in the haplotype sequences + haplotype_reader = FASTA.Reader(open(haplotype_fasta, "r")) + haplotype_records = collect(haplotype_reader) + close(haplotype_reader) + + # Convert each haplotype to an output record + for haplotype in haplotype_data + # Remove any angle brackets that might be left in the YAML (CliqueSNV tends to do this) + haplotype_name = replace(haplotype["name"], r"^>" => "") + + # Get the haplotype frequency + haplotype_frequency = haplotype["frequency"] + + # Get the record that matches this id + matching_haplotype_records = filter( + r -> occursin(haplotype_name, FASTA.identifier(r)), haplotype_records + ) + if isempty(matching_haplotype_records) + @warn "No sequence found for $haplotype_name: skipping" + continue + end #if + haplotype_record = first(matching_haplotype_records) + + # Align the sequence + try + haplotype_sequence = FASTA.sequence(haplotype_record) + haplotype_alignment = align_sequences(haplotype_sequence, reference_sequence) + + push!( + haplotypes, + Dict( + haplotype_name => Dict( + "frequency" => haplotype_frequency, + "sequence" => aligned_sequence(haplotype_alignment), + ), + ), + ) + catch e + if e isa BoundsError + @warn "$haplotype_name has gaps in its sequence and cannot be aligned" + else + @warn "$haplotype_name encountered an unknown error and cannot be aligned" + end #if + end #try + end #for +end #if + +# Write everything out to a dictionary +YAML.write_file( + output_file, + Dict( + sample_name => Dict( + "strain_name" => strain_name, + "strain_ncbi" => strain_ncbi, + "consensus_sequence" => aligned_sequence(consensus_alignment), + "haplotypes" => haplotypes, + ), + ), +) diff --git a/bin/igvgen b/bin/igvgen new file mode 100755 index 00000000..c7e8e593 --- /dev/null +++ b/bin/igvgen @@ -0,0 +1,34 @@ +#!/usr/bin/env julia +using JSON3 +using 
Mustache + +sample_list = ARGS[1] +igvsrc = ARGS[2] +template_file = ARGS[3] +output_file = ARGS[4] + +d = Dict( + "igvopts" => JSON3.write( + Dict( + "reference" => Dict( + "id" => "reference", + "fastaURL" => "reference.fasta", + "indexURL" => "reference.fasta.fai", + ), + "tracks" => [ + Dict( + "type" => "alignment", + "format" => "bam", + "url" => "$samplename.bam", + "indexURL" => "$samplename.bam.bai", + "name" => samplename, + ) for samplename in readlines(sample_list) + ], + ), + ), + "igvjs" => String(read(igvsrc)), +) + +open(output_file, "w") do f + write(f, render_from_file(template_file, d)) +end diff --git a/bin/jl-lib/arguments.jl b/bin/jl-lib/arguments.jl deleted file mode 100644 index af1fb01b..00000000 --- a/bin/jl-lib/arguments.jl +++ /dev/null @@ -1,43 +0,0 @@ - -function parse_arguments() - s = ArgParseSettings() - - # Disable Julia formatter as it doesn't understand the nested table syntax of ArgParse - #! format: off - - @add_arg_table! s begin - "strainstable" - help = "TSV of strains" - required = true - arg_type = String - range_tester = x -> isfile(x) - "alignments" - help = "SAM of haplotypes" - required = true - arg_type = String - range_tester = x -> isfile(x) - "reference" - help = "FASTA of reference genome" - required = true - arg_type = String - range_tester = x -> isfile(x) - "--multiqc" - help = "Do include a MultiQC report section" - action = :store_false - "--no-multiqc" - help = "Do not include a MultiQC report section" - action = :store_true - "--krona" - help = "Do include a Krona chart" - action = :store_false - "--no-krona" - help = "Do not include a Krona chart" - action = :store_true - "--newick" - help = "A file that containing the phylogenetic tree" - arg_type = String - range_tester = x -> isfile(x) - end #add_arg_table - - return parse_args(s) -end #function diff --git a/bin/jl-lib/functions.jl b/bin/jl-lib/functions.jl deleted file mode 100644 index 755d059b..00000000 --- a/bin/jl-lib/functions.jl +++ /dev/null @@ -1,90 +0,0 @@ -function ncbi_link(accession_number::AbstractString) - return a(accession_number; href="https://ncbi.nlm.nih.gov/nuccore/$accession_number") -end #function - -function base_table(sam::SAM.Record, refseq::NucleotideSeq) - sequence = SAM.sequence(sam) - alignment = SAM.alignment(sam) - seq_start = SAM.position(sam) - seq_end = SAM.rightposition(sam) - - basecells = [] - - for (i, refbase) in enumerate(refseq) - if i < seq_start || i > seq_end - basetext = "-" - baseclass = "" - else - (loc, op) = ref2seq(alignment, i) - if isdeleteop(op) || ismetaop(op) - basetext = "-" - baseclass = "variant" - else - altbase = sequence[loc] - basetext = string(altbase) - baseclass = string(altbase) - if altbase != refbase - baseclass = baseclass * " variant" - end #if - end #if - end #if - - push!(basecells, td(basetext; class=baseclass)) - end #for - - return basecells -end #function - -function sample_rows( - samplename::AbstractString, - reference::NucleotideSeq, - alignments::AbstractVector{SAM.Record}, - data::AbstractDataFrame, -) - sample_table = filter(v -> v.sample == samplename, data) - sample_strain = replace(first(sample_table.strain), "_" => ": ") - sample_accession = first(sample_table.accession) - - num_strains = length(eachrow(sample_table)) - - # Get the alignment of this sample's consensus sequence - consensus_record = first( - filter(s -> contains(SAM.tempname(s), "Consensus_$(samplename)"), alignments) - ) - - rows = EzXML.Node[] - push!( - rows, - tr( - th(samplename; rowspan=(num_strains + 1)), - 
td("Consensus"; colspan=2), - td(sample_strain), - td(ncbi_link(sample_accession)), - base_table(consensus_record, reference)..., - ), - ) - - for haplotype_table in eachrow(sample_table) - haplotype_name = haplotype_table.haplotype - haplotype_frequency = haplotype_table.frequency - - haplotype_record = first( - filter( - s -> contains(SAM.tempname(s), haplotype_table.haplotype), alignment_records - ), - ) - - push!( - rows, - tr( - td(haplotype_name), - td(em("$(round(haplotype_frequency * 100))%")), - td(sample_strain), - td(ncbi_link(sample_accession)), - base_table(haplotype_record, reference)..., - ), - ) - end #for - - return rows -end diff --git a/bin/jl-lib/phylotree-colorizer.js b/bin/jl-lib/phylotree-colorizer.js deleted file mode 100644 index 48a67e86..00000000 --- a/bin/jl-lib/phylotree-colorizer.js +++ /dev/null @@ -1,53 +0,0 @@ -tree = new phylotree.phylotree(newick); -selection_set = !tree.get_parsed_tags().length - ? tree.get_parsed_tags() - : ["Foreground"]; -nodeColorizer = function (element, data) { - try { - var count_class = 0; - selection_set.forEach(function (d, i) { - if (data[d]) { - count_class++; - element.style("fill", color_scale(i), "important"); - } - }); - if (count_class == 0) { - element.style("fill", null); - } - } catch (e) {} -}; -edgeColorizer = function (element, data) { - try { - var count_class = 0; - - selection_set.forEach(function (d, i) { - if (data[d]) { - count_class++; - element.style("stroke", color_scale(i), "important"); - } - }); - - if (count_class == 0) { - element.style("stroke", null).classes("branch-multiple", false); - } else { - element.classed("branch-multiple", true); - } - } catch (e) {} -}; -colorNodesByName = function (element, data) { - nodeColorizer(element, data); - var m = data.data.name.split("_"); - element.style("stroke", color_scale(m[0])); -}; -colorEdgesByTarget = function (element, data) { - edgeColorizer(element, data); - var m = data.target.data.name.split("_"); - element.style("stroke", color_scale(m[0])); -}; -rendered_tree = tree.render({ - container: "#phylotree", - "node-styler": colorNodesByName, - //'edge-styler': colorEdgesByTarget -}); -$(tree.display.container).empty(); -$(tree.display.container).html(tree.display.show()); diff --git a/bin/jl-lib/phylotree-utils.js b/bin/jl-lib/phylotree-utils.js deleted file mode 100644 index a70820a0..00000000 --- a/bin/jl-lib/phylotree-utils.js +++ /dev/null @@ -1,150 +0,0 @@ -/* - phylotree-utils.js - - Extra functions that make Phylotree.js more useful. - Ported from Phylotree.js's example document. - Copyright (c) 2016 iGEM/UCSD evolutionary biology and bioinformatics group - https://github.com/veg/phylotree.js/blob/93fdebb81503f83b3fffe0a56ad3c02c64535fea/index.html - - Used under MIT License - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. -*/ - -$("[data-direction]").on("click", function (e) { - var which_function = - $(this).data("direction") == "vertical" - ? 
tree.display.spacing_x.bind(tree.display) - : tree.display.spacing_y.bind(tree.display); - which_function(which_function() + +$(this).data("amount")).update(); -}); - -function sort_nodes(asc) { - tree.resortChildren(function (a, b) { - return (b.height - a.height || b.value - a.value) * (asc ? 1 : -1); - }); -} - -$("#sort-ascending").on("click", function (e) { - sort_nodes(true); - tree.display.update(); -}); - -$("#sort-descending").on("click", function (e) { - sort_nodes(false); - tree.display.update(); -}); - -var datamonkey_save_image = function (type, container) { - var prefix = { - xmlns: "http://www.w3.org/2000/xmlns/", - xlink: "http://www.w3.org/1999/xlink", - svg: "http://www.w3.org/2000/svg", - }; - - var svg = $(container).find("svg")[0]; - if (!svg) { - svg = $(container)[0]; - } - - svg.setAttribute("version", "1.1"); - - var defsEl = document.createElement("defs"); - svg.insertBefore(defsEl, svg.firstChild); - - var styleEl = document.createElement("style"); - defsEl.appendChild(styleEl); - styleEl.setAttribute("type", "text/css"); - - // removing attributes so they aren't doubled up - svg.removeAttribute("xmlns"); - svg.removeAttribute("xlink"); - - // These are needed for the svg - if (!svg.hasAttributeNS(prefix.xmlns, "xmlns")) { - svg.setAttributeNS(prefix.xmlns, "xmlns", prefix.svg); - } - - if (!svg.hasAttributeNS(prefix.xmlns, "xmlns:xlink")) { - svg.setAttributeNS(prefix.xmlns, "xmlns:xlink", prefix.xlink); - } - - // This is a deviation from the original Phylotree-utils - // We know that the 2nd stylesheet contains all the svg styles, - // so import them - styles = ""; - styleRules = [...document.styleSheets[1].cssRules]; - styleRules.forEach(function (style) { - styles = styles + style.cssText + "\n"; - }); - svg.getElementsByTagName("style")[0].innerText = styles; - - var source = new XMLSerializer().serializeToString(svg); - var doctype = new XMLSerializer().serializeToString( - document.implementation.createDocumentType( - "svg", - "-//W3C//DTD SVG 1.1//EN", - "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd" - ) - ); - var to_download = [doctype + source]; - var image_string = - "data:image/svg+xml;base66," + encodeURIComponent(to_download); - - if (navigator.msSaveBlob) { - // IE10 - download(image_string, "image.svg", "image/svg+xml"); - } else if (type == "png") { - b64toBlob( - image_string, - function (blob) { - var url = window.URL.createObjectURL(blob); - var pom = document.createElement("a"); - pom.setAttribute("download", "image.png"); - pom.setAttribute("href", url); - $("body").append(pom); - pom.click(); - pom.remove(); - }, - function (error) { - console.log(error); // eslint-disable-line - } - ); - } else { - var pom = document.createElement("a"); - pom.setAttribute("download", "image.svg"); - pom.setAttribute("href", image_string); - $("body").append(pom); - pom.click(); - pom.remove(); - } -}; - -$("#save-image").on("click", function (e) { - datamonkey_save_image("svg", "#phylotree"); -}); - -$(".phylotree-layout-mode").on("click", function (e) { - if (tree.display.radial() != ($(this).data("mode") == "radial")) { - $(".phylotree-layout-mode").toggleClass("active"); - tree.display.radial(!tree.display.radial()).update(); - } -}); - -$(".phylotree-align-toggler").on("click", function (e) { - var button_align = $(this).data("align"); - var tree_align = tree.display.options.alignTips; - - if (tree_align != button_align) { - tree.display.alignTips(button_align == "right"); - $(".phylotree-align-toggler").toggleClass("active"); - 
tree.display.update(); - } -}); -color_scale = d3.scaleOrdinal(d3.schemeCategory10); diff --git a/bin/jl-lib/static-divs.jl b/bin/jl-lib/static-divs.jl deleted file mode 100644 index b45873ba..00000000 --- a/bin/jl-lib/static-divs.jl +++ /dev/null @@ -1,153 +0,0 @@ -help_modal = html_div( - html_div( - html_div( - header( - h5("Help"; class="modal-title"), - button( - ""; - type="button", - class="btn-close", - data_bs_dismiss="modal", - aria_label="Close", - ); - class="modal-header", - ); - class="modal-content", - ), - ; - class="modal-dialog modal-dialog-centered", - ), - html_div("Lorum ipsum text..."; class="modal-body"); - id="help-dialog", - class="modal fade", - tabindex="-1", -) - -phylogenetic_section = section( - h2("phylotree.js"), - html_div( - nav( - html_div( - button( - i(""; class="fas fa-arrows-alt-v"); - class="btn btn-primary", - type="button", - data_direction="vertical", - data_amount="1", - data_bs_toggle="tooltip", - title="Expand vertical spacing", - ), - button( - i( - ""; - class="fas fa-compress-alt fa-rotate-by", - style="--fa-rotate-angle: 135deg;", - ); - class="btn btn-primary", - type="button", - data_direction="vertical", - data_amount="-1", - data_bs_toggle="tooltip", - title="Compress vertical spacing", - ), - button( - i(""; class="fas fa-arrows-alt-h"); - class="btn btn-primary", - type="button", - data_direction="horizontal", - data_amount="1", - data_bs_toggle="tooltip", - title="Expand horizontal spacing", - ), - button( - i( - ""; - class="fas fa-compress-alt fa-rotate-by", - style="--fa-rotate-angle: 45deg;", - ); - class="btn btn-primary", - type="button", - data_direction="horizontal", - data_amount="-1", - data_bs_toggle="tooltip", - title="Compress horizontal spacing", - ); - class="btn-group mx-1", - role="group", - ), - html_div( - button( - i(""; class="fas fa-sort-amount-down-alt"); - id="sort-ascending", - class="btn btn-primary", - type="button", - data_bs_toggle="tooltip", - title="Sort deepest clades to the bottom", - ), - button( - i(""; class="fas fa-sort-amount-up-alt"); - id="sort-descending", - class="btn btn-primary", - type="button", - data_bs_toggle="tooltip", - title="Sort deepest clades to the top", - ); - class="btn-group mx-1", - role="group", - ), - html_div( - button( - i(""; class="fas fa-file-image"); - id="save-image", - class="btn btn-primary", - type="button", - data_bs_toggle="tooltip", - title="Save image", - ); - class="btn-group mx-1", - ), - html_div( - button( - i(""; class="fas fa-align-left"); - class="btn btn-primary phylotree-layout-mode active", - type="button", - title="Linear", - data_mode="linear", - ), - button( - i(""; class="fas fa-circle-notch"); - class="btn btn-primary phylotree-layout-mode", - type="button", - title="Radial", - data_mode="radial", - ); - class="btn-group mx-1", - ), - html_div( - button( - i(""; class="fas fa-outdent"); - class="btn btn-primary phylotree-align-toggler active", - type="button", - title="Align left", - data_align="left", - ), - html_div( - button( - i(""; class="fas fa-indent"); - class="btn btn-primary phylotree-align-toggler", - type="button", - title="Align right", - data_align="right", - ), - ); - class="btn-group mx-1", - role="group", - ); - class="col-12", - ), - html_div(html_div(""; id="phylotree", class="my-2 p-3"); class="col-12"); - class="row", - ); - id="phylogenetics", - class="container-fluid min-vh-100", -) diff --git a/bin/jl-lib/structs.jl b/bin/jl-lib/structs.jl deleted file mode 100644 index 652602de..00000000 --- a/bin/jl-lib/structs.jl +++ 
/dev/null @@ -1,5 +0,0 @@ -struct ReportSection - href::AbstractString - title::AbstractString - fontawesome::AbstractString -end #struct diff --git a/bin/jl-lib/yavsap.css b/bin/jl-lib/yavsap.css deleted file mode 100644 index abcc6b5c..00000000 --- a/bin/jl-lib/yavsap.css +++ /dev/null @@ -1,50 +0,0 @@ -.nav-flush .nav-link { - border-radius: 0; -} - -table { - empty-cells: show; -} - -thead > tr > th { - text-align: center; - vertical-align: bottom; -} - -tbody { - overflow: scroll; -} - -th { - text-align: left; - vertical-align: top; -} - -.gene { - background-color: yellow !important; -} - -.variant { - background-color: purple !important; -} - -.A { - color: red; -} - -.C { - color: blue; -} - -.G { - color: orange; -} - -.T { - color: green; -} - -.iframe-content { - width: calc(100vw - 5rem); - height: 100vh; -} diff --git a/bin/phylotreegen b/bin/phylotreegen new file mode 100755 index 00000000..7887fbdc --- /dev/null +++ b/bin/phylotreegen @@ -0,0 +1,22 @@ +#!/usr/bin/env julia +using Mustache + +newick_file = ARGS[1] +template_file = ARGS[2] +output_file = ARGS[3] +phylotree_css = ARGS[4] +d3_js = ARGS[5] +underscore_js = ARGS[6] +phylotree_js = ARGS[7] + +d = Dict( + "newick" => first(readlines(newick_file)), + "stylesheet" => String(read(phylotree_css)), + "D3" => String(read(d3_js)), + "underscore" => String(read(underscore_js)), + "phylotree" => String(read(phylotree_js)), +) + +open(output_file, "w") do f + write(f, render_from_file(template_file, d)) +end diff --git a/bin/renamehapseqs b/bin/renamehapseqs deleted file mode 100755 index 42310853..00000000 --- a/bin/renamehapseqs +++ /dev/null @@ -1,63 +0,0 @@ -#!/usr/bin/env Rscript - -# Import libraries -library(phylotools) - -# Read in the cli arguments -args <- commandArgs(trailingOnly = TRUE) -fasta_in <- args[1] -sample_name <- args[2] -fasta_out <- args[3] - -# Get the old names -fasta_raw <- read.fasta(fasta_in, clean_name = FALSE) -name_changes <- data.frame(old = fasta_raw[1]$seq.name, new = NA) - -# iVar consensus sequence rename -# e.g. Consensus_SAMPLE_... -> SAMPLE_consensus -if (sum(startsWith(name_changes$old, "Consensus")) > 0) { - name_changes[startsWith(name_changes$old, "Consensus"), ]$new <- - paste(sample_name, "consensus", sep = "_") -} - -# HapLink.jl haplotype sequence rename -# e.g. ???????? ... -> SAMPLE_haplotype_???????? -# This might clash with CliqueSNV if the samplename is entirely hex, so do it -# first -if (sum(grepl("^[0-9a-f]{8}\\s.*", name_changes$old)) > 0) { - name_changes[grepl("^[0-9a-f]{8}\\s.*", name_changes$old), ]$new <- - paste( - sample_name, - "haplotype", - substr( - name_changes[grepl("^[0-9a-f]{8}.*", name_changes$old), ]$old, - 1, - 8 - ), - sep = "_" - ) -} - -# CliqueSNV haplotype sequence rename -# e.g. SAMPLE_##_... 
-> SAMPLE_haplotype_## -if (sum(grepl("^.+_[0-9]+_.*", name_changes$old)) > 0) { - name_changes[grepl("^.+_[0-9]+_.*", name_changes$old), ]$new <- - paste( - sample_name, - "haplotype", - sapply( - strsplit( - name_changes[grepl( - "^.+_[0-9]+_.*", - name_changes$old - ), ]$old, - "_" - ), - "[[", - 2 - ), - sep = "_" - ) -} - -rename.fasta(fasta_in, name_changes, fasta_out) diff --git a/bin/renamerefseqs b/bin/renamerefseqs deleted file mode 100755 index 64dce96c..00000000 --- a/bin/renamerefseqs +++ /dev/null @@ -1,35 +0,0 @@ -#!/usr/bin/env Rscript - -# Import libraries -library(phylotools) - -# Read in the cli arguments -args <- commandArgs(trailingOnly = TRUE) -fasta_in <- args[1] -genomes_file <- args[2] -fasta_out <- args[3] - -# Remove the descriptions from the fasta records -fasta <- read.fasta(fasta_in) -fasta[1]$seq.name <- sapply(strsplit(fasta[1]$seq.name, " "), "[[", 1) -dat2fasta(fasta, outfile = "cleaned.fasta") - -# Read in the genome list -genome_table <- read.delim(genomes_file, header = FALSE, row.names = NULL) - -# Swap the order of the accession numbers and strain names -genome_table$old <- genome_table$V2 -genome_table$new <- genome_table$V1 -genome_table$V1 <- NULL # nolint -genome_table$V2 <- NULL # nolint - -# Remove the descriptor from the tree root -if (sum(startsWith(genome_table$new, "ROOT") > 0)) { - genome_table[startsWith(genome_table$new, "ROOT"), ]$new <- "ROOT" -} - -rename.fasta( - infile = "cleaned.fasta", - ref_table = genome_table, - outfile = fasta_out -) diff --git a/bin/sequence-table.jl b/bin/sequence-table.jl deleted file mode 100755 index 5e91dbd9..00000000 --- a/bin/sequence-table.jl +++ /dev/null @@ -1,322 +0,0 @@ -#!/usr/bin/env julia -using ArgParse -using BioAlignments -using BioSequences -using CSV -using DataFrames -using EzXML -using FASTX -using JSON3 -using Kelpie -using XAM - -include("jl-lib/structs.jl") -include("jl-lib/functions.jl") -include("jl-lib/arguments.jl") -include("jl-lib/static-divs.jl") - -const BIN_DIR = dirname(abspath(PROGRAM_FILE)) -const LIB_DIR = joinpath(BIN_DIR, "jl-lib") -const STYLESHEET = joinpath(LIB_DIR, "yavsap.css") -const PHYLOTREE_UTILS = joinpath(LIB_DIR, "phylotree-utils.js") -const PHYLOTREE_COLOR = joinpath(LIB_DIR, "phylotree-colorizer.js") - -args = parse_arguments() - -strains_file = args["strainstable"] -aligned_file = args["alignments"] -reference_file = args["reference"] -include_multiqc = !args["no-multiqc"] -include_krona = !args["no-krona"] -include_tree = !isnothing(args["newick"]) -tree_contents = nothing - -reference_reader = open(FASTA.Reader, reference_file) -reference_records = collect(reference_reader) -close(reference_reader) - -if length(reference_records) > 1 - @warn "More than one genome (chromosome) found in $reference_file. 
Only the first will be used" -end #if - -reference_name = FASTA.identifier(first(reference_records)) -reference_sequence = FASTA.sequence(first(reference_records)) - -strains_table = CSV.read( - strains_file, - DataFrame; - header=["sample", "accession", "strain", "haplotype", "frequency"], -) - -alignment_reader = open(SAM.Reader, aligned_file) -alignment_records = collect(alignment_reader) -close(alignment_reader) - -if include_tree - tree_contents = first(readlines(args["newick"])) -end #if - -report_sections = cat( - [ - ReportSection("summary", "Summary", "fas fa-list-ul"), - if include_multiqc - ReportSection( - "multiqc", "Read Quality and Trimming", "fas fa-magnifying-glass" - ) - else - [] - end, - if include_krona - ReportSection("krona", "Read Classification (Kraken)", "fas fa-chart-pie") - else - [] - end, - ReportSection("igv", "Alignments", "fas fa-bars-staggered"), - if include_tree - ReportSection("phylogenetics", "Phylogenetics", "fas fa-code-branch") - else - [] - end, - ReportSection("nextflow", "Nextflow Report", "fas fa-shuffle"), - ]...; - dims=1, -) - -custom_style_sheet = style(read(STYLESHEET, String)) -phylotree_utils_script = read(PHYLOTREE_UTILS, String) -phylotree_colorizer_script = read(PHYLOTREE_COLOR, String) - -navbar = nav( - a( - i(""; class="fab fa-bootstrap"); - href="#top", - class="d-block p-3 link-dark text-decoration-none text-white text-center border-bottom", - title="YAVSAP Report", - data_bs_toggle="tooltip", - data_bs_placement="right", - ), - ul( - [ - li( - a( - i(""; class=report_section.fontawesome); - href="#$(report_section.href)", - class="nav-link py-3 border-bottom text-light", - title=report_section.title, - data_bs_toggle="tooltip", - data_bs_placement="right", - ); - class="nav-item", - ) for report_section in report_sections - ]...; - class="nav nav-pills nav-flush flex-column mb-auto text-center", - ), - html_div( - a( - span( - i(""; class="fas fa-circle-question"); - data_bs_toggle="modal", - data_bs_target="#help-dialog", - ); - href="#", - class="d-block p-3 link-light text-decoration-none text-light text-center", - title="Help", - data_bs_toggle="tooltip", - data_bs_placement="right", - ); - class="dropdown border-top", - ); - class="d-flex flex-column flex-shrink-0 bg-dark", - style="width: 4.5rem", -) - -genome_table = html_div( - table( - thead( - tr( - th("Sample"; colspan=3), - th("Genotype"; colspan=2), - th("Sequence"; colspan=4), - [td(string(i); colspan=5) for i in 5:5:length(reference_sequence)]..., - ), - ), - tbody( - tr( - th("Reference"; colspan=4), - td( - a( - reference_name; - href="https://ncbi.nlm.nih.gov/nuccore/$reference_name", - ), - ), - [td(base; class=base) for base in reference_sequence]..., - ), - cat( - [ - sample_rows(s, reference_sequence, alignment_records, strains_table) for - s in unique(strains_table.sample) - ]...; - dims=1, - )..., - ); - id="genome-view", - class="table", - ); - id="genome-wrapper", -) - -igv_options = Dict( - "reference" => Dict( - "id" => "reference", - "fastaURL" => "reference.fasta", - "indexURL" => "reference.fasta.fai", - ), - "tracks" => [ - Dict( - "type" => "alignment", - "format" => "bam", - "url" => "$samplename.bam", - "indexURL" => "$samplename.bam.bai", - "name" => samplename, - ) for samplename in unique(strains_table.sample) - ], -) - -phylogenetic_scripts = [] -if include_tree - phylogenetic_scripts = [ - script( - ""; - src="https://cdn.jsdelivr.net/npm/d3@5.16.0/dist/d3.min.js", - integrity="sha256-Xb6SSzhH3wEPC4Vy3W70Lqh9Y3Du/3KxPqI2JHQSpTw=", - 
crossorigin="anonymous", - ), - script( - ""; - src="https://cdn.jsdelivr.net/npm/underscore@1.13.2/underscore-umd.min.js", - integrity="sha384-6URC9+r9R/tql/uTNEHRNRXFG53gIbvGSowjGwSBcHeQJfuL3QdHF+NsSgWlzqsr", - crossorigin="anonymous", - ), - script( - ""; - src="https://cdn.jsdelivr.net/npm/phylotree@1.0.13/dist/phylotree.min.js", - integrity="sha256-y6vYUVnYZ4w+6dZofvpSL2ItmXVGRiN56p5bovmu0Bw=", - crossorigin="anonymous", - ), - script(""" - $phylotree_utils_script - newick = "$tree_contents"; - $phylotree_colorizer_script - """), - ] -end #if - -middle_sections = [] -if include_multiqc - push!( - middle_sections, - section( - iframe(""; src="multiqc_report.html", class="iframe-content"); - id="multiqc", - class="min-vh-100", - ), - ) -end -if include_krona - push!( - middle_sections, - section( - iframe(""; src="krona.html", class="iframe-content"); - id="krona", - class="min-vh-100", - ), - ) -end - -EzXML.prettyprint( - html( - head( - meta(; charset="utf8"), - meta(; name="viewport", content="width=device-width, initial-scale=1"), - link(; - href="https://cdn.jsdelivr.net/npm/bootstrap@5.1.3/dist/css/bootstrap.min.css", - rel="stylesheet", - integrity="sha384-1BmE4kWBq78iYhFldvKuhfTAU6auU8tT94WrHftjDbrCEXSU1oBoqyl2QvZ6jIW3", - crossorigin="anonymous", - ), - link(; - href="https://cdn.jsdelivr.net/npm/phylotree@1.0.13/dist/phylotree.css", - rel="stylesheet", - integrity="sha256-OMGYTWHSt3pK8AhnFhc18bkINhqrWz0+srfAkIi1Jdg=", - crossorigin="anonymous", - ), - custom_style_sheet, - title("YAVSAP Results"), - ), - body( - help_modal, - html_div( - navbar, - main( - header(h1("YAVSAP Results"); id="top", class="container py-3"), - section( - h2("Summary"), - genome_table; - id="summary", - class="container min-vh-100", - ), - middle_sections..., - section(""; id="igv", class="min-vh-100"), - include_tree ? 
phylogenetic_section : nothing,
- section(
- iframe(""; src="nextflow_report.html", class="iframe-content");
- id="nextflow",
- class="min-vh-100",
- );
- style="overflow-y: scroll",
- );
- class="d-flex vh-100",
- ),
- script(""; src="https://cdn.jsdelivr.net/npm/jquery@3.2.1/dist/jquery.min.js"),
- script(
- "";
- src="https://cdn.jsdelivr.net/npm/bootstrap@5.1.3/dist/js/bootstrap.bundle.min.js",
- ),
- script(
- "";
- src="https://cdn.jsdelivr.net/npm/jquery-freeze-table@1.3.0/dist/js/freeze-table.min.js",
- ),
- script(""; src="https://kit.fontawesome.com/8f147eccd6.js"),
- script(
- "";
- src="https://cdn.jsdelivr.net/npm/igv@2.11.0/dist/igv.min.js",
- integrity="sha256-sr6GZtbybttUnYJHVKTjxA/aj9zru7lgZnRUOV3o7Gc=",
- crossorigin="anonymous",
- ),
- script("""
- var tooltipTriggerList = [].slice.call(
- document.querySelectorAll('[data-bs-toggle="tooltip"]')
- );
- var tooltipList = tooltipTriggerList.map(function (tooltipTriggerEl) {
- return new bootstrap.Tooltip(tooltipTriggerEl);
- });
- """),
- script("""
- \$("#genome-wrapper").freezeTable({
- columnNum: 2,
- scrollable: true,
- shadow: true,
- });
- """),
- script("""
- options = $(JSON3.write(igv_options));
- igv
- .createBrowser(document.getElementById("igv"), options)
- .then(function (browser) {
- igv.browser = browser;
- });
- """),
- phylogenetic_scripts...,
- ),
- ),
-)
diff --git a/bin/sequencetable b/bin/sequencetable
new file mode 100755
index 00000000..4b0d5d3c
--- /dev/null
+++ b/bin/sequencetable
@@ -0,0 +1,124 @@
+#!/usr/bin/env julia
+using EzXML
+using FASTX
+using Kelpie
+using Mustache
+using YAML
+
+haplotype_yaml = popfirst!(ARGS)
+reference_fasta = popfirst!(ARGS)
+template_file = popfirst!(ARGS)
+tool_meta_yaml = popfirst!(ARGS)
+freeze_script = popfirst!(ARGS)
+output_file = popfirst!(ARGS)
+
+function match_cols(query, reference)
+    cols = []
+    for (seq, ref) in zip(query, reference)
+        classes = [string(seq)]
+        if seq != ref
+            push!(classes, "variant")
+        end #if
+        push!(cols, td(seq; class=join(classes, " ")))
+    end #for
+
+    return cols
+end #function
+
+function sample_rows(sample, reference)
+    sample_name = first(sample)
+    sample_data = last(sample)
+    strain_name = sample_data["strain_name"]
+    strain_ncbi = sample_data["strain_ncbi"]
+    consensus_sequence = sample_data["consensus_sequence"]
+    haplotypes = sample_data["haplotypes"]
+
+    ncbi_link = "https://ncbi.nlm.nih.gov/nuccore/$strain_ncbi"
+    num_haplotypes = length(haplotypes)
+
+    consensus_row = tr(
+        th(sample_name; rowspan=(num_haplotypes + 1)),
+        td("Consensus"; colspan=2),
+        td(strain_name),
+        td(a(strain_ncbi; href=ncbi_link)),
+        match_cols(consensus_sequence, reference)...,
+    )
+
+    haplotype_rows = []
+
+    for haplotype in haplotypes
+        haplotype_name = first(first(haplotype))
+        haplotype_data = last(first(haplotype))
+        haplotype_frequency = haplotype_data["frequency"]
+        haplotype_sequence = haplotype_data["sequence"]
+
+        push!(
+            haplotype_rows,
+            tr(
+                td(haplotype_name),
+                td(em("$(round(haplotype_frequency * 100))%"); colspan=3),
+                match_cols(haplotype_sequence, reference)...,
+            ),
+        )
+    end #for
+
+    return [consensus_row, haplotype_rows...]
+end #function
+
+# Read in the first (and hopefully only) record from the reference file
+reference_reader = FASTA.Reader(open(reference_fasta, "r"))
+reference_record = first(reference_reader)
+if !isempty(reference_reader)
+    @warn "Reference $reference_fasta contains more than one sequence. Only the first will be used."
+end #if
+close(reference_reader)
+reference_name = FASTA.identifier(reference_record)
+reference_sequence = string(FASTA.sequence(reference_record))
+
+# Get the header
+header_row = tr(
+    th("Sample"; colspan=3),
+    th("Genotype"; colspan=2),
+    th("Sequence"; colspan=4),
+    [td(i; colspan=5) for i in 5:5:length(reference_sequence)]...,
+)
+
+# Get the reference sequence row
+reference_row = tr(
+    th("Reference"; colspan=4),
+    td(a(reference_name; href="https://ncbi.nlm.nih.gov/nuccore/$reference_name")),
+    [td(base; class=base) for base in reference_sequence]...,
+)
+
+# Get rows for each sample
+other_rows = []
+for sample in YAML.load_file(haplotype_yaml)
+    push!(other_rows, sample_rows(sample, reference_sequence)...)
+end #for
+
+# Plug it all into one big table
+genome_table = table(thead(header_row), tbody(reference_row, other_rows...); class="table")
+
+# Get the table as xml
+io = IOBuffer()
+print(io, genome_table)
+table_xml = String(take!(io))
+
+# Read in the tool metadata
+tool_metadata = YAML.load_file(tool_meta_yaml)
+
+# Create the mustache table
+mustache_table = Dict(
+    "kelpieseqtable" => table_xml,
+    "jqueryfreezetable" => String(read(freeze_script)),
+    "toolname" => tool_metadata["name"],
+    "toolnamelower" => lowercase(tool_metadata["name"]),
+    "tooldescription" => tool_metadata["description"],
+    "toollink" => tool_metadata["url"],
+    "doi" => tool_metadata["doi"],
+)
+
+# Output a rendered file
+open(output_file, "w") do f
+    write(f, render_from_file(template_file, mustache_table))
+end #do
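The new `bin/sequencetable` script above is driven entirely by positional arguments, popped off `ARGS` in the order shown at the top of the file. A minimal sketch of an invocation, in which all six file names are hypothetical placeholders, would be:

```bash
# All six paths are hypothetical; the order matches the popfirst!(ARGS) calls above
sequencetable haplotypes.yml reference.fasta seqtable.mustache tool_meta.yml freeze-table.min.js index.html
```

The first argument carries the per-sample consensus and haplotype data, the second the single-record reference FASTA, and the last names the rendered HTML output; the Mustache template, tool metadata YAML, and jQuery freeze-table script fill the slots in between.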
Using default value: $obj" - return obj - } - } -} - process { - cpus = { check_max( 1 * task.attempt, 'cpus' ) } memory = { check_max( 6.GB * task.attempt, 'memory' ) } time = { check_max( 4.h * task.attempt, 'time' ) } @@ -53,23 +18,27 @@ process { maxErrors = '-1' // Process-specific resource requirements + // NOTE - Please try and re-use the labels below as much as possible. + // These labels are used and recognised by default in DSL2 files hosted on nf-core/modules. + // If possible, it would be nice to keep the same label naming convention when + // adding in your local modules too. withLabel:process_low { - cpus = { check_max( 2 * task.attempt, 'cpus' ) } - memory = { check_max( 12.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } + cpus = { check_max( 2 * task.attempt, 'cpus' ) } + memory = { check_max( 12.GB * task.attempt, 'memory' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } } withLabel:process_medium { - cpus = { check_max( 16 * task.attempt, 'cpus' ) } - memory = { check_max( 36.GB * task.attempt, 'memory' ) } - time = { check_max( 24.h * task.attempt, 'time' ) } + cpus = { check_max( 6 * task.attempt, 'cpus' ) } + memory = { check_max( 36.GB * task.attempt, 'memory' ) } + time = { check_max( 8.h * task.attempt, 'time' ) } } withLabel:process_high { - cpus = { check_max( 32 * task.attempt, 'cpus' ) } - memory = { check_max( 72.GB * task.attempt, 'memory' ) } - time = { check_max( 7.d * task.attempt, 'time' ) } + cpus = { check_max( 12 * task.attempt, 'cpus' ) } + memory = { check_max( 72.GB * task.attempt, 'memory' ) } + time = { check_max( 16.h * task.attempt, 'time' ) } } withLabel:process_long { - time = { check_max( 14.d * task.attempt, 'time' ) } + time = { check_max( 20.h * task.attempt, 'time' ) } } withLabel:process_high_memory { memory = { check_max( 200.GB * task.attempt, 'memory' ) } @@ -84,4 +53,9 @@ process { withName:CUSTOM_DUMPSOFTWAREVERSIONS { cache = false } + // Run process on login node + // Useful for HPCs where the compute nodes don't have internet access + withLabel: run_local { + executor = 'local' + } } diff --git a/conf/containers.config b/conf/containers.config deleted file mode 100644 index 44ab73f2..00000000 --- a/conf/containers.config +++ /dev/null @@ -1,70 +0,0 @@ -// Per-process container environments -// Listed alphabetical, by the tool suite's name -process { - withLabel: blast { - container = 'docker.io/ncbi/blast:2.11.0' - } - withLabel: canu { - container = 'quay.io/staphb/canu:2.1.1' - } - withLabel: cliquesnv { - container = 'quay.io/biocontainers/cliquesnv:2.0.2--hdfd78af_0' - } - withLabel: fastqc { - container = 'quay.io/staphb/fastqc:0.11.9' - } - withLabel: haplink { - container = 'quay.io/millironx/haplink:0.5.1' - } - withLabel: ivar { - container = 'quay.io/staphb/ivar:1.3' - } - withLabel: kraken { - container = 'quay.io/staphb/kraken2:2.1.2-no-db' - } - withLabel: krakentools { - container = 'quay.io/biocontainers/krakentools:1.2--pyh5e36f6f_0' - } - withLabel: mafft { - container = 'quay.io/staphb/mafft:7.475' - } - withLabel: minimap { - container = 'docker.io/niemasd/minimap2_samtools:2.22_1.12' - } - withLabel: multiqc { - container = 'docker.io/ewels/multiqc:v1.12' - } - withLabel: nanofilt { - container = 'quay.io/biocontainers/nanofilt:2.8.0--py_0' - } - withLabel: parallelzip { - container = 'quay.io/millironx/parallelzip:latest' - } - withLabel: pbsim { - container = 'docker.io/metagenlab/pbsim2:2.0.1' - } - withLabel: nanostat { - conda = 'bioconda::nanostat=1.4' - } - 
withLabel: raxml {
-        container = 'docker.io/evolbioinfo/raxml-ng:v1.0.3'
-    }
-    withLabel: samtools {
-        container = 'quay.io/staphb/samtools:1.12'
-    }
-    withLabel: seqkit {
-        container = 'quay.io/biocontainers/seqkit:2.1.0--h9ee0642_0'
-    }
-    withLabel: seqtk {
-        container = 'quay.io/staphb/seqtk:1.3'
-    }
-    withLabel: seqret {
-        container = 'quay.io/biocontainers/emboss:6.6.0--h8719169_4'
-    }
-    withLabel: spades {
-        container = 'quay.io/staphb/spades:3.15.2'
-    }
-    withLabel: trimmomatic {
-        container = 'quay.io/staphb/trimmomatic:0.39'
-    }
-}
diff --git a/conf/environment.config b/conf/environment.config
deleted file mode 100644
index 50ae3d20..00000000
--- a/conf/environment.config
+++ /dev/null
@@ -1,10 +0,0 @@
-// Export these variables to prevent local Python/R libraries from conflicting with those in the container
-// The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container.
-// See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable.
-
-env {
-    PYTHONNOUSERSITE = 1
-    R_PROFILE_USER = "/.Rprofile"
-    R_ENVIRON_USER = "/.Renviron"
-    JULIA_DEPOT_PATH = "/usr/local/share/julia"
-}
diff --git a/conf/modules.config b/conf/modules.config
index ecf27ab5..e06cbec1 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -1,3 +1,14 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Config file for defining DSL2 per module options and publishing paths
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Available keys to override module options:
+        ext.args   = Additional arguments appended to command in module.
+        ext.args2  = Second set of arguments appended to command in module (multi-tool modules).
+        ext.args3  = Third set of arguments appended to command in module (multi-tool modules).
+        ext.prefix = File name prefix for output files.
+----------------------------------------------------------------------------------------
+*/
import static java.lang.Math.sqrt
import static java.lang.Math.round
@@ -17,9 +28,58 @@ if (kraken2_db.isDirectory()) {
    }
}
-if (platform == 'illumina' && !params.skip_haplotype) {
+if (platform == 'illumina') {
    process {
-        withName: 'HAPLOTYPING:JSON2YAML' {
+        // Trimming: argument propagation
+        withName: 'TRIMMOMATIC' {
+            ext.clipFlag = (params.trim_adapters && params.trim_mismatches && params.trim_pclip && params.trim_clip) ? "ILLUMINACLIP:/usr/local/share/trimmomatic-0.39-2/adapters/${params.trim_adapters}:${params.trim_mismatches}:${params.trim_pclip}:${params.trim_clip}" : ''
+            ext.winFlag = (params.trim_winsize && params.trim_winqual) ? "SLIDINGWINDOW:${params.trim_winsize}:${params.trim_winqual}" : ''
+            ext.leadFlag = (params.trim_leading) ? "LEADING:${params.trim_leading}" : ''
+            ext.trailFlag = (params.trim_trailing) ? "TRAILING:${params.trim_trailing}" : ''
+            ext.cropFlag = (params.trim_crop) ? "CROP:${params.trim_crop}" : ''
+            ext.headFlag = (params.trim_headcrop) ? "HEADCROP:${params.trim_headcrop}" : ''
+            ext.minlenFlag = (params.trim_minlen) ? "MINLEN:${params.trim_minlen}" : ''
+            ext.args = [ext.clipFlag, ext.winFlag, ext.leadFlag, ext.trailFlag, ext.cropFlag, ext.headFlag, ext.minlenFlag].join(' ')
+        }
+
+        // CliqueSNV parameter propagation
+        withName: 'CLIQUESNV_.*' {
+            ext.args = """\
+                -t ${params.haplotype_depth} \\
+                -tf ${params.haplotype_frequency} \\
+                -cm ${params.cliquesnv_method} \\
+                -fdf extended \\
+                -log \\
+            """
+        }
+
+        // CliqueSNV consensus sequence output
+        withName: 'CLIQUESNV_CONSENSUSILLUMINA' {
+            publishDir = [
+                path: { "${params.outdir}/consensus" },
+                mode: params.publish_dir_mode,
+                saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+            ]
+        }
+
+        // CliqueSNV variant output
+        withName: 'CLIQUESNV_ILLUMINAVC' {
+            publishDir = [
+                path: { "${params.outdir}/variants" },
+                mode: params.publish_dir_mode,
+                saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+            ]
+        }
+
+        // CliqueSNV haplotype output
+        withName: 'CLIQUESNV_ILLUMINA' {
+            publishDir = [
+                path: { "${params.outdir}/haplotypes" },
+                mode: params.publish_dir_mode,
+                saveAs: { filename -> filename.endsWith('fasta') ? filename : null }
+            ]
+        }
+        withName: 'JSON2YAML' {
            publishDir = [
                path: { "${params.outdir}/haplotypes" },
                mode: params.publish_dir_mode,
@@ -29,6 +89,77 @@ if (platform == 'illumina' && !params.skip_haplotype) {
    }
}
+if (platform == 'nanopore') {
+    process {
+        withName: 'NANOFILT' {
+            ext.minlenflag = ( params.trim_minlen > 0 ) ? "--length ${params.trim_minlen}" : ''
+            ext.maxlenflag = ( params.trim_maxlen > 0 ) ? "--maxlength ${params.trim_maxlen}" : ''
+            ext.qualflag = ( params.trim_meanqual > 0 ) ? "--quality ${params.trim_meanqual}" : ''
+            ext.mingcflag = ( params.trim_mingc > 0 ) ? "--minGC ${params.trim_mingc}" : ''
+            ext.maxgcflag = ( params.trim_maxgc > 0 ) ? "--maxGC ${params.trim_maxgc}" : ''
+            ext.headflag = ( params.trim_headcrop > 0 ) ? "--headcrop ${params.trim_headcrop}" : ''
+            ext.tailflag = ( params.trim_tailcrop > 0 ) ? "--tailcrop ${params.trim_tailcrop}" : ''
+            ext.args = [ext.minlenflag, ext.maxlenflag, ext.qualflag, ext.mingcflag, ext.maxgcflag, ext.headflag, ext.tailflag].join(' ')
+        }
+
+        // HapLink.jl variant parameter propagation
+        withName: 'HAPLINK_VARIANTS' {
+            ext.args = """\
+                --quality ${params.variant_quality} \\
+                --frequency ${params.variant_frequency} \\
+                --position ${params.variant_position} \\
+                --significance ${params.variant_significance} \\
+                --depth ${params.variant_depth} \\
+            """
+        }
+
+        // HapLink.jl consensus sequence output
+        withName: 'HAPLINK_CONSENSUS' {
+            publishDir = [
+                path: { "${params.outdir}/consensus" },
+                mode: params.publish_dir_mode,
+                saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+            ]
+        }
+
+        // HapLink.jl variant output
+        withName: '.*VARIANTS:HAPLINK_VARIANTS' {
+            publishDir = [
+                path: { "${params.outdir}/variants" },
+                mode: params.publish_dir_mode,
+                saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+            ]
+        }
+
+        // HapLink.jl haplotype parameter propagation and output
+        withName: 'HAPLINK_HAPLOTYPES' {
+            ext.args = """\
+                --significance ${params.haplotype_significance} \\
+                --depth ${params.haplotype_depth} \\
+                --method ${params.haplotype_method} \\
+                --overlap-min ${params.haplotype_overlap_min} \\
+                --overlap-max ${params.haplotype_overlap_max} \\
+                --iterations ${params.haplotype_iterations} \\
+                --seed ${params.seed} \\
+            """
+            publishDir = [
+                path: { "${params.outdir}/haplotypes" },
+                mode: params.publish_dir_mode,
+                saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+            ]
+        }
+
+        // HapLink.jl haplotype sequence output
+        withName: 'HAPLINK_SEQUENCES' {
+            publishDir = [
+                path: { "${params.outdir}/haplotypes" },
+                mode: params.publish_dir_mode,
+                saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+            ]
+        }
+    }
+}
+
process {
    // Deinterleaving parameter: take every other read
    withName: 'SEQKIT_SPLIT2' {
@@ -36,7 +167,7 @@ process {
    }

    // Reference genome download output
-    withName: 'GENOME_DOWNLOAD:EDIRECT_EFETCH' {
+    withName: '.*REFERENCE_DOWNLOAD:EDIRECT_EFETCH' {
        publishDir = [
            [
                path: { "${params.outdir}/reference" },
@@ -51,7 +182,7 @@
        ]
    }

-    withName: 'GENOME_DOWNLOAD:SAMTOOLS_FAIDX' {
+    withName: '.*REFERENCE_DOWNLOAD:SAMTOOLS_FAIDX' {
        publishDir = [
            [
                path: { "${params.outdir}/reference" },
@@ -66,29 +197,6 @@
        ]
    }
-
-    // Trimming: argument propogation
-    withName: 'TRIMMOMATIC' {
-        ext.clipFlag = (params.trim_adapters && params.trim_mismatches && params.trim_pclip && params.trim_clip) ? "ILLUMINACLIP:/usr/local/share/trimmomatic-0.39-2/adapters/${params.trim_adapters}:${params.trim_mismatches}:${params.trim_pclip}:${params.trim_clip}" : ''
-        ext.winFlag = (params.trim_winsize && params.trim_winqual) ? "SLIDINGWINDOW:${params.trim_winsize}:${params.trim_winqual}" : ''
-        ext.leadFlag = (params.trim_leading) ? "LEADING:${params.trim_leading}" : ''
-        ext.trailFlag = (params.trim_trailing) ? "TRAILING:${params.trim_trailing}" : ''
-        ext.cropFlag = (params.trim_crop) ? "CROP:${params.trim_crop}" : ''
-        ext.headFlag = (params.trim_headcrop) ? "HEADCROP:${params.trim_headcrop}" : ''
-        ext.minlenFlag = (params.trim_minlen) ? "MINLEN:${params.trim_minlen}" : ''
-        ext.args = [ext.clipFlag, ext.winFlag, ext.leadFlag, ext.trailFlag, ext.cropFlag, ext.headFlag, ext.minlenFlag].join(' ')
-    }
-    withName: 'NANOFILT' {
-        ext.minlenflag = ( params.trim_minlen > 0 ) ? "--length ${params.trim_minlen}" : ''
-        ext.maxlenflag = ( params.trim_maxlen > 0 ) ? "--maxlength ${params.trim_maxlen}" : ''
-        ext.qualflag = ( params.trim_meanqual > 0 ) ? "--quality ${params.trim_meanqual}" : ''
-        ext.mingcflag = ( params.trim_mingc > 0 ) ? "--minGC ${params.trim_mingc}" : ''
-        ext.maxgcflag = ( params.trim_maxgc > 0 ) ? "--maxGC ${params.trim_maxgc}" : ''
-        ext.headflag = ( params.trim_headcrop > 0 ) ? "--headcrop ${params.trim_headcrop}" : ''
-        ext.tailflag = ( params.trim_tailcrop > 0 ) ? "--tailcrop ${params.trim_tailcrop}" : ''
-        ext.args = [ext.minlenflag, ext.maxlenflag, ext.qualflag, ext.mingcflag, ext.maxgcflag, ext.headflag, ext.tailflag].join(' ')
-    }
-
    // Kraken reports output
    withName: 'KRAKEN2' {
        publishDir = [
@@ -113,24 +221,32 @@
        ]
    }

-    // General alignment settings: match sequencing platform, and output SAM format
-    withName: '.*ALIGNMENT:MINIMAP2_ALIGN' {
+    // General alignment settings: match sequencing platform
+    withName: 'MINIMAP2_.*ALIGN' {
        ext.args = """\
            ${(platform == 'nanopore') ?
'-x map-ont' : (platform == 'illumina') ? '-x sr' : ''} \\ - -a \\ --MD \\ --eqx \ """ } - // Visualizer alignment settings: output SAM - withName: 'PRESENTATION:MINIMAP2_ALIGN' { - ext.args = '-a' - } - // First alignment output - withName: 'ALIGNMENT:SAMTOOLS_.*' { + withName: 'MINIMAP2_ALIGN' { + publishDir = [ + [ + path: { "${params.outdir}/alignment" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ], + [ + path: { "${params.outdir}/report" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ] + } + withName: 'SAMTOOLS_INDEX' { publishDir = [ [ path: { "${params.outdir}/alignment" }, @@ -146,22 +262,18 @@ process { } // Realignment output - withName: 'CLOSEST_REFERENCE:CUSTOM_ALIGNMENT:SAMTOOLS_.*' { + withName: 'MINIMAP2_REALIGN' { publishDir = [ path: { "${params.outdir}/realignment" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } - - // Consensus sequences: match variant calling params and output - withName: 'IVAR_CONSENSUS' { - ext.args = "-q ${params.variant_quality} -m ${params.variant_depth}" - ext.args2 = "-aa -A -d 0 -Q 0" + withName: 'SAMTOOLS_REINDEX' { publishDir = [ - path: { "${params.outdir}/consensus" }, + path: { "${params.outdir}/realignment" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.endsWith('fa') ? "${filename}sta" : null } + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } @@ -173,78 +285,6 @@ process { ext.args = '-num_alignments 1 -outfmt "6 saccver"' } - // CliqueSNV parameter propogation - withName: 'CLIQUESNV_.*' { - ext.args = """\ - -t ${params.haplotype_depth} \\ - -tf ${params.haplotype_frequency} \\ - -cm ${params.cliquesnv_method} \\ - -fdf extended \\ - -log \\ - """ - } - - // CliqueSNV variant output - withName: 'CLIQUESNV_ILLUMINAVC' { - publishDir = [ - path: { "${params.outdir}/variants" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - - // CliqueSNV haplotype output - withName: 'CLIQUESNV_ILLUMINA' { - publishDir = [ - path: { "${params.outdir}/haplotypes" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.endsWith('fasta') ? filename : null } - ] - } - - // HapLink.jl variant parameter propgation and output - withName: 'HAPLINK_VARIANTS' { - ext.args = """\ - --quality ${params.variant_quality} \\ - --frequency ${params.variant_frequency} \\ - --position ${params.variant_position} \\ - --significance ${params.variant_significance} \\ - --depth ${params.variant_depth} \\ - """ - publishDir = [ - path: { "${params.outdir}/variants" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - - // HapLink.jl haplotype parameter propogation and output - withName: 'HAPLINK_HAPLOTYPES' { - ext.args = """\ - --significance ${params.haplotype_significance} \\ - --depth ${params.haplotype_depth} \\ - --method ${params.haplotype_method} \\ - --overlap-min ${params.haplotype_overlap_min} \\ - --overlap-max ${params.haplotype_overlap_max} \\ - --iterations ${params.haplotype_iterations} \\ - --seed ${params.seed} \\ - """ - publishDir = [ - path: { "${params.outdir}/haplotypes" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] - } - - // HapLink.jl haplotype sequence output - withName: 'HAPLINK_SEQUENCES' { - publishDir = [ - path: { "${params.outdir}/haplotypes" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - // MAFFT required parameter and output withName: 'MAFFT' { ext.args = "${params.mafft_method}" @@ -279,16 +319,15 @@ process { publishDir = [ path: { "${params.outdir}/report" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('multiqc_report.html') ? filename : null } + saveAs: { filename -> filename.equals('multiqc_report.html') ? 'index.html' : null } ] } - // YAVSAP report - withName: 'SEQUENCETABLE' { + withName: CUSTOM_DUMPSOFTWAREVERSIONS { publishDir = [ - path: { "${params.outdir}/report" }, + path: { "${params.outdir}/pipeline_info" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : 'index.html' } + pattern: '*_versions.yml' ] } } diff --git a/conf/parameters.config b/conf/parameters.config deleted file mode 100644 index 3b01f577..00000000 --- a/conf/parameters.config +++ /dev/null @@ -1,89 +0,0 @@ -// Global default parameters -params { - - // Input options - input = '.' - sra = false - platform = null - paired = (params.platform == 'illumina') - interleaved = false - samplesheet = null - seed = 42 - - // Reference genome options - genome = 'NC_001437.1' // JEV RefSeq - genome_list = 'jev' - - // Kraken 2 options - kraken2_db = null - keep_taxid = '0 10239' // 0 = Unclassified, 10239 = Viral - - // Read trimming options (shared) - trim_minlen = 100 - trim_headcrop = 0 - - // Illumina trimming options (Trimmomatic) - trim_adapters = 'NexteraPE-PE.fa' - trim_mismatches = 2 - trim_pclip = 30 - trim_clip = 10 - trim_winsize = 50 - trim_winqual = 15 - trim_leading = 15 - trim_trailing = 15 - trim_crop = 0 - - // ONT-specific trimming options (Filtlong) - trim_maxlen = 0 - trim_meanqual = 7 - trim_mingc = 0 - trim_maxgc = 0 - trim_tailcrop = 0 - - // Variant calling options - variant_quality = 12 - variant_depth = 10 - variant_position = 0.1 - variant_frequency = 0.05 - variant_significance = 1e-3 - - // Haplotyping options - haplotype_significance = 0.05 - haplotype_depth = 10 - haplotype_frequency = 0.05 - haplotype_method = 'ml-template' - haplotype_overlap_min = 0 - haplotype_overlap_max = 100 - haplotype_iterations = 1000 - cliquesnv_method = 'accurate' - - // Phylogenetics options - mafft_method = '--auto' - phylogenetic_bootstraps = 1000 - phylogenetic_bootstrap_cutoff = 0.03 - phylogenetic_model = 'GTR+G' - - // Boilerplate options - outdir = './results' - tracedir = "${params.outdir}/.trace" - publish_dir_mode = 'copy' - help = false - show_hidden_params = false - schema_ignore_params = false - custom_config_version = 'master' - custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" - enable_conda = false - - // Max resource options - // Defaults only, expecting to be overwritten - max_memory = '128.GB' - max_cpus = 32 - max_time = '240.h' - - // Step-skipping options - skip_trimming = false - skip_qc = false - skip_filtering = false - skip_haplotype = false - skip_phylogenetics = false -} diff --git a/conf/profiles.config b/conf/profiles.config deleted file mode 100644 index f32383f6..00000000 --- a/conf/profiles.config +++ /dev/null @@ -1,75 +0,0 @@ -// Configuration profiles to allow use of different container environments. 
Uses -// similar nomenclature and code as nf-core's pipelines - -// Load nf-core custom profiles from different Institutions -try { - includeConfig "${params.custom_config_base}/nfcore_custom.config" -} catch (Exception e) { - System.err.println("WARNING: Could not load nf-core/config profiles: ${params.custom_config_base}/nfcore_custom.config") -} - -profiles { - debug { process.beforeScript = 'echo $HOSTNAME' } - conda { - params.enable_conda = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - envWhitelist = 'NCBI_API_KEY' - } - docker { - docker.enabled = true - docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - envWhitelist = 'NCBI_API_KEY' - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - envWhitelist = 'NCBI_API_KEY' - } - podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - envWhitelist = 'NCBI_API_KEY' - } - shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - envWhitelist = 'NCBI_API_KEY' - } - charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - envWhitelist = 'NCBI_API_KEY' - } - gh { - params { - max_cpus = 2 - max_memory = 6.GB - max_time = 6.h - } - } - test { includeConfig 'test.config' } // Illumina PE Deinterleaved - test_interleaved { includeConfig 'test_interleaved.config' } // Illumina PE Interleaved - test_se { includeConfig 'test_se.config' } // Illumina SE - test_nanopore { includeConfig 'test_nanopore.config' } // Nanopore -} diff --git a/conf/resources.config b/conf/resources.config deleted file mode 100644 index 361d9fba..00000000 --- a/conf/resources.config +++ /dev/null @@ -1,20 +0,0 @@ -// Per-process resource allocation configuration. Most elements are taken from the -// default nf-core template, but are heavily tweaked to ensure they work with the -// applications in this pipeline - -includeConfig './base.config' - -// Process allocation limits -process { - - withLabel: error_backoff { - errorStrategy = { sleep(Math.pow(2, task.attempt) * 500 as long); return 'retry' } - maxRetries = 4 - } - - // Run process on login node - // Useful for HPCs where the compute nodes don't have internet access - withLabel: run_local { - executor = 'local' - } -} diff --git a/conf/test.config b/conf/test.config index a9c45348..57f7630c 100644 --- a/conf/test.config +++ b/conf/test.config @@ -1,12 +1,27 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. 
+
+    Use as follows:
+        nextflow run ksumngs/yavsap -profile test,<docker/singularity> --outdir <OUTDIR>
+
+----------------------------------------------------------------------------------------
+*/
params {
-    input = 'https://github.com/ksumngs/nf-test-datasets/raw/yavsap/samplesheet/illumina-pe-deinterleaved.tsv'
-    platform = 'illumina'
-    paired = true
-    interleaved = false
+    config_profile_name = 'Test profile'
+    config_profile_description = 'Minimal test dataset to check pipeline function'
-    includeConfig 'test_params_common.config'
+    input = 'https://github.com/ksumngs/nf-test-datasets/raw/yavsap/samplesheet/illumina-pe-deinterleaved.tsv'
+    platform = 'illumina'
+    paired = true
+    interleaved = false
+    kraken2_db = 'https://genome-idx.s3.amazonaws.com/kraken/k2_viral_20210517.tar.gz'
+    cliquesnv_method = 'fast'
+    mafft_method = '--retree 1 --maxiterate 0'
+    phylogenetic_bootstraps = 10
}
-process {
-    includeConfig 'test_process_common.config'
-}
+tower.enabled = false
+process.errorStrategy = 'terminate'
diff --git a/conf/test_full.config b/conf/test_full.config
new file mode 100644
index 00000000..644737f9
--- /dev/null
+++ b/conf/test_full.config
@@ -0,0 +1,24 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Nextflow config file for running full-size tests
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Defines input files and everything required to run a full size pipeline test.
+
+    Use as follows:
+        nextflow run ksumngs/yavsap -profile test_full,<docker/singularity> --outdir <OUTDIR>
+
+----------------------------------------------------------------------------------------
+*/
+
+params {
+    config_profile_name = 'Full test profile'
+    config_profile_description = 'Full test dataset to check pipeline function'
+
+    // Input data for full size test
+    // TODO nf-core: Specify the paths to your full test data ( on nf-core/test-datasets or directly in repositories, e.g.
SRA) + // TODO nf-core: Give any required params for the test so that command line flags are not needed + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_full_illumina_amplicon.csv' + + // Genome references + genome = 'R64-1-1' +} diff --git a/conf/test_interleaved.config b/conf/test_interleaved.config index d98429ac..e3bf7ce4 100644 --- a/conf/test_interleaved.config +++ b/conf/test_interleaved.config @@ -1,12 +1,16 @@ params { + config_profile_name = 'Test profile (Interleaved)' + config_profile_description = 'Minimal test dataset to check pipeline function when processing interleaved Illumina reads' + input = 'https://github.com/ksumngs/nf-test-datasets/raw/yavsap/samplesheet/illumina-pe-interleaved.tsv' platform = 'illumina' paired = true interleaved = true - - includeConfig 'test_params_common.config' + kraken2_db = 'https://genome-idx.s3.amazonaws.com/kraken/k2_viral_20210517.tar.gz' + cliquesnv_method = 'fast' + mafft_method = '--retree 1 --maxiterate 0' + phylogenetic_bootstraps = 10 } -process { - includeConfig 'test_process_common.config' -} +tower.enabled = false +process.errorStrategy = 'terminate' diff --git a/conf/test_nanopore.config b/conf/test_nanopore.config index 72b55d16..160e86c9 100644 --- a/conf/test_nanopore.config +++ b/conf/test_nanopore.config @@ -1,16 +1,19 @@ params { - input = 'https://raw.githubusercontent.com/ksumngs/nf-test-datasets/yavsap/samplesheet/nanopore.tsv' - platform = 'nanopore' - paired = false - interleaved = false + config_profile_name = 'Test profile (Nanopore)' + config_profile_description = 'Minimal Oxford Nanopore test dataset to check pipeline function' + input = 'https://raw.githubusercontent.com/ksumngs/nf-test-datasets/yavsap/samplesheet/nanopore.tsv' + outdir = 'results-nanopore' + platform = 'nanopore' + paired = false + interleaved = false + kraken2_db = 'https://genome-idx.s3.amazonaws.com/kraken/k2_viral_20210517.tar.gz' variant_depth = 7 variant_frequency = 0.001 variant_significance = 0.25 - - includeConfig 'test_params_common.config' + mafft_method = '--retree 1 --maxiterate 0' + phylogenetic_bootstraps = 10 } -process { - includeConfig 'test_process_common.config' -} +tower.enabled = false +process.errorStrategy = 'terminate' diff --git a/conf/test_params_common.config b/conf/test_params_common.config deleted file mode 100644 index 9cfeb16d..00000000 --- a/conf/test_params_common.config +++ /dev/null @@ -1,4 +0,0 @@ -kraken2_db = 'https://genome-idx.s3.amazonaws.com/kraken/k2_viral_20210517.tar.gz' -cliquesnv_method = 'fast' -mafft_method = '--retree 1 --maxiterate 0' -phylogenetic_bootstraps = 10 diff --git a/conf/test_process_common.config b/conf/test_process_common.config deleted file mode 100644 index 11f26213..00000000 --- a/conf/test_process_common.config +++ /dev/null @@ -1 +0,0 @@ -errorStrategy = 'terminate' diff --git a/conf/test_se.config b/conf/test_se.config index 074a70e6..d9052439 100644 --- a/conf/test_se.config +++ b/conf/test_se.config @@ -1,12 +1,16 @@ params { + config_profile_name = 'Test profile (Single-end)' + config_profile_description = 'Minimal test dataset to check pipeline function when processing single-end Illumina reads' + input = 'https://github.com/ksumngs/nf-test-datasets/raw/yavsap/samplesheet/illumina-se.tsv' platform = 'illumina' paired = false interleaved = false - - includeConfig 'test_params_common.config' + kraken2_db = 'https://genome-idx.s3.amazonaws.com/kraken/k2_viral_20210517.tar.gz' + cliquesnv_method = 'fast' + mafft_method = 
'--retree 1 --maxiterate 0' + phylogenetic_bootstraps = 10 } -process { - includeConfig 'test_process_common.config' -} +tower.enabled = false +process.errorStrategy = 'terminate' diff --git a/conf/trace.config b/conf/trace.config deleted file mode 100644 index 8f88c368..00000000 --- a/conf/trace.config +++ /dev/null @@ -1,20 +0,0 @@ - -// Enable smart process tracing -def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') -timeline { - enabled = true - file = "${params.tracedir}/execution_timeline_${trace_timestamp}.html" -} -report { - enabled = true - // file = "${params.tracedir}/execution_report_${trace_timestamp}.html" - file = "${params.outdir}/report/nextflow_report.html" -} -trace { - enabled = true - file = "${params.tracedir}/execution_trace_${trace_timestamp}.txt" -} -dag { - enabled = true - file = "${params.tracedir}/pipeline_dag_${trace_timestamp}.svg" -} diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 00000000..b27f2fd6 --- /dev/null +++ b/docs/README.md @@ -0,0 +1,8 @@ +# yavsap: Documentation + +The yavsap documentation is split into the following pages: + +- [Usage](usage.md) + - An overview of how the pipeline works, how to run it and a description of all of the different command-line flags. +- [Output](output.md) + - An overview of the different results produced by the pipeline and how to interpret them. diff --git a/docs/conf.py b/docs/conf.py index 8c3dfc25..87c11d44 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -32,6 +32,11 @@ "myst_parser", ] +myst_enable_extensions = [ + "strikethrough", + "deflist", +] + # Add any paths that contain templates here, relative to this directory. templates_path = ["_templates"] @@ -47,14 +52,14 @@ # a list of builtin themes. # html_theme = "sphinx_rtd_theme" +html_logo = "images/yavsap_logo.png" # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". 
-html_static_path = ["_static"] +# html_static_path = ["_static"] intersphinx_mapping = { "nextflow": ("https://nextflow.io/docs/latest/", None), - "canu": ("https://canu.readthedocs.io/en/latest", None), "singularity": ("https://apptainer.org/user-docs/master", None), } diff --git a/docs/images/mqc_fastqc.png b/docs/images/mqc_fastqc.png new file mode 100644 index 00000000..64989782 Binary files /dev/null and b/docs/images/mqc_fastqc.png differ diff --git a/docs/images/mqc_gen_stats.png b/docs/images/mqc_gen_stats.png new file mode 100644 index 00000000..b06648ba Binary files /dev/null and b/docs/images/mqc_gen_stats.png differ diff --git a/docs/images/mqc_haplink.png b/docs/images/mqc_haplink.png new file mode 100644 index 00000000..60d6c487 Binary files /dev/null and b/docs/images/mqc_haplink.png differ diff --git a/docs/images/mqc_kraken.png b/docs/images/mqc_kraken.png new file mode 100644 index 00000000..11ceebba Binary files /dev/null and b/docs/images/mqc_kraken.png differ diff --git a/docs/images/mqc_minimap2.png b/docs/images/mqc_minimap2.png new file mode 100644 index 00000000..87bd86d6 Binary files /dev/null and b/docs/images/mqc_minimap2.png differ diff --git a/docs/images/mqc_nanostat.png b/docs/images/mqc_nanostat.png new file mode 100644 index 00000000..06a3684c Binary files /dev/null and b/docs/images/mqc_nanostat.png differ diff --git a/docs/images/mqc_raxmlng.png b/docs/images/mqc_raxmlng.png new file mode 100644 index 00000000..bd118482 Binary files /dev/null and b/docs/images/mqc_raxmlng.png differ diff --git a/docs/images/mqc_trimmomatic.png b/docs/images/mqc_trimmomatic.png new file mode 100644 index 00000000..f26dfa9a Binary files /dev/null and b/docs/images/mqc_trimmomatic.png differ diff --git a/docs/images/mqc_versions.png b/docs/images/mqc_versions.png new file mode 100644 index 00000000..2416e76f Binary files /dev/null and b/docs/images/mqc_versions.png differ diff --git a/docs/images/mqc_workflow.png b/docs/images/mqc_workflow.png new file mode 100644 index 00000000..af00ab7e Binary files /dev/null and b/docs/images/mqc_workflow.png differ diff --git a/docs/images/report_cd_linux.png b/docs/images/report_cd_linux.png new file mode 100644 index 00000000..b9f8ca34 Binary files /dev/null and b/docs/images/report_cd_linux.png differ diff --git a/docs/images/report_cd_windows.png b/docs/images/report_cd_windows.png new file mode 100644 index 00000000..91eb37f3 Binary files /dev/null and b/docs/images/report_cd_windows.png differ diff --git a/docs/images/report_copy_linux.png b/docs/images/report_copy_linux.png new file mode 100644 index 00000000..03defdf0 Binary files /dev/null and b/docs/images/report_copy_linux.png differ diff --git a/docs/images/report_copy_windows.png b/docs/images/report_copy_windows.png new file mode 100644 index 00000000..76b0e62d Binary files /dev/null and b/docs/images/report_copy_windows.png differ diff --git a/docs/images/report_powershell_open_windows.png b/docs/images/report_powershell_open_windows.png new file mode 100644 index 00000000..c8ba8684 Binary files /dev/null and b/docs/images/report_powershell_open_windows.png differ diff --git a/docs/images/report_python_linux.png b/docs/images/report_python_linux.png new file mode 100644 index 00000000..0c78eb7a Binary files /dev/null and b/docs/images/report_python_linux.png differ diff --git a/docs/images/report_python_windows.png b/docs/images/report_python_windows.png new file mode 100644 index 00000000..82c466fb Binary files /dev/null and b/docs/images/report_python_windows.png 
differ
diff --git a/docs/images/report_terminal_open_linux.png b/docs/images/report_terminal_open_linux.png
new file mode 100644
index 00000000..cd02ef6e
Binary files /dev/null and b/docs/images/report_terminal_open_linux.png differ
diff --git a/docs/images/report_view_linux.png b/docs/images/report_view_linux.png
new file mode 100644
index 00000000..fb298379
Binary files /dev/null and b/docs/images/report_view_linux.png differ
diff --git a/docs/images/report_view_windows.png b/docs/images/report_view_windows.png
new file mode 100644
index 00000000..1eec8e4b
Binary files /dev/null and b/docs/images/report_view_windows.png differ
diff --git a/docs/images/yavsap_logo.png b/docs/images/yavsap_logo.png
new file mode 100644
index 00000000..cafcb0a2
Binary files /dev/null and b/docs/images/yavsap_logo.png differ
diff --git a/docs/index.md b/docs/index.md
index e8c3c31f..fe33afa6 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -12,4 +12,5 @@ install
usage
parameters
output
+report
```
diff --git a/docs/install.md b/docs/install.md
index ea3a2590..a235e098 100644
--- a/docs/install.md
+++ b/docs/install.md
@@ -2,13 +2,13 @@

## Prerequisites

-Here's what you'll need to get started. Unless otherwise noted, every program
+Here's what you'll need to get started. Every one of these programs
must be available on your `PATH` in bash (other shells don't count).

- Git v2.7.0 or higher (🌐/🍺/🐍)
- Curl v2.41.0 or higher (🌐/🍺/🐍)
- Java Runtime v8-v15 (🌐/🍺/🐍)
-- Nextflow v20.10.0 or higher (🌐/🐍)
+- Nextflow v21.10.3 or higher (🌐/🐍)
- One or more of the following container engines
  - Docker v20.10.0 or higher (🌐)
  - Podman v3.0 or higher (🌐)
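Since every prerequisite must resolve from bash, a quick pre-flight check can save a failed first run. This is a minimal sketch, assuming bash and the conventional binary names for the tools listed above:

```bash
# Verify each prerequisite is on PATH; binary names are assumed, not documented
for tool in git curl java nextflow; do
  command -v "$tool" > /dev/null || echo "missing: $tool"
done
nextflow -version   # should report version 21.10.3 or later
```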
diff --git a/docs/output.md b/docs/output.md
index 03375b50..1dba2066 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -5,26 +5,21 @@ outputs and where to find them, but first...

## The Visualizer

-If you like pictures, hate juggling files into various Java applications, and
-are OCD about having everything in one spot, then we have you covered. The
-pipeline outputs its own web application with every run that allows you to view,
-interact with, and download nearly every output file.
-
-To get started with The Visualizer, you'll need [a web
-server](https://developer.mozilla.org/en-US/docs/Learn/Common_questions/set_up_a_local_testing_server#running_a_simple_local_http_server).
-To learn how to use The Visualizer, click the question mark in the bottom-left
-corner of the web page.
+Our favorite output is the (mostly) self-contained report file. The report is
+contained in `report/index.html`. For the best experience, you will need all of
+the files in the `report` directory, and you will need to serve them with a web
+server application. More details and step-by-step instructions are provided on
+the page dedicated to {ref}`The YAVSAP Report`.

## File Output

You don't like all the fluff, huh? Here's the lowdown on the file structure you
can expect from each pipeline run.

-### Visualizer Data
+### Report stuff

-All the following files and folders are for powering the Visualizer: if you want
-to use The Visualizer on another machine, you will need to take them all to the
-other computer in the exact same folder structure.
+The report itself, along with the alignment data needed to view it at full
+potential, is contained in the aptly-named `report` folder.

```text
results
├── 📁 report
    ├── 📝 pig-serum.bam
    ├── 📝 pig-serum.bam.bai
    ├── 📝 pig-feces.bam
    ├── 📝 pig-feces.bam.bai
    ├── 📝 index.html
-   ├── 📝 krona.html
-   ├── 📝 multiqc_report.html
-   ├── 📝 nextflow_report.html
    ├── 📝 reference.fasta
    └── 📝 reference.fasta.fai
```
@@ -146,7 +138,7 @@

### Diagnostics

-Pipeline runtime reports can be found in the `.trace` folder by default, but
+Pipeline runtime reports can be found in the `pipeline_info` folder by default, but
this can be modified using the {ref}`--tracedir ` parameter. Four types of
Nextflow traces are output.
@@ -161,7 +153,9 @@ date and time of execution appended, e.g.

```text
results
-└── 📁 .trace
+└── 📁 pipeline_info
+    ├── 📝 execution_report_2021-09-01_12-00-00.html
+    ├── 📝 execution_report_2021-10-01_12-00-00.html
    ├── 📝 execution_timeline_2021-09-01_12-00-00.html
    ├── 📝 execution_timeline_2021-10-01_12-00-00.html
    ├── 📝 execution_trace_2021-09-01_12-00-00.txt
@@ -169,6 +163,3 @@ results
    ├── 📝 pipeline_dag_2021-09-01_12-00-00.svg
    └── 📝 pipeline_dag_2021-10-01_12-00-00.svg
```
-
-The latest execution report is saved to the `report/nextflow_report.html` file,
-instead, and will be included in {ref}`The Visualizer`.
diff --git a/docs/report.md b/docs/report.md
new file mode 100644
index 00000000..9727b343
--- /dev/null
+++ b/docs/report.md
@@ -0,0 +1,420 @@
+# The YAVSAP report
+
+Formerly known as "The Visualizer."
+
+YAVSAP outputs a consolidated report containing statistics on your analysis and
+the viral haplotypes found. The report is powered by [MultiQC], so you can view
+it in any browser.
+
+## Launching the report
+
+The report can be viewed simply by double-clicking the `report/index.html` file
+in your specified results folder, but for the best experience, you will need to
+run a temporary server. Don't worry: it's not as daunting as it sounds. Just
+follow the instructions below.
+
+### Windows
+
+You'll need to download some server software first. You won't need
+administrative privileges for this, though.
+
+#### First time only
+
+1. Open a PowerShell terminal by pressing WIN+X, then
+   I
+2. Type `Set-ExecutionPolicy RemoteSigned -Scope CurrentUser` ENTER
+   and answer `Y` to all the prompts
+3. Type `Invoke-WebRequest get.scoop.sh | Invoke-Expression` ENTER
+4. Type `scoop install python` ENTER
+5. Close the PowerShell window
+
+#### Every time
+
+1. Copy the entire `report` directory from the server to a location on your
+   local computer
+
+   ![Copying the directory](images/report_copy_windows.png)
+
+2. Open a PowerShell terminal by pressing WIN+X, then
+   I
+
+   ![Opening PowerShell](images/report_powershell_open_windows.png)
+
+3. Type `cd` and paste the path of the `report` directory
+
+   ![Changing to the report directory](images/report_cd_windows.png)
+
+4. Type `python -m http.server 8000` ENTER
+
+   ![Launching the server](images/report_python_windows.png)
+
+5. Open a browser, and navigate to <http://localhost:8000>
+
+   ![Viewing the report](images/report_view_windows.png)
+
+   ```{note}
+   You can launch multiple reports at once by changing `8000` to another number
+   in both the PowerShell window and the browser URL. Just make sure the number
+   is larger than 1024, and for best results try to pick a number that doesn't
+   appear on [this list](https://en.wikipedia.org/wiki/List_of_TCP_and_UDP_port_numbers#Registered_ports).
+   ```
+
+6. Once you are done with the report, close the browser, press
+   CTRL+C in PowerShell, and close PowerShell
+
+### Mac and Linux
+
+Hooray! Mac and Linux desktops come with server software pre-installed!
+
+1.
Copy the entire `report` directory from the server to a location on your + local computer + + ![Copying the directory](images/report_copy_linux.png) + +2. Open a new terminal window + + - **Mac** + Press CMD+SPACE, type `terminal` and press enter + - **Linux** + Press CTRL+ALT+T + + ![Opening a terminal](images/report_terminal_open_linux.png) + +3. Type `cd` and the paste the path of the `report` directory + + ![Changing to the report directory](images/report_cd_linux.png) + +4. Type `python -m http.server 8000` ENTER + + ![Launching the server](images/report_python_linux.png) + +5. Open a browser, and navigate to + + ![Viewing the report](images/report_view_linux.png) + + ```{note} + You can launch multiple reports at once by changing `8000` to another number + in both the terminal window and the browser URL. Just make sure the number + is larger than 1024, and for best results try to pick a number than doesn't + appear on [this list](https://en.wikipedia.org/wiki/List_of_TCP_and_UDP_port_numbers#Registered_ports). + ``` + +6. Once you are done with the report, close the browser, press + CTRL+C in the terminal, and close the terminal + +## Using the Report + +Here's a section-by-section breakdown of how everything in the report works. + +### General Statistics + +![General Statistics Screenshot](images/mqc_gen_stats.png) + +A table with a general overview of statistics that MultiQC cares about. These +numbers have some limited use cases, but overall are just repeats of everything +contained in the other sections. + +(FastQC)= + +### FastQC (Illumina-only) + +![FastQC Screenshot](images/mqc_fastqc.png) + +FastQC checks for + +- **Number of reads per sample** +- **Read quality** +- Sequence read diversity +- GC content anomalies +- **Read length** +- Sequence duplication and over-representation +- **Adapter contamination** + +Because YAVSAP is primarily a viral analysis pipeline for metagenomic reads, +diversity, GC content, and duplication are typically irrelevant: use them if you +know what you are doing. + +#### Number of reads + read length + +These two numbers can be used to estimate coverage. The equation for coverage +is[^1] + +```{math} +--- +label: coverage +--- +c = \frac{LN}{G} +``` + +where + +- {math}`c` = coverage +- {math}`L` = average read length +- {math}`N` = number of reads +- {math}`G` = genome length + +For variant- and haplotype-calling purposes, the coverage {math}`c` should be +well in excess of 60. + +#### Read quality + +Illumina reads are famous for being high quality. If you have reads that fail in +the quality assessment, then there are likely errors in the upstream sample +preparation (extraction, amplification, library prep) or sequencing. Low-quality +reads _can_ be used by YAVSAP, but will reduce the ability to call variants and +haplotypes. + +#### Adapter contamination + +If adapters are found in your reads, then you can compensate by adjusting the +{ref}`read trimming options`. Note that FastQC runs _before_ read trimming, so +this check will fail even if the adapters are being trimmed off. + +More detailed instructions for interpreting FastQC results can be found in [the +FastQC documentation]. + +--- + +(NanoStat)= + +### NanoStat (Nanopore-only) + +![NanoStat Screenshot](images/mqc_nanostat.png) + +NanoStat calculates the + +- Median read length +- N50 read length +- Median read quality +- Number of reads +- Total base count +- Read quality distribution + +For long reads, the N50 is generally more informative than the median length. 
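+As a worked example of using the N50 in place of {math}`L` (the numbers here
+are invented for illustration): {math}`N = 110{,}000` reads with an N50 of
+{math}`L = 500` bp against the roughly {math}`11{,}000` bp JEV genome give
+
+```{math}
+c = \frac{500 \times 110000}{11000} = 5000
+```
+
+which is comfortably above the target of 60 given earlier.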
+As this example shows, substituting {math}`L` = N50 into Equation
+{math:numref}`coverage` gives an indication of the coverage of the sample.
+
+If a large proportion of reads are low-quality (<Q10), then YAVSAP will
+struggle to identify strains and to call variants and haplotypes. Quality that
+low indicates errors in either the upstream sample preparation (extraction,
+library prep) or sequencing, and the sequencing should be redone.
+
+(Trimmomatic)=
+
+### Trimmomatic (Illumina-only)
+
+![Trimmomatic Screenshot](images/mqc_trimmomatic.png)
+
+Trimmomatic removes adapters, low-quality reads, and ends of reads in a highly
+configurable manner. This section shows how many reads were kept by Trimmomatic.
+Use this section in conjunction with {ref}`FastQC` to ensure only high-quality
+sequence reads are processed by YAVSAP.
+
+### Kraken
+
+![Kraken Screenshot](images/mqc_kraken.png)
+
+Kraken identifies whether sequencing reads belong to the virus of interest or
+the host. This plot shows the five most abundant taxa in each sample pool.
+Check this plot to see if host reads are overwhelming the viral reads and
+hindering the analysis. Depending on your Kraken database, host reads might be
+`unclassified` while viral reads are identified, or host reads might be
+identified while viral reads are `unclassified`. The results in this plot
+should be interpreted as the number of reads that will actually be analyzed by
+YAVSAP.
+
+#### Examples
+
+##### Example 1
+
+Virus
+: SARS-CoV-2 (SC2)
+
+Host
+: _Homo sapiens_ (human)
+
+Kraken Database
+: [standard](https://github.com/DerrickWood/kraken2/wiki/Manual#standard-kraken-2-database)
+
+Kept reads
+: `10239` (viral)
+
+In this case, both the virus and host will be identified in `standard`. The plot
+will show the abundances of both `Betacoronavirus` and `Homo sapiens` reads.
+Thanks to the `--keep_taxid` value, we know that only SC2 and related, non-human
+reads will be aligned and variant-called. High percentages of `Homo sapiens`
+reads indicate low viral load or a faulty extraction and/or amplification
+protocol.
+
+##### Example 2
+
+Virus
+: Japanese Encephalitis Virus (JEV)
+
+Host
+: _Sus scrofa_ (domestic pig)
+
+Kraken Database
+: [standard](https://github.com/DerrickWood/kraken2/wiki/Manual#standard-kraken-2-database)
+
+Kept reads
+: `10239` (viral)
+
+In this case, JEV is characterized in RefSeq, and its genome will be identified
+with the `standard` Kraken database. In contrast, _Sus scrofa_ will be marked as
+`unclassified` with this database, as non-human mammals are not included in
+`standard`, so the unclassified reads should be filtered out. High percentages
+of `unclassified` indicate a faulty extraction protocol and/or an amplification
+protocol that is too non-specific.
+
+##### Example 3
+
+Virus
+: Bovine Rotavirus (RV)
+
+Host
+: _Bos taurus_ (domestic cattle)
+
+Kraken Database
+: [nt](https://github.com/DerrickWood/kraken2/wiki/Manual#custom-databases)
+
+Kept reads
+: `10239` (viral)
+
+RV is not present in RefSeq, and _Bos taurus_ is a non-human mammal, so both
+would be `unclassified` if using `standard`. Instead, we switch to the `nt`
+database. Both RV and _Bos taurus_ sequences are present in the `nt` database,
+so the plot should be interpreted in the same manner as the
+{ref}`first example`.
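+
+On the command line, a run matching this example might look something like the
+following sketch (the platform, paths, and output directory are placeholders,
+not recommendations):
+
+```console
+nextflow run ksumngs/yavsap \
+  -profile docker \
+  --platform illumina \
+  --input . \
+  --kraken2_db /databases/kraken2/nt \
+  --keep_taxid 10239 \
+  --outdir results
+```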
+
+##### Example 4
+
+Virus
+: Foot and Mouth Disease Virus (FMDV)
+
+Host
+: _Homo sapiens_ (human)
+
+Kraken Database
+: [human](https://s3.console.aws.amazon.com/s3/object/nf-core-awsmegatests?prefix=viralrecon/input_data/kraken2_human.tar.gz)
+
+Kept reads
+: `unclassified`
+
+In this case, FMDV is in RefSeq, but it doesn't matter, because the database
+only contains human sequences. Here, _only_ the host reads will be identified,
+so keep the `unclassified` reads, and interpret them as viral reads.
+
+### minimap2
+
+![minimap2 Screenshot](images/mqc_minimap2.png)
+
+The alignments against the reference genome are displayed using [IGV]. The IGV
+interface is described in detail on the [IGV] website.
+
+```{note}
+There are minor differences between the desktop application and the web application.
+```
+
+Briefly, the reference genome is listed in the top-left corner. Display options
+(cursor, labels, zoom) are located in the top-right corner. Immediately below
+the gray bar, the genome annotations are shown along the length of the reference
+genome. These are colored based on the type of annotation:
+
+- Gene
+- CDS
+- Mature Protein
+- Everything else
+
+A coverage graph is displayed as a histogram labelled with each sample name.
+Along those coverage graphs, IGV will attempt to call variants using colored
+bars. These variants **are not** precise, and **do not** reflect the variants
+called by YAVSAP. Each gray bar below the coverage graph represents an aligned
+read. Variant calls are also shown on the reads that support them. Variant calls
+have the following legend:
+
+- Blue bar: C
+- Red bar: T
+- Orange bar: G
+- Green bar: A
+- Purple bar: Insertion
+- Black dash: Deletion
+
+IGV also attempts to call structural variants, shown as pairs of arrows whose
+color distinguishes normal read pairs from inversions (in either orientation)
+and from duplications or translocations. These calls are generally meaningless
+in the context of viral haplotyping, and should be verified with some other
+tool.
+
+(Haplotypes-report)=
+
+### Haplotypes (CliqueSNV for Illumina, HapLink.jl for Nanopore)
+
+![HapLink Screenshot](images/mqc_haplink.png)
+
+A rough multiple sequence alignment of every consensus sequence and haplotype
+sequence found in each sample is shown. The frequency of each minority
+haplotype, the closest strain, and that strain's NCBI GenBank link are listed
+in the table. The bases are colored (A, C, G, T), and every variant site is
+highlighted purple.
+
+### RAxML-NG
+
+![RAxML-NG Screenshot](images/mqc_raxmlng.png)
+
+The phylogenetic tree of all consensus sequences and haplotypes, as well as all
+the strains included in the genome list, is shown in an interactive viewer. The
+toolbar across the top has controls for
+
+- Zoom
+- Sorting
+- Saving (image or Newick)
+- Tree style (linear, circular)
+- Alignment
+
+Node names take the form of one of
+
+- `<genotype>_<strain>`
+- `<sample>_consensus`
+- `<sample>_haplotype_<number>`
+
+Each node is colored based on the node name before the first underscore (`_`).
+This means that strains of the same genotype have the same color, and each
+sample's consensus and haplotype sequences all have the same colors. There are
+only 10 colors, so having more samples than that might result in color
+collisions. In that case, it may be advisable to download the Newick file and
+reformat it in another tree program.
+
+### Software Versions
+
+![Software Versions Screenshot](images/mqc_versions.png)
+
+A table of all the software used to produce the report. Just to make sure you
+give credit where it is due.
+
+### Workflow Summary
+
+![Workflow Summary Screenshot](images/mqc_workflow.png)
+
+A list of all the parameters (and various other metadata) used to produce this
+particular analysis and report. Sort of a light version of the
+{ref}`Nextflow report`.
+
+[^1]: Lander ES, Waterman MS. Genomic mapping by fingerprinting random clones: a mathematical analysis. Genomics. 1988 Apr;2(3):231-9. doi: 10.1016/0888-7543(88)90007-9. PMID: 3294162.
+
+[multiqc]: https://multiqc.info/
+[igv]: https://igv.org/
+[the fastqc documentation]: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/
diff --git a/docs/usage.md b/docs/usage.md
index 9a0a84d8..485c13ea 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -1,3 +1,5 @@
+
+
# Usage

Basic gist:
@@ -8,7 +10,7 @@ nextflow run ksumngs/yavsap \
  --platform <PLATFORM> \
  --kraken2_db <KRAKEN2_DB>
```

-## Input Preparation
+## Input preparation

### Using a Directory as Input
@@ -77,8 +79,6 @@ odd-number columns (`3,5,7`).

#### Single-end example

-
-
| #samplename | | |
| ----------- | -------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------- |
| pig-serum | `/data/run/fastq{pass,fail}/barcode01/FAP01234_{pass,fail}_barcode01_abcde01_*.fastq*` | `/data/run2/fastq{pass,fail}/barcode07/FAP01234_{pass,fail}_barcode07_abcde01_*.fastq*` |
@@ -86,12 +86,8 @@ odd-number columns (`3,5,7`).
| mosquito1 | `/data/run/fastq{pass,fail}/barcode03/FAP01234_{pass,fail}_barcode03_abcde01_*.fastq*` | |
| mosquito2 | `./seq-results/mosquito2/*.fastq*` | |

-
-
#### Paired-end example

-
-
| #Sample | Forward1 | Reverse1 | Forward2 | Reverse2 |
| --------- | ---------------------------------------- | ---------------------------------------- | ------------------------------------- | ------------------------------------- |
| pig-serum | `/basespace/run/PIG-SERUM*_R1*.fastq.gz` | `/basespace/run/PIG-SERUM*_R2*.fastq.gz` | `/dragen/run/PIG-SERUM*_R1*.fastq.gz` | `/dragen/run/PIG-SERUM*_R2*.fastq.gz` |
@@ -99,8 +95,6 @@ odd-number columns (`3,5,7`).
| mosquito1 | `/basespace/run/MOSQUITO1*_R1*.fastq.gz` | `/basespace/run/MOSQUITO1*_R2*.fastq.gz` | | |
| mosquito2 | `./seq-results/mosquito2/*_R1*.fastq.gz` | `./seq-results/mosquito2/*_R2*.fastq.gz` | | |

-
-
Once the samplesheet is constructed, pass it on the command line as:

```bash
nextflow run ksumngs/yavsap \
  --input <SAMPLESHEET> \
  --platform <PLATFORM> \
  --kraken2_db <KRAKEN2_DB>
```

-## Profile Selection
+## Running the pipeline
+
+The typical command for running the pipeline is as follows:
+
+```console
+nextflow run ksumngs/yavsap -profile docker --input . --outdir <OUTDIR> --platform illumina --kraken2_db https://genome-idx.s3.amazonaws.com/kraken/k2_viral_20210517.tar.gz --keep_taxid classified
+```
+
+This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles.
+
+Note that the pipeline will create the following files in your working directory:
+
+```console
+work            # Directory containing the nextflow working files
+<OUTDIR>        # Finished results in specified location (defined with --outdir)
+.nextflow.log   # Log file from Nextflow
+# Other nextflow hidden files, eg. history of pipeline runs and old logs.
+```
+
+### Updating the pipeline
+
+When you run the above command, Nextflow automatically pulls YAVSAP from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if we've updated it since.
To make sure that you're running the latest version, you can update the cached copy of the pipeline:
+
+```console
+nextflow pull ksumngs/yavsap
+```

-Profiles allow for unique combinations of settings within a Nextflow pipeline.
-For the purposes of YAVSAP, they reconfigure the pipeline to run on a particular
-container engine. Whichever engine you choose must be installed and available
-(e.g. `module load`) on each node that pipeline processes are running on. The
-available options are
+### Reproducibility

-(none)
+It is often a good idea to specify a pipeline version when running the pipeline on your data. This ensures that a specific version of the pipeline code and software are used when you run your pipeline. If you keep using the same tag, you'll be running the same version of the pipeline, even if there have been changes to the code since.

-: Don't use a container engine. Requires that every tool and the right version
-of every tool be installed on each machine the pipeline is running on. Don't
-use this one.
+First, go to the [yavsap releases page](https://github.com/ksumngs/yavsap/releases) and find the latest version number (eg. `v1.3.1`). Then specify this when running the pipeline with `-r` (one hyphen) - eg. `-r v1.3.1`.

-docker
+This version number will be logged in {ref}`the YAVSAP report` when you run the pipeline, so that you'll know what you used when you look back in the future.

-: Use [Docker](https://docker.com) as the container engine. Note that Docker
-often requires root or nearly-root permissions that usually aren't available
-on HPCs, and has a weird license that might forbid its use in commercial
-settings. Works well on local machines, though.
+## Core Nextflow arguments

-podman
+```{note}
+These options are part of Nextflow and use a _single_ hyphen (pipeline parameters use a double-hyphen).
+```

-: Use [Podman](https://podman.io) instead of Docker. Podman is similar enough
-to Docker that they can often be used interchangably, but doesn't require root
-permissions and has a free license. Some files might not be accessible via
-container on RHEL-based distros thanks to their particular SELinux
-implementation.
+### `-profile`

-singularity
+Use this parameter to choose a configuration profile. Profiles can give configuration presets for different compute environments.

-: **Recommended**
+Several generic profiles are bundled with the pipeline which instruct the pipeline to use software packaged using different methods (Docker, Singularity, Podman, Shifter, Charliecloud, Conda) - see below. When using Biocontainers, most of these software packaging methods pull Docker containers from [quay.io](https://quay.io). Singularity instead downloads Singularity images directly via HTTPS, hosted by the [Galaxy project](https://depot.galaxyproject.org/singularity/).

-Use the {doc}`Singularity ` container engine. This engine
-was built with HPC use in mind, and doesn't require any special permissions to
-run. Singularity's downfall is how it will expose your home directory to the
-container, resulting in rare, but difficult to explain bugs when files
-conflict. Every effort has been made to minimize the effects of Singularity's
-file mounting in this pipeline.
+```{note}
+We highly recommend the use of Docker or Singularity containers for full pipeline reproducibility: they just work!
+``` -INSTITUTE +The pipeline also dynamically loads configurations from [https://github.com/nf-core/configs](https://github.com/nf-core/configs) when it runs, making multiple config profiles for various institutional clusters available at run time. For more information and to see if your system is available in these configs please see the [nf-core/configs documentation](https://github.com/nf-core/configs#documentation). -: If your computing center is listed in [nf-core configs](https://github.com/nf-core/configs/) -then you can pass that name to `-profile` to have the container engine and -resource limits configured automatically. +Note that multiple profiles can be loaded, for example: `-profile test,docker` - the order of arguments is important! +They are loaded in sequence, so later profiles can overwrite earlier profiles. -test +If `-profile` is not specified, the pipeline will run locally and expect all software to be installed and available on the `PATH`. This is _not_ recommended. -: Download a test dataset from nf-core and run a sample to ensure your machine -configuration is correct. Must be used with one of the container engine -profiles. +- `docker` + - A generic configuration profile to be used with [Docker](https://docker.com/) +- `singularity` + - A generic configuration profile to be used with [Singularity](https://sylabs.io/docs/) +- `podman` + - A generic configuration profile to be used with [Podman](https://podman.io/) +- `shifter` + - A generic configuration profile to be used with [Shifter](https://nersc.gitlab.io/development/shifter/how-to-use/) +- `charliecloud` + - A generic configuration profile to be used with [Charliecloud](https://hpc.github.io/charliecloud/) +- ~~`conda`~~ + - ~~A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter or Charliecloud.~~ **YAVSAP doesn't currently have conda support, but we're working on it!. Don't use this profile!** +- `test` + - A profile with a complete configuration for automated testing + - Includes links to test data so needs no other parameters -test_nanopore +### `-resume` -: Download a MinION test dataset and run just like `test`. +Specify this when restarting a pipeline. Nextflow will use cached results from any pipeline steps where the inputs are the same, continuing from where it got to previously. For input to be considered the same, not only the names must be identical but the files' contents as well. For more info about this parameter, see [this blog post](https://www.nextflow.io/blog/2019/demystifying-nextflow-resume.html). -gh +You can also supply a run name to resume a specific run: `-resume [run-name]`. Use the `nextflow log` command to show previous run names. -: Used to limit resource usage during continuous integration on GitHub Actions. -You should never have to use these for real-life analysis. +### `-c` -To select a profile, you must pass the desired profile name to Nextflow's -`-profile` flag. Note that this is a Nextflow flag, and not a pipeline flag, -so it is a single dash (`-profile`), not a double dash (`--profile`). +Specify the path to a specific config file (this is a core Nextflow command). See the [nf-core website documentation](https://nf-co.re/usage/configuration) for more information. -## Mandatory Parameters +## Mandatory pipeline parameters See {doc}`the page on parameters ` for the complete lowdown on parameters. 
@@ -190,21 +203,12 @@ success with placing these parameters in a `nextflow.config` file, so keeping
them on the command-line is best.

--kraken2_db
-
-The path to a Kraken2 database. See {ref}`--kraken2_db`.
+: The path to a Kraken2 database. See {ref}`--kraken2_db`.

--platform
-
-Must be set to `illumina` or `nanopore`, depending on the type of reads
+: Must be set to `illumina` or `nanopore`, depending on the type of reads
you are analyzing. See {ref}`--platform`.

---p
-
-rofile
-
-So, this isn't really a parameter, but the container engine needs to be set
-using Nextflow's `-profile` flag. See {ref}`Profile Selection`.
-
## Genome Preparation

YAVSAP needs some idea of what kind of viruses it's looking for. You will need
@@ -236,16 +240,30 @@ Once a comparison genomes file is prepared, the path to it can be passed to the
YAVSAP comes with some example comparison genomes files. These can be referred
to by name, rather than by path (e.g. `nextflow run ksumngs/yavsap --genome_list jev`). They are

-
-
| Virus | Name | Recommended Reference |
| ---------------------------------------------------------------------------------------------------------------------------- | ----- | --------------------------------------------------------------- |
| [Japanese Encephalitis Virus (JEV)](https://www.ncbi.nlm.nih.gov/data-hub/taxonomy/11072/) | `jev` | [NC_001437.1](https://www.ncbi.nlm.nih.gov/nuccore/NC_001437.1) |
| [Severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2)](https://www.ncbi.nlm.nih.gov/labs/data-hub/taxonomy/2697049/) | `sc2` | [NC_045512.2](https://www.ncbi.nlm.nih.gov/nuccore/NC_045512.2) |

-
+## Running in the background

-## Setting up for HPC Job Schedulers
+Nextflow handles job submissions and supervises the running jobs. The Nextflow process must run until the pipeline is finished.
+
+The Nextflow `-bg` flag launches Nextflow in the background, detached from your terminal so that the workflow does not stop if you log out of your session. The logs are saved to a file.
+
+Alternatively, you can use `screen` / `tmux` or a similar tool to create a detached session which you can log back into at a later time.
+Some HPC setups also allow you to run Nextflow within a cluster job submitted by your job scheduler (from where it submits more jobs).
+
+## Nextflow memory requirements
+
+In some cases, the Nextflow Java virtual machines can start to request a large amount of memory.
+We recommend adding the following line to your environment to limit this (typically in `~/.bashrc` or `~/.bash_profile`):
+
+```console
+NXF_OPTS='-Xms1g -Xmx4g'
+```
+
+## Setting up for HPC job schedulers

YAVSAP comes preconfigured for local use only. Yes, that's about as ridiculous
as it sounds. What's even more ridiculous is trying to make a configuration that
@@ -327,7 +345,7 @@ process {
  }
  withLabel: process_high_memory {
    queue = 'mem'
-  }
+  }
}
```
diff --git a/lib/NfcoreSchema.groovy b/lib/NfcoreSchema.groovy
new file mode 100755
index 00000000..b3d092f8
--- /dev/null
+++ b/lib/NfcoreSchema.groovy
@@ -0,0 +1,529 @@
+//
+// This file holds several functions used to perform JSON parameter validation, help and summary rendering for the nf-core pipeline template.
+// + +import org.everit.json.schema.Schema +import org.everit.json.schema.loader.SchemaLoader +import org.everit.json.schema.ValidationException +import org.json.JSONObject +import org.json.JSONTokener +import org.json.JSONArray +import groovy.json.JsonSlurper +import groovy.json.JsonBuilder + +class NfcoreSchema { + + // + // Resolve Schema path relative to main workflow directory + // + public static String getSchemaPath(workflow, schema_filename='nextflow_schema.json') { + return "${workflow.projectDir}/${schema_filename}" + } + + // + // Function to loop over all parameters defined in schema and check + // whether the given parameters adhere to the specifications + // + /* groovylint-disable-next-line UnusedPrivateMethodParameter */ + public static void validateParameters(workflow, params, log, schema_filename='nextflow_schema.json') { + def has_error = false + //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + // Check for nextflow core params and unexpected params + def json = new File(getSchemaPath(workflow, schema_filename=schema_filename)).text + def Map schemaParams = (Map) new JsonSlurper().parseText(json).get('definitions') + def nf_params = [ + // Options for base `nextflow` command + 'bg', + 'c', + 'C', + 'config', + 'd', + 'D', + 'dockerize', + 'h', + 'log', + 'q', + 'quiet', + 'syslog', + 'v', + 'version', + + // Options for `nextflow run` command + 'ansi', + 'ansi-log', + 'bg', + 'bucket-dir', + 'c', + 'cache', + 'config', + 'dsl2', + 'dump-channels', + 'dump-hashes', + 'E', + 'entry', + 'latest', + 'lib', + 'main-script', + 'N', + 'name', + 'offline', + 'params-file', + 'pi', + 'plugins', + 'poll-interval', + 'pool-size', + 'profile', + 'ps', + 'qs', + 'queue-size', + 'r', + 'resume', + 'revision', + 'stdin', + 'stub', + 'stub-run', + 'test', + 'w', + 'with-charliecloud', + 'with-conda', + 'with-dag', + 'with-docker', + 'with-mpi', + 'with-notification', + 'with-podman', + 'with-report', + 'with-singularity', + 'with-timeline', + 'with-tower', + 'with-trace', + 'with-weblog', + 'without-docker', + 'without-podman', + 'work-dir' + ] + def unexpectedParams = [] + + // Collect expected parameters from the schema + def expectedParams = [] + def enums = [:] + for (group in schemaParams) { + for (p in group.value['properties']) { + expectedParams.push(p.key) + if (group.value['properties'][p.key].containsKey('enum')) { + enums[p.key] = group.value['properties'][p.key]['enum'] + } + } + } + + for (specifiedParam in params.keySet()) { + // nextflow params + if (nf_params.contains(specifiedParam)) { + log.error "ERROR: You used a core Nextflow option with two hyphens: '--${specifiedParam}'. 
Please resubmit with '-${specifiedParam}'" + has_error = true + } + // unexpected params + def params_ignore = params.schema_ignore_params.split(',') + 'schema_ignore_params' + def expectedParamsLowerCase = expectedParams.collect{ it.replace("-", "").toLowerCase() } + def specifiedParamLowerCase = specifiedParam.replace("-", "").toLowerCase() + def isCamelCaseBug = (specifiedParam.contains("-") && !expectedParams.contains(specifiedParam) && expectedParamsLowerCase.contains(specifiedParamLowerCase)) + if (!expectedParams.contains(specifiedParam) && !params_ignore.contains(specifiedParam) && !isCamelCaseBug) { + // Temporarily remove camelCase/camel-case params #1035 + def unexpectedParamsLowerCase = unexpectedParams.collect{ it.replace("-", "").toLowerCase()} + if (!unexpectedParamsLowerCase.contains(specifiedParamLowerCase)){ + unexpectedParams.push(specifiedParam) + } + } + } + + //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + // Validate parameters against the schema + InputStream input_stream = new File(getSchemaPath(workflow, schema_filename=schema_filename)).newInputStream() + JSONObject raw_schema = new JSONObject(new JSONTokener(input_stream)) + + // Remove anything that's in params.schema_ignore_params + raw_schema = removeIgnoredParams(raw_schema, params) + + Schema schema = SchemaLoader.load(raw_schema) + + // Clean the parameters + def cleanedParams = cleanParameters(params) + + // Convert to JSONObject + def jsonParams = new JsonBuilder(cleanedParams) + JSONObject params_json = new JSONObject(jsonParams.toString()) + + // Validate + try { + schema.validate(params_json) + } catch (ValidationException e) { + println '' + log.error 'ERROR: Validation of pipeline parameters failed!' + JSONObject exceptionJSON = e.toJSON() + printExceptions(exceptionJSON, params_json, log, enums) + println '' + has_error = true + } + + // Check for unexpected parameters + if (unexpectedParams.size() > 0) { + Map colors = NfcoreTemplate.logColours(params.monochrome_logs) + println '' + def warn_msg = 'Found unexpected parameters:' + for (unexpectedParam in unexpectedParams) { + warn_msg = warn_msg + "\n* --${unexpectedParam}: ${params[unexpectedParam].toString()}" + } + log.warn warn_msg + log.info "- ${colors.dim}Ignore this warning: params.schema_ignore_params = \"${unexpectedParams.join(',')}\" ${colors.reset}" + println '' + } + + if (has_error) { + System.exit(1) + } + } + + // + // Beautify parameters for --help + // + public static String paramsHelp(workflow, params, command, schema_filename='nextflow_schema.json') { + Map colors = NfcoreTemplate.logColours(params.monochrome_logs) + Integer num_hidden = 0 + String output = '' + output += 'Typical pipeline command:\n\n' + output += " ${colors.cyan}${command}${colors.reset}\n\n" + Map params_map = paramsLoad(getSchemaPath(workflow, schema_filename=schema_filename)) + Integer max_chars = paramsMaxChars(params_map) + 1 + Integer desc_indent = max_chars + 14 + Integer dec_linewidth = 160 - desc_indent + for (group in params_map.keySet()) { + Integer num_params = 0 + String group_output = colors.underlined + colors.bold + group + colors.reset + '\n' + def group_params = params_map.get(group) // This gets the parameters of that particular group + for (param in group_params.keySet()) { + if (group_params.get(param).hidden && !params.show_hidden_params) { + num_hidden += 1 + continue; + } + def type = '[' + group_params.get(param).type + ']' + def description = group_params.get(param).description + def defaultValue = 
group_params.get(param).default != null ? " [default: " + group_params.get(param).default.toString() + "]" : '' + def description_default = description + colors.dim + defaultValue + colors.reset + // Wrap long description texts + // Loosely based on https://dzone.com/articles/groovy-plain-text-word-wrap + if (description_default.length() > dec_linewidth){ + List olines = [] + String oline = "" // " " * indent + description_default.split(" ").each() { wrd -> + if ((oline.size() + wrd.size()) <= dec_linewidth) { + oline += wrd + " " + } else { + olines += oline + oline = wrd + " " + } + } + olines += oline + description_default = olines.join("\n" + " " * desc_indent) + } + group_output += " --" + param.padRight(max_chars) + colors.dim + type.padRight(10) + colors.reset + description_default + '\n' + num_params += 1 + } + group_output += '\n' + if (num_params > 0){ + output += group_output + } + } + if (num_hidden > 0){ + output += colors.dim + "!! Hiding $num_hidden params, use --show_hidden_params to show them !!\n" + colors.reset + } + output += NfcoreTemplate.dashedLine(params.monochrome_logs) + return output + } + + // + // Groovy Map summarising parameters/workflow options used by the pipeline + // + public static LinkedHashMap paramsSummaryMap(workflow, params, schema_filename='nextflow_schema.json') { + // Get a selection of core Nextflow workflow options + def Map workflow_summary = [:] + if (workflow.revision) { + workflow_summary['revision'] = workflow.revision + } + workflow_summary['runName'] = workflow.runName + if (workflow.containerEngine) { + workflow_summary['containerEngine'] = workflow.containerEngine + } + if (workflow.container) { + workflow_summary['container'] = workflow.container + } + workflow_summary['launchDir'] = workflow.launchDir + workflow_summary['workDir'] = workflow.workDir + workflow_summary['projectDir'] = workflow.projectDir + workflow_summary['userName'] = workflow.userName + workflow_summary['profile'] = workflow.profile + workflow_summary['configFiles'] = workflow.configFiles.join(', ') + + // Get pipeline parameters defined in JSON Schema + def Map params_summary = [:] + def params_map = paramsLoad(getSchemaPath(workflow, schema_filename=schema_filename)) + for (group in params_map.keySet()) { + def sub_params = new LinkedHashMap() + def group_params = params_map.get(group) // This gets the parameters of that particular group + for (param in group_params.keySet()) { + if (params.containsKey(param)) { + def params_value = params.get(param) + def schema_value = group_params.get(param).default + def param_type = group_params.get(param).type + if (schema_value != null) { + if (param_type == 'string') { + if (schema_value.contains('$projectDir') || schema_value.contains('${projectDir}')) { + def sub_string = schema_value.replace('\$projectDir', '') + sub_string = sub_string.replace('\${projectDir}', '') + if (params_value.contains(sub_string)) { + schema_value = params_value + } + } + if (schema_value.contains('$params.outdir') || schema_value.contains('${params.outdir}')) { + def sub_string = schema_value.replace('\$params.outdir', '') + sub_string = sub_string.replace('\${params.outdir}', '') + if ("${params.outdir}${sub_string}" == params_value) { + schema_value = params_value + } + } + } + } + + // We have a default in the schema, and this isn't it + if (schema_value != null && params_value != schema_value) { + sub_params.put(param, params_value) + } + // No default in the schema, and this isn't empty + else if (schema_value == null && params_value != 
"" && params_value != null && params_value != false) { + sub_params.put(param, params_value) + } + } + } + params_summary.put(group, sub_params) + } + return [ 'Core Nextflow options' : workflow_summary ] << params_summary + } + + // + // Beautify parameters for summary and return as string + // + public static String paramsSummaryLog(workflow, params) { + Map colors = NfcoreTemplate.logColours(params.monochrome_logs) + String output = '' + def params_map = paramsSummaryMap(workflow, params) + def max_chars = paramsMaxChars(params_map) + for (group in params_map.keySet()) { + def group_params = params_map.get(group) // This gets the parameters of that particular group + if (group_params) { + output += colors.bold + group + colors.reset + '\n' + for (param in group_params.keySet()) { + output += " " + colors.blue + param.padRight(max_chars) + ": " + colors.green + group_params.get(param) + colors.reset + '\n' + } + output += '\n' + } + } + output += "!! Only displaying parameters that differ from the pipeline defaults !!\n" + output += NfcoreTemplate.dashedLine(params.monochrome_logs) + return output + } + + // + // Loop over nested exceptions and print the causingException + // + private static void printExceptions(ex_json, params_json, log, enums, limit=5) { + def causingExceptions = ex_json['causingExceptions'] + if (causingExceptions.length() == 0) { + def m = ex_json['message'] =~ /required key \[([^\]]+)\] not found/ + // Missing required param + if (m.matches()) { + log.error "* Missing required parameter: --${m[0][1]}" + } + // Other base-level error + else if (ex_json['pointerToViolation'] == '#') { + log.error "* ${ex_json['message']}" + } + // Error with specific param + else { + def param = ex_json['pointerToViolation'] - ~/^#\// + def param_val = params_json[param].toString() + if (enums.containsKey(param)) { + def error_msg = "* --${param}: '${param_val}' is not a valid choice (Available choices" + if (enums[param].size() > limit) { + log.error "${error_msg} (${limit} of ${enums[param].size()}): ${enums[param][0..limit-1].join(', ')}, ... 
)"
+                    } else {
+                        log.error "${error_msg}: ${enums[param].join(', ')})"
+                    }
+                } else {
+                    log.error "* --${param}: ${ex_json['message']} (${param_val})"
+                }
+            }
+        }
+        for (ex in causingExceptions) {
+            printExceptions(ex, params_json, log, enums)
+        }
+    }
+
+    //
+    // Remove an element from a JSONArray
+    //
+    private static JSONArray removeElement(json_array, element) {
+        def list = []
+        int len = json_array.length()
+        for (int i=0;i<len;i++){
+            list.add(json_array.get(i))
+        }
+        list.remove(element)
+        JSONArray jsArray = new JSONArray(list)
+        return jsArray
+    }
+
+    //
+    // Remove ignored parameters
+    //
+    private static JSONObject removeIgnoredParams(raw_schema, params) {
+        // Remove anything that's in params.schema_ignore_params
+        params.schema_ignore_params.split(',').each{ ignore_param ->
+            if(raw_schema.keySet().contains('definitions')){
+                raw_schema.definitions.each { definition ->
+                    for (key in definition.keySet()){
+                        if (definition[key].get("properties").keySet().contains(ignore_param)){
+                            // Remove the param to ignore
+                            definition[key].get("properties").remove(ignore_param)
+                            // If the param was required, change this
+                            if (definition[key].has("required")) {
+                                def cleaned_required = removeElement(definition[key].required, ignore_param)
+                                definition[key].put("required", cleaned_required)
+                            }
+                        }
+                    }
+                }
+            }
+            if(raw_schema.keySet().contains('properties') && raw_schema.get('properties').keySet().contains(ignore_param)) {
+                raw_schema.get("properties").remove(ignore_param)
+            }
+            if(raw_schema.keySet().contains('required') && raw_schema.required.contains(ignore_param)) {
+                def cleaned_required = removeElement(raw_schema.required, ignore_param)
+                raw_schema.put("required", cleaned_required)
+            }
+        }
+        return raw_schema
+    }
+
+    //
+    // Clean and check parameters relative to Nextflow native classes
+    //
+    private static Map cleanParameters(params) {
+        def new_params = params.getClass().newInstance(params)
+        for (p in params) {
+            // remove anything evaluating to false
+            if (!p['value']) {
+                new_params.remove(p.key)
+            }
+            // Cast MemoryUnit to String
+            if (p['value'].getClass() == nextflow.util.MemoryUnit) {
+                new_params.replace(p.key, p['value'].toString())
+            }
+            // Cast Duration to String
+            if (p['value'].getClass() == nextflow.util.Duration) {
+                new_params.replace(p.key, p['value'].toString().replaceFirst(/d(?!\S)/, "day"))
+            }
+            // Cast LinkedHashMap to String
+            if (p['value'].getClass() == LinkedHashMap) {
+                new_params.replace(p.key, p['value'].toString())
+            }
+        }
+        return new_params
+    }
+
+    //
+    // This function tries to read a JSON params file
+    //
+    private static LinkedHashMap paramsLoad(String json_schema) {
+        def params_map = new LinkedHashMap()
+        try {
+            params_map = paramsRead(json_schema)
+        } catch (Exception e) {
+            println "Could not read parameters settings from JSON. $e"
+            params_map = new LinkedHashMap()
+        }
+        return params_map
+    }
+
+    //
+    // Method to actually read in JSON file using Groovy.
+    // Group (as Key), values are all parameters
+    //    - Parameter1 as Key, Description as Value
+    //    - Parameter2 as Key, Description as Value
+    //    ....
+ // Group + // - + private static LinkedHashMap paramsRead(String json_schema) throws Exception { + def json = new File(json_schema).text + def Map schema_definitions = (Map) new JsonSlurper().parseText(json).get('definitions') + def Map schema_properties = (Map) new JsonSlurper().parseText(json).get('properties') + /* Tree looks like this in nf-core schema + * definitions <- this is what the first get('definitions') gets us + group 1 + title + description + properties + parameter 1 + type + description + parameter 2 + type + description + group 2 + title + description + properties + parameter 1 + type + description + * properties <- parameters can also be ungrouped, outside of definitions + parameter 1 + type + description + */ + + // Grouped params + def params_map = new LinkedHashMap() + schema_definitions.each { key, val -> + def Map group = schema_definitions."$key".properties // Gets the property object of the group + def title = schema_definitions."$key".title + def sub_params = new LinkedHashMap() + group.each { innerkey, value -> + sub_params.put(innerkey, value) + } + params_map.put(title, sub_params) + } + + // Ungrouped params + def ungrouped_params = new LinkedHashMap() + schema_properties.each { innerkey, value -> + ungrouped_params.put(innerkey, value) + } + params_map.put("Other parameters", ungrouped_params) + + return params_map + } + + // + // Get maximum number of characters across all parameter names + // + private static Integer paramsMaxChars(params_map) { + Integer max_chars = 0 + for (group in params_map.keySet()) { + def group_params = params_map.get(group) // This gets the parameters of that particular group + for (param in group_params.keySet()) { + if (param.size() > max_chars) { + max_chars = param.size() + } + } + } + return max_chars + } +} diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy new file mode 100755 index 00000000..4af04bd6 --- /dev/null +++ b/lib/NfcoreTemplate.groovy @@ -0,0 +1,432 @@ +// +// This file holds several functions used within the nf-core pipeline template. +// + +import org.yaml.snakeyaml.Yaml + +class NfcoreTemplate { + + // + // Check AWS Batch related parameters have been specified correctly + // + public static void awsBatch(workflow, params) { + if (workflow.profile.contains('awsbatch')) { + // Check params.awsqueue and params.awsregion have been set if running on AWSBatch + assert (params.awsqueue && params.awsregion) : "Specify correct --awsqueue and --awsregion parameters on AWSBatch!" + // Check outdir paths to be S3 buckets if running on AWSBatch + assert params.outdir.startsWith('s3:') : "Outdir not on S3 - specify S3 Bucket to run on AWSBatch!" + } + } + + // + // Warn if a -profile or Nextflow config has not been provided to run the pipeline + // + public static void checkConfigProvided(workflow, log) { + if (workflow.profile == 'standard' && workflow.configFiles.size() <= 1) { + log.warn "[$workflow.manifest.name] You are attempting to run the pipeline without any custom configuration!\n\n" + + "This will be dependent on your local compute environment but can be achieved via one or more of the following:\n" + + " (1) Using an existing pipeline profile e.g. `-profile docker` or `-profile singularity`\n" + + " (2) Using an existing nf-core/configs for your Institution e.g. `-profile crick` or `-profile uppmax`\n" + + " (3) Using your own local custom config e.g. 
`-c /path/to/your/custom.config`\n\n" + + "Please refer to the quick start section and usage docs for the pipeline.\n " + } + } + + // + // Construct and send completion email + // + public static void email(workflow, params, summary_params, projectDir, log, multiqc_report=[]) { + + // Set up the e-mail variables + def subject = "[$workflow.manifest.name] Successful: $workflow.runName" + if (!workflow.success) { + subject = "[$workflow.manifest.name] FAILED: $workflow.runName" + } + + def summary = [:] + for (group in summary_params.keySet()) { + summary << summary_params[group] + } + + def misc_fields = [:] + misc_fields['Date Started'] = workflow.start + misc_fields['Date Completed'] = workflow.complete + misc_fields['Pipeline script file path'] = workflow.scriptFile + misc_fields['Pipeline script hash ID'] = workflow.scriptId + if (workflow.repository) misc_fields['Pipeline repository Git URL'] = workflow.repository + if (workflow.commitId) misc_fields['Pipeline repository Git Commit'] = workflow.commitId + if (workflow.revision) misc_fields['Pipeline Git branch/tag'] = workflow.revision + misc_fields['Nextflow Version'] = workflow.nextflow.version + misc_fields['Nextflow Build'] = workflow.nextflow.build + misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp + + def email_fields = [:] + email_fields['version'] = workflow.manifest.version + email_fields['runName'] = workflow.runName + email_fields['success'] = workflow.success + email_fields['dateComplete'] = workflow.complete + email_fields['duration'] = workflow.duration + email_fields['exitStatus'] = workflow.exitStatus + email_fields['errorMessage'] = (workflow.errorMessage ?: 'None') + email_fields['errorReport'] = (workflow.errorReport ?: 'None') + email_fields['commandLine'] = workflow.commandLine + email_fields['projectDir'] = workflow.projectDir + email_fields['summary'] = summary << misc_fields + + // On success try attach the multiqc report + def mqc_report = null + try { + if (workflow.success) { + mqc_report = multiqc_report.getVal() + if (mqc_report.getClass() == ArrayList && mqc_report.size() >= 1) { + if (mqc_report.size() > 1) { + log.warn "[$workflow.manifest.name] Found multiple reports from process 'MULTIQC', will use only one" + } + mqc_report = mqc_report[0] + } + } + } catch (all) { + if (multiqc_report) { + log.warn "[$workflow.manifest.name] Could not attach MultiQC report to summary email" + } + } + + // Check if we are only sending emails on failure + def email_address = params.email + if (!params.email && params.email_on_fail && !workflow.success) { + email_address = params.email_on_fail + } + + // Render the TXT template + def engine = new groovy.text.GStringTemplateEngine() + def tf = new File("$projectDir/assets/email_template.txt") + def txt_template = engine.createTemplate(tf).make(email_fields) + def email_txt = txt_template.toString() + + // Render the HTML template + def hf = new File("$projectDir/assets/email_template.html") + def html_template = engine.createTemplate(hf).make(email_fields) + def email_html = html_template.toString() + + // Render the sendmail template + def max_multiqc_email_size = params.max_multiqc_email_size as nextflow.util.MemoryUnit + def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "$projectDir", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes() ] + def sf = new File("$projectDir/assets/sendmail_template.txt") + def sendmail_template = 
engine.createTemplate(sf).make(smail_fields) + def sendmail_html = sendmail_template.toString() + + // Send the HTML e-mail + Map colors = logColours(params.monochrome_logs) + if (email_address) { + try { + if (params.plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') } + // Try to send HTML e-mail using sendmail + [ 'sendmail', '-t' ].execute() << sendmail_html + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-" + } catch (all) { + // Catch failures and try with plaintext + def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ] + if ( mqc_report.size() <= max_multiqc_email_size.toBytes() ) { + mail_cmd += [ '-A', mqc_report ] + } + mail_cmd.execute() << email_html + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (mail)-" + } + } + + // Write summary e-mail HTML to a file + def output_d = new File("${params.outdir}/pipeline_info/") + if (!output_d.exists()) { + output_d.mkdirs() + } + def output_hf = new File(output_d, "pipeline_report.html") + output_hf.withWriter { w -> w << email_html } + def output_tf = new File(output_d, "pipeline_report.txt") + output_tf.withWriter { w -> w << email_txt } + } + + // + // Print pipeline summary on completion + // + public static void summary(workflow, params, log) { + Map colors = logColours(params.monochrome_logs) + if (workflow.success) { + if (workflow.stats.ignoredCount == 0) { + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Pipeline completed successfully${colors.reset}-" + } else { + log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed successfully, but with errored process(es) ${colors.reset}-" + } + } else { + log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed with errors${colors.reset}-" + } + } + + // + // ANSII Colours used for terminal logging + // + public static Map logColours(Boolean monochrome_logs) { + Map colorcodes = [:] + + // Reset / Meta + colorcodes['reset'] = monochrome_logs ? '' : "\033[0m" + colorcodes['bold'] = monochrome_logs ? '' : "\033[1m" + colorcodes['dim'] = monochrome_logs ? '' : "\033[2m" + colorcodes['underlined'] = monochrome_logs ? '' : "\033[4m" + colorcodes['blink'] = monochrome_logs ? '' : "\033[5m" + colorcodes['reverse'] = monochrome_logs ? '' : "\033[7m" + colorcodes['hidden'] = monochrome_logs ? '' : "\033[8m" + + // Regular Colors + colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m" + colorcodes['red'] = monochrome_logs ? '' : "\033[0;31m" + colorcodes['green'] = monochrome_logs ? '' : "\033[0;32m" + colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m" + colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m" + colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m" + colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m" + colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m" + + // Bold + colorcodes['bblack'] = monochrome_logs ? '' : "\033[1;30m" + colorcodes['bred'] = monochrome_logs ? '' : "\033[1;31m" + colorcodes['bgreen'] = monochrome_logs ? '' : "\033[1;32m" + colorcodes['byellow'] = monochrome_logs ? '' : "\033[1;33m" + colorcodes['bblue'] = monochrome_logs ? '' : "\033[1;34m" + colorcodes['bpurple'] = monochrome_logs ? '' : "\033[1;35m" + colorcodes['bcyan'] = monochrome_logs ? '' : "\033[1;36m" + colorcodes['bwhite'] = monochrome_logs ? 
'' : "\033[1;37m" + + // Underline + colorcodes['ublack'] = monochrome_logs ? '' : "\033[4;30m" + colorcodes['ured'] = monochrome_logs ? '' : "\033[4;31m" + colorcodes['ugreen'] = monochrome_logs ? '' : "\033[4;32m" + colorcodes['uyellow'] = monochrome_logs ? '' : "\033[4;33m" + colorcodes['ublue'] = monochrome_logs ? '' : "\033[4;34m" + colorcodes['upurple'] = monochrome_logs ? '' : "\033[4;35m" + colorcodes['ucyan'] = monochrome_logs ? '' : "\033[4;36m" + colorcodes['uwhite'] = monochrome_logs ? '' : "\033[4;37m" + + // High Intensity + colorcodes['iblack'] = monochrome_logs ? '' : "\033[0;90m" + colorcodes['ired'] = monochrome_logs ? '' : "\033[0;91m" + colorcodes['igreen'] = monochrome_logs ? '' : "\033[0;92m" + colorcodes['iyellow'] = monochrome_logs ? '' : "\033[0;93m" + colorcodes['iblue'] = monochrome_logs ? '' : "\033[0;94m" + colorcodes['ipurple'] = monochrome_logs ? '' : "\033[0;95m" + colorcodes['icyan'] = monochrome_logs ? '' : "\033[0;96m" + colorcodes['iwhite'] = monochrome_logs ? '' : "\033[0;97m" + + // Bold High Intensity + colorcodes['biblack'] = monochrome_logs ? '' : "\033[1;90m" + colorcodes['bired'] = monochrome_logs ? '' : "\033[1;91m" + colorcodes['bigreen'] = monochrome_logs ? '' : "\033[1;92m" + colorcodes['biyellow'] = monochrome_logs ? '' : "\033[1;93m" + colorcodes['biblue'] = monochrome_logs ? '' : "\033[1;94m" + colorcodes['bipurple'] = monochrome_logs ? '' : "\033[1;95m" + colorcodes['bicyan'] = monochrome_logs ? '' : "\033[1;96m" + colorcodes['biwhite'] = monochrome_logs ? '' : "\033[1;97m" + + return colorcodes + } + + // + // Cattle breeds converted into ASCII colors + public static Map breedColors(Boolean monochrome_logs) { + Map colors = logColours(monochrome_logs) + + Map breedcolors = [ + plain: + [ + ears: colors.reset, + eyes: colors.reset, + poll: colors.reset, + face: colors.reset, + nose: colors.reset, + body1: colors.reset, + body2: colors.reset, + tail: colors.reset, + udder: colors.reset, + hooves: colors.reset + ], + charolais: + [ + ears: colors.white, + eyes: colors.white, + poll: colors.white, + face: colors.white, + nose: colors.white, + body1: colors.white, + body2: colors.white, + tail: colors.white, + udder: colors.white, + hooves: colors.white + ], + angus: + [ + ears: colors.black, + eyes: colors.black, + poll: colors.black, + face: colors.black, + nose: colors.black, + body1: colors.black, + body2: colors.black, + tail: colors.black, + udder: colors.black, + hooves: colors.black + ], + baldy: + [ + ears: colors.white, + eyes: colors.white, + poll: colors.white, + face: colors.white, + nose: colors.white, + body1: colors.black, + body2: colors.black, + tail: colors.black, + udder: colors.white, + hooves: colors.black + ], + hereford: + [ + ears: colors.white, + eyes: colors.white, + poll: colors.white, + face: colors.white, + nose: colors.white, + body1: colors.bred, + body2: colors.bred, + tail: colors.white, + udder: colors.white, + hooves: colors.bred + ], + holstein: + [ + ears: colors.black, + eyes: colors.white, + poll: colors.black, + face: colors.black, + nose: colors.black, + body1: colors.black, + body2: colors.white, + tail: colors.white, + udder: colors.white, + hooves: colors.white + ], + aryshire: + [ + ears: colors.bred, + eyes: colors.white, + poll: colors.bred, + face: colors.bred, + nose: colors.bred, + body1: colors.bred, + body2: colors.white, + tail: colors.white, + udder: colors.white, + hooves: colors.white + ] + ] + + return breedcolors + } + + // + // Groovy cowsay implementation with color support + // + 
public static String cowsayColor( + String message, + Boolean monochrome_logs = false, + String balloonColor = 'reset', + String cowBreed = 'plain' + ) { + Map allBreeds = breedColors(monochrome_logs) + Map ccolor = [:] + if (cowBreed == 'random') { + def breedNames = allBreeds.keySet() + Integer numBreeds = allBreeds.size() + def r = new Random() + String randBreed = breedNames[r.nextInt(numBreeds)] + ccolor = allBreeds[randBreed] + } + else { + ccolor = allBreeds[cowBreed] + } + + String bcolor = logColours(monochrome_logs)[balloonColor] + String reset = logColours(monochrome_logs)['reset'] + + String[] messagelines = message.split('\n') + Integer nlines = messagelines.length + Integer linelength = 0 + messagelines.each{ + l -> if ( l.replaceAll(/\S+\[([0-9];)*[0-9]+m/, '').length() > linelength ) { linelength = l.replaceAll(/\S+\[([0-9];)*[0-9]+m/, '').length() } + } + Integer paddinglength = linelength + 2 + + String balloon = "" + + if ( nlines == 1 ) { + balloon = + """\ + + ${bcolor}${"_"*paddinglength} + +<${reset} ${message} ${bcolor}> + + ${"-"*paddinglength}${reset} + +""".stripMargin('+') + } + else { + balloon = + """\ + + ${bcolor}${"_"*paddinglength} + +/ ${reset}${messagelines[0].padRight(linelength)} ${bcolor}\\ + +""".stripMargin('+') + for (int i=1;i<(nlines-1);i++) { + balloon += "|${reset} ${messagelines[i].padRight(linelength)} ${bcolor}|\n" + } + balloon += + """\ + +\\ ${reset}${messagelines[nlines-1].padRight(linelength)} ${bcolor}/ + + ${"-"*paddinglength}${reset} + +""".stripMargin('+') + } + + String cow = + """\ + + ${bcolor}\\${reset} ${ccolor.ears}^${ccolor.poll}__${ccolor.ears}^ + + ${bcolor}\\${reset} ${ccolor.face}(${ccolor.eyes}oo${ccolor.face})${ccolor.body1}\\_${ccolor.body2}___${ccolor.body1}___ + + ${ccolor.face}(${ccolor.nose}__${ccolor.face})${ccolor.body1}\\ )\\/${ccolor.tail}\\ + + ${ccolor.body1}|${ccolor.body2}|--${ccolor.body1}--${ccolor.udder}w ${ccolor.body2}| + + ${ccolor.hooves}|| ||${reset} + +""".stripMargin('+') + + return "${balloon}${cow}\n" + } + + // + // Does what is says on the tin + // + public static String dashedLine(monochrome_logs) { + Map colors = logColours(monochrome_logs) + return "-${colors.dim}----------------------------------------------------${colors.reset}-" + } + + // + // nf-core logo + // + public static String logo(workflow, monochrome_logs, breed) { + Map colors = logColours(monochrome_logs) + String tagline = "${colors.blue}(Yet Another Viral Subspecies Analysis Pipeline)" + Integer maxlength = tagline.length() + String figlet = + """\ + + ${colors.bgreen}__ __ ___ ______ _ ____ ${colors.reset} + + ${colors.bgreen}\\ \\ / // \\ \\ / / ___| / \\ | _ \\ ${colors.reset} + + ${colors.bgreen}\\ V // _ \\ \\ / /\\___ \\ / _ \\ | |_) | ${colors.reset} + + ${colors.bgreen}| |/ ___ \\ V / ___) / ___ \\| __/ ${colors.reset} + + ${colors.bgreen}|_/_/ \\_\\_/ |____/_/ \\_\\_| ${colors.reset} + +""".stripMargin('+') + + String version = "${colors.cyan}v${workflow.manifest.version}".center(maxlength) + return cowsayColor("${figlet}\n${tagline}\n${version}", monochrome_logs, 'purple', breed) + } +} diff --git a/lib/Utils.groovy b/lib/Utils.groovy new file mode 100755 index 00000000..28567bd7 --- /dev/null +++ b/lib/Utils.groovy @@ -0,0 +1,40 @@ +// +// This file holds several Groovy functions that could be useful for any Nextflow pipeline +// + +import org.yaml.snakeyaml.Yaml + +class Utils { + + // + // When running with -profile conda, warn if channels have not been set-up appropriately + // + public static void 
checkCondaChannels(log) { + Yaml parser = new Yaml() + def channels = [] + try { + def config = parser.load("conda config --show channels".execute().text) + channels = config.channels + } catch(NullPointerException | IOException e) { + log.warn "Could not verify conda channel configuration." + return + } + + // Check that all channels are present + def required_channels = ['conda-forge', 'bioconda', 'defaults'] + def conda_check_failed = !required_channels.every { ch -> ch in channels } + + // Check that they are in the right order + conda_check_failed |= !(channels.indexOf('conda-forge') < channels.indexOf('bioconda')) + conda_check_failed |= !(channels.indexOf('bioconda') < channels.indexOf('defaults')) + + if (conda_check_failed) { + log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " There is a problem with your Conda configuration!\n\n" + + " You will need to set-up the conda-forge and bioconda channels correctly.\n" + + " Please refer to https://bioconda.github.io/user/install.html#set-up-channels\n" + + " NB: The order of the channels matters!\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + } + } +} diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy new file mode 100755 index 00000000..0d7d4416 --- /dev/null +++ b/lib/WorkflowMain.groovy @@ -0,0 +1,94 @@ +// +// This file holds several functions specific to the main.nf workflow in the nf-core/yavsap pipeline +// + +class WorkflowMain { + + // + // Citation string for pipeline + // + public static String citation(workflow) { + return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + + // TODO nf-core: Add Zenodo DOI for pipeline after first release + //"* The pipeline\n" + + //" https://doi.org/10.5281/zenodo.XXXXXXX\n\n" + + "* The nf-core framework\n" + + " https://doi.org/10.1038/s41587-020-0439-x\n\n" + + "* Software dependencies\n" + + " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md" + } + + // + // Print help to screen if required + // + public static String help(workflow, params, log) { + def command = "nextflow run ksumngs/yavsap --platform illumina --kraken2_db /db/kraken/viral -profile docker" + def help_string = '' + help_string += NfcoreTemplate.logo(workflow, params.monochrome_logs, params.breed) + help_string += NfcoreSchema.paramsHelp(workflow, params, command) + help_string += '\n' + citation(workflow) + '\n' + help_string += NfcoreTemplate.dashedLine(params.monochrome_logs) + return help_string + } + + // + // Print parameter summary log to screen + // + public static String paramsSummaryLog(workflow, params, log) { + def summary_log = '' + summary_log += NfcoreTemplate.logo(workflow, params.monochrome_logs, params.breed) + summary_log += NfcoreSchema.paramsSummaryLog(workflow, params) + summary_log += '\n' + citation(workflow) + '\n' + summary_log += NfcoreTemplate.dashedLine(params.monochrome_logs) + return summary_log + } + + // + // Validate parameters and print summary to screen + // + public static void initialise(workflow, params, log) { + // Print help to screen if required + if (params.help) { + log.info help(workflow, params, log) + System.exit(0) + } + + // Validate workflow parameters via the JSON schema + if (params.validate_params) { + NfcoreSchema.validateParameters(workflow, params, log) + } + + // Print parameter summary log to screen + log.info paramsSummaryLog(workflow, params, log) + + // Check that a -profile or Nextflow config has been 
provided to run the pipeline + NfcoreTemplate.checkConfigProvided(workflow, log) + + // Check that conda channels are set-up correctly + if (params.enable_conda) { + Utils.checkCondaChannels(log) + } + + // Check AWS batch settings + NfcoreTemplate.awsBatch(workflow, params) + + // Check input has been provided + if (!params.input) { + log.error "Please provide an input samplesheet to the pipeline e.g. '--input samplesheet.csv'" + System.exit(1) + } + } + + // + // Get attribute from genome config file e.g. fasta + // + public static String getGenomeAttribute(params, attribute) { + def val = '' + if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { + if (params.genomes[ params.genome ].containsKey(attribute)) { + val = params.genomes[ params.genome ][ attribute ] + } + } + return val + } +} diff --git a/lib/WorkflowYavsap.groovy b/lib/WorkflowYavsap.groovy new file mode 100755 index 00000000..099b0d38 --- /dev/null +++ b/lib/WorkflowYavsap.groovy @@ -0,0 +1,66 @@ +// +// This file holds several functions specific to the workflow/yavsap.nf in the nf-core/yavsap pipeline +// + +class WorkflowYavsap { + + // + // Check and validate parameters + // + public static void initialise(params, log) { + pairedNanoporeError(params, log) + pairedInterleaveError(params, log) + } + + // + // Get workflow summary for MultiQC + // + public static String paramsSummaryMultiqc(workflow, summary) { + String summary_section = '' + for (group in summary.keySet()) { + def group_params = summary.get(group) // This gets the parameters of that particular group + if (group_params) { + summary_section += "
<p style=\"font-size:110%\"><b>$group</b></p>\n"
+ summary_section += "<dl class=\"dl-horizontal\">\n"
+ for (param in group_params.keySet()) {
+ summary_section += "<dt>$param</dt><dd><samp>${group_params.get(param) ?: 'N/A'}</samp></dd>\n"
+ }
+ summary_section += "</dl>
\n" + } + } + + String yaml_file_text = "id: '${workflow.manifest.name.replace('/','-')}-summary'\n" + yaml_file_text += "description: ' - this information is collected when the pipeline is started.'\n" + yaml_file_text += "section_name: '${workflow.manifest.name} Workflow Summary'\n" + yaml_file_text += "section_href: 'https://github.com/${workflow.manifest.name}'\n" + yaml_file_text += "plot_type: 'html'\n" + yaml_file_text += "data: |\n" + yaml_file_text += "${summary_section}" + return yaml_file_text + } + + // + // Exit pipeline if Nanopore reads are given as paired-end + // + private static void pairedNanoporeError(params, log) { + if (params.platform == 'nanopore' && params.paired) { + log.error "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " Oxford Nanopore reads are not paired-end. Either set --platform to 'illumina' or \n" + + " --paired to false.\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + System.exit(1) + } + } + + // + // Exit pipeline if --paired and --interleaved are incompatible + // + private static void pairedInterleaveError(params, log) { + if (params.interleaved && !params.paired) { + log.error "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " --interleaved cannot be specified if --paired is false.\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + System.exit(1) + } + } +} diff --git a/lib/cowsay.nf b/lib/cowsay.nf deleted file mode 100644 index 9f82bf88..00000000 --- a/lib/cowsay.nf +++ /dev/null @@ -1,43 +0,0 @@ -// editorconfig-checker-disable-file -def cowsay(message) { - messagelines = message.split('\n') - nlines = messagelines.length - linelength = 0 - messagelines.each{ l -> if ( l.length() > linelength ) { linelength = l.length() } } - paddinglength = linelength + 2 - - if ( nlines == 1 ) { - balloon = - """ ${"_"*paddinglength} - < ${message} > - ${"-"*paddinglength}""" - } - else { - balloon = - """ ${"_"*paddinglength} -/ ${messagelines[0].padRight(linelength)} \\""" - for (int i=1;i<(nlines-1);i++) { - balloon = - """${balloon} -| ${messagelines[i].padRight(linelength)} |""" - } - balloon = - """${balloon} -\\ ${messagelines[nlines-1].padRight(linelength)} / - ${"-"*paddinglength}""" - } - - cow = - """ \\ ^__^ - \\ (oo)\\_______ - (__)\\ )\\/\\ - ||----w | - || ||""" - - cowput = - """${balloon} - ${cow} -""" - - log.info cowput -} diff --git a/lib/logo.nf b/lib/logo.nf deleted file mode 100644 index d8f9e814..00000000 --- a/lib/logo.nf +++ /dev/null @@ -1,24 +0,0 @@ -// editorconfig-checker-disable-file -def yavsap_logo() { - -tagline = "(Yet Another Viral Subspecies Analysis Pipeline)" -maxlength = tagline.length() - -figlet = -"""\ - __ __ ___ ______ _ ____ - \\ \\ / // \\ \\ / / ___| / \\ | _ \\ - \\ V // _ \\ \\ / /\\___ \\ / _ \\ | |_) | - | |/ ___ \\ V / ___) / ___ \\| __/ - |_/_/ \\_\\_/ |____/_/ \\_\\_|\ -""" - -version = "v${workflow.manifest.version}".center(maxlength) - -return \ -"""\ -${figlet} -${tagline} -${version}\ -""" -} diff --git a/lib/nfcore_external_java_deps.jar b/lib/nfcore_external_java_deps.jar new file mode 100644 index 00000000..805c8bb5 Binary files /dev/null and b/lib/nfcore_external_java_deps.jar differ diff --git a/main.nf b/main.nf index 67f4dbfa..766f1f6d 100755 --- a/main.nf +++ b/main.nf @@ -1,238 +1,56 @@ #!/usr/bin/env nextflow -nextflow.enable.dsl = 2 - -include { ALIGNMENT } from './subworkflows/alignment.nf' -include { CLOSEST_REFERENCE } from 
'./subworkflows/closest-reference.nf' -include { CUSTOM_DUMPSOFTWAREVERSIONS } from './modules/nf-core/modules/custom/dumpsoftwareversions/main.nf' -include { FILTERING } from './subworkflows/filtering.nf' -include { GENOME_DOWNLOAD } from './subworkflows/reference.nf' -include { HAPLOTYPING } from './subworkflows/haplotype.nf' -include { KRAKEN2_DBPREPARATION } from './modules/local/kraken2/dbpreparation.nf' -include { MULTIQC } from './modules/nf-core/modules/multiqc/main.nf' -include { PHYLOGENETIC_TREE } from './subworkflows/phylogenetics.nf' -include { PRESENTATION } from './subworkflows/presentation.nf' -include { QC } from './subworkflows/qc.nf' -include { READS_INGEST } from './subworkflows/ingest.nf' -include { TRIMMING } from './subworkflows/trimming.nf' -include { cowsay } from './lib/cowsay.nf' -include { yavsap_logo } from './lib/logo.nf' - -cowsay(yavsap_logo()) - -if (params.help) { - log.info( - """\ - YAVSAP (Yet Another Viral Subspecies Analysis Pipeline) - Intra-sample viral - population analysis - - Usage: - - nextflow run ksumngs/yavsap - - Options: - - --input Relative or absolute path to directory containing - gzipped fastq files - type: path, default: . - - --platform Type of reads to process. Options are 'illumina' and - 'nanopore' - type: string, default: none - - --genome NCBI accession number of the reference genome to - align reads against - type: string, default: 'NC_001437.1' +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + nf-core/yavsap +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Github : https://github.com/nf-core/yavsap + Website: https://nf-co.re/yavsap + Slack : https://nfcore.slack.com/channels/yavsap +---------------------------------------------------------------------------------------- +*/ - --kraken2_db Kraken2-compatible database for classifying reads - type: path, default: none +nextflow.enable.dsl = 2 - --keep_taxid Space-separated list of NCBI taxids to keep and - process after classifying - type: string, default: '0 10239' +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + GENOME PARAMETER VALUES +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ - --outdir Directory in which to place results - type: path, default: ./results +// params.fasta = WorkflowMain.getGenomeAttribute(params, 'fasta') - --help Print this message and exit +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + VALIDATE & PRINT PARAMETER SUMMARY +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ - For more information on usage and parameters, visit the website at - https://ksumngs.github.io/yavsap - """.stripIndent() - ) +WorkflowMain.initialise(workflow, params, log) - exit 0 -} +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + NAMED WORKFLOW FOR PIPELINE +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ -if (params.platform != 'illumina' && params.platform != 'nanopore') { - log.error "ERROR: --platform must be specified" - exit 1 -} +include { YAVSAP } from './workflows/yavsap' -log.info( - """\ - Input folder: ${params.input} - Sequencing platform: ${params.platform} - Reference genome: ${params.genome} - Kraken2 Database: ${params.kraken2_db} - Taxonomic Ids: '${params.keep_taxid}' - Output folder ${params.outdir} - 
Diagnostics folder: ${params.tracedir} - """.stripIndent() -) +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + RUN ALL WORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ +// +// WORKFLOW: Execute a single named workflow for the pipeline +// See: https://github.com/nf-core/rnaseq/issues/619 +// workflow { - LogFiles = Channel.empty() - VersionFiles = Channel.empty() - - GENOME_DOWNLOAD() - ReferenceGenome = GENOME_DOWNLOAD.out.fasta - VersionFiles = VersionFiles.mix(GENOME_DOWNLOAD.out.versions) - - // Bring in the reads files - READS_INGEST() - RawReads = READS_INGEST.out.sample_info - VersionFiles = VersionFiles.mix(READS_INGEST.out.versions) - - if (!params.skip_qc) { - QC(RawReads) - LogFiles = LogFiles.mix(QC.out.report) - VersionFiles = VersionFiles.mix(QC.out.versions) - } - - if (!params.skip_trimming) { - TRIMMING(RawReads) - TRIMMING.out.fastq.set{ TrimmedReads } - LogFiles = LogFiles.mix(TRIMMING.out.log_out) - VersionFiles = VersionFiles.mix(TRIMMING.out.versions) - } - else { - RawReads.set { TrimmedReads } - } - - KronaChart = Channel.of([]) - - if (!params.skip_filtering) { - KrakenDb = file("${params.kraken2_db}", checkIfExists: true) - if (KrakenDb.isDirectory()) { - // This is a local database, and everything is ready to pass to the - // filtering process - - } - else { - if (KrakenDb.getExtension() == 'k2d') { - // The user got confused, and passed a database file, we'll try to - // correct it for them - log.warn "WARNING: ${params.kraken2_db} appears to be a file that is a *part* of a Kraken2 database." - log.warn " Kraken databases are folders that contain multiple files." - log.warn " YAVSAP will attempt to use the parent directory as the database, but it might fail!" 
- KrakenDb = KrakenDb.getParent() - } - else { - // We'll assume this is a tarballed database - KRAKEN2_DBPREPARATION(KrakenDb) - KrakenDb = KRAKEN2_DBPREPARATION.out.db - VersionFiles = VersionFiles.mix(KRAKEN2_DBPREPARATION.out.versions) - } - } - FILTERING( - TrimmedReads, - KrakenDb, - "${params.keep_taxid}" - ) - FILTERING.out.filtered.set { FilteredReads } - FILTERING.out.krona.set { KronaChart } - LogFiles = LogFiles.mix(FILTERING.out.log_out) - VersionFiles = VersionFiles.mix(FILTERING.out.versions) - } - else { - TrimmedReads.set { FilteredReads } - } - - ALIGNMENT(FilteredReads, ReferenceGenome) - ALIGNMENT.out.bam - .join(ALIGNMENT.out.bai) - .set { AlignedReads } - - VersionFiles = VersionFiles.mix(ALIGNMENT.out.versions) - - // Find the strain genomes list - genomePath = params.genome_list - genomeFile = file(genomePath, type: 'file') - if (genomeFile.toFile().exists()) { - genomeFile = [genomeFile] - } - else { - genomePath = "${workflow.projectDir}/genomes/${params.genome_list}*" - genomeFile = file(genomePath, checkIfExists: true, type: 'file') - } - - // Realign reads to their closest strain - CLOSEST_REFERENCE( - ALIGNMENT.out.bam, - ReferenceGenome, - genomeFile - ) - - VersionFiles = VersionFiles.mix(CLOSEST_REFERENCE.out.versions) - - HaplotypeFastas = Channel.empty() - HaplotypeYamls = Channel.empty() - - if (!params.skip_haplotype) { - HAPLOTYPING( - CLOSEST_REFERENCE.out.bam - .join( - CLOSEST_REFERENCE.out.bai - ), - CLOSEST_REFERENCE.out.fasta - ) - - HAPLOTYPING.out.fasta.set{ HaplotypeFastas } - HAPLOTYPING.out.yaml.set{ HaplotypeYamls } - - VersionFiles = VersionFiles.mix(HAPLOTYPING.out.versions) - } - - PhylogeneticTree = Channel.of([]) - - if (!params.skip_phylogenetics) { - PHYLOGENETIC_TREE( - HaplotypeFastas, - CLOSEST_REFERENCE.out.consensus_fasta, - CLOSEST_REFERENCE.out.genome_fasta, - genomeFile - ) - - PHYLOGENETIC_TREE.out.tree.set{ PhylogeneticTree } - - VersionFiles = VersionFiles.mix(PHYLOGENETIC_TREE.out.versions) - } - - LogFiles = LogFiles - .map{ (it instanceof Path) ? 
it : it.drop(1) } - .mix(Channel.of(file("${workflow.projectDir}/assets/multiqc_config.yml"))) - .flatten() - .collect() - - MULTIQC(LogFiles) - VersionFiles = VersionFiles.mix(MULTIQC.out.versions) - - // Note: The Visualizer cannot be output if haplotyping is skipped - PRESENTATION( - ALIGNMENT.out.bam, - ALIGNMENT.out.bai, - GENOME_DOWNLOAD.out.fasta, - GENOME_DOWNLOAD.out.fai, - CLOSEST_REFERENCE.out.consensus_fasta, - CLOSEST_REFERENCE.out.accession, - CLOSEST_REFERENCE.out.strain, - HaplotypeYamls, - HaplotypeFastas, - PhylogeneticTree, - MULTIQC.out.report, - KronaChart - ) - VersionFiles = VersionFiles.mix(PRESENTATION.out.versions) - - CUSTOM_DUMPSOFTWAREVERSIONS(VersionFiles.unique().collectFile(name: 'collated_versions.yml')) + YAVSAP() } + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + THE END +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ diff --git a/modules.json b/modules.json index 1f1419cd..5ece52d4 100644 --- a/modules.json +++ b/modules.json @@ -4,55 +4,58 @@ "repos": { "ksumngs/nf-modules": { "cat/fastq": { - "git_sha": "da5728117c6caaee428ed8729e88b93abc60c5fc" + "git_sha": "9bd90a7333479b2ba7f835d1ddfc3f869e992c56" + }, + "cliquesnv/consensusillumina": { + "git_sha": "2362072d4e1659d9096e047990e2959f56134820" }, "cliquesnv/illumina": { - "git_sha": "3c4ecd5017df87d5224100ab87bc4879bf043473" + "git_sha": "c1a3e48e237a64766199d83b391bdd07c5344f86" }, "cliquesnv/illuminavc": { - "git_sha": "0c47a6672c68c47a15d2ef3255b63f95b7bc1758" + "git_sha": "c1a3e48e237a64766199d83b391bdd07c5344f86" }, "edirect/efetch": { - "git_sha": "21873b5c2ed4c2defea089d6070a0f2c032aa11d" + "git_sha": "c1a3e48e237a64766199d83b391bdd07c5344f86" }, "edirect/esearch": { - "git_sha": "54e488eab3013f48124a4dfd54ec2b05e0a75b62" + "git_sha": "c1a3e48e237a64766199d83b391bdd07c5344f86" }, "kraken2": { - "git_sha": "21873b5c2ed4c2defea089d6070a0f2c032aa11d" + "git_sha": "e0b2138159eab5b54a4618f320895a308fcf53fa" }, "krakentools/extract": { - "git_sha": "181a04778a81494dc71b1d68b514b43cfef1512c" + "git_sha": "c1a3e48e237a64766199d83b391bdd07c5344f86" }, "krakentools/kreport2krona": { - "git_sha": "c6c9d4fad20eb77232756fc08403fbc4e9988ce2" + "git_sha": "deed98820b459fffdf8f218e5dccaab20ae1c04c" }, "krona/importtext": { - "git_sha": "f423673c57aba461cf16bd3bc628e36e33b20029" + "git_sha": "c1a3e48e237a64766199d83b391bdd07c5344f86" }, "minimap2/align": { - "git_sha": "ae81650d918dd11707cd61d61a4f2c966739de21" + "git_sha": "dbfcbf9c170606766bb4e59d43816064151c2cf5" }, "nanofilt": { - "git_sha": "8e2140d607a9bbbf82ddf1604609f22e7371f9fb" + "git_sha": "c1a3e48e237a64766199d83b391bdd07c5344f86" }, "nanostat": { - "git_sha": "02e9fc52eb0c57550c2820a62b865313ab764a85" + "git_sha": "c1a3e48e237a64766199d83b391bdd07c5344f86" }, "raxmlng/bootstrap": { - "git_sha": "7bc0cad69ab9dc65cdd5dd389f85766cf189612c" + "git_sha": "c1a3e48e237a64766199d83b391bdd07c5344f86" }, "raxmlng/parse": { - "git_sha": "3ad9561f34733ce646bb3149b1928c81eb052960" + "git_sha": "c1a3e48e237a64766199d83b391bdd07c5344f86" }, "raxmlng/search": { - "git_sha": "c807ee98a0c789a2b45795e7ec9592167f477675" + "git_sha": "c1a3e48e237a64766199d83b391bdd07c5344f86" }, "raxmlng/support": { - "git_sha": "1589b66c23d176829e73c2ce1c7db039cd237fed" + "git_sha": "c1a3e48e237a64766199d83b391bdd07c5344f86" }, "trimmomatic": { - "git_sha": "0f1c3ecb06bd5695a37dcb6dc9c4e75698aae905" + "git_sha": "c1a3e48e237a64766199d83b391bdd07c5344f86" } }, "nf-core/modules": { @@ -62,38 
+65,26 @@ "blast/makeblastdb": { "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" }, - "cat/cat": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" - }, "custom/dumpsoftwareversions": { "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" }, "fastqc": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" - }, - "ivar/consensus": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "49b18b1639f4f7104187058866a8fab33332bdfe" }, "mafft": { - "git_sha": "841c661cad7181a36fd20bb306258d17b750c873" + "git_sha": "f0800157544a82ae222931764483331a81812012" }, "minimap2/align": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "1a5a9e7b4009dcf34e6867dd1a5a1d9a718b027b" }, "multiqc": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "49b18b1639f4f7104187058866a8fab33332bdfe" }, "samtools/faidx": { - "git_sha": "1ad73f1b2abdea9398680d6d20014838135c9a35" - }, - "samtools/fastq": { "git_sha": "897c33d5da084b61109500ee44c01da2d3e4e773" }, "samtools/index": { - "git_sha": "1ad73f1b2abdea9398680d6d20014838135c9a35" - }, - "samtools/sort": { - "git_sha": "1ad73f1b2abdea9398680d6d20014838135c9a35" + "git_sha": "897c33d5da084b61109500ee44c01da2d3e4e773" }, "seqkit/split2": { "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" diff --git a/modules/ksumngs/nf-modules/cat/fastq/main.nf b/modules/ksumngs/nf-modules/cat/fastq/main.nf index e2d50200..d942d79e 100644 --- a/modules/ksumngs/nf-modules/cat/fastq/main.nf +++ b/modules/ksumngs/nf-modules/cat/fastq/main.nf @@ -57,7 +57,7 @@ process CAT_FASTQ { cat <<-END_VERSIONS > versions.yml "${task.process}": - gzip: echo \$(gzip --version | head -n1 | sed 's/^gzip //') + gzip: \$(gzip --version | head -n1 | sed 's/^gzip //') END_VERSIONS """ } diff --git a/modules/ksumngs/nf-modules/cliquesnv/consensusillumina/main.nf b/modules/ksumngs/nf-modules/cliquesnv/consensusillumina/main.nf new file mode 100644 index 00000000..53aecf2a --- /dev/null +++ b/modules/ksumngs/nf-modules/cliquesnv/consensusillumina/main.nf @@ -0,0 +1,38 @@ +process CLIQUESNV_CONSENSUSILLUMINA { + tag "$meta.id" + label 'process_medium' + + conda (params.enable_conda ? "bioconda::cliquesnv=2.0.3" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/cliquesnv:2.0.3--hdfd78af_0': + 'quay.io/biocontainers/cliquesnv:2.0.3--hdfd78af_0' }" + + + input: + tuple val(meta), path(bam) + + output: + tuple val(meta), path("*.fasta"), emit: fasta + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def jmemstring = task.memory.toMega() + 'M' + """ + cliquesnv \\ + -Xmx${jmemstring} \\ + -threads ${task.cpus} \\ + -m consensus-illumina \\ + -in ${bam} \\ + ${args} \\ + -outDir . 
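+ # -outDir . writes the consensus into the task work directory, where the
+ # *.fasta output glob can collect it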
+ + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cliquesnv: \$(cliquesnv | head -n1 | sed 's/CliqueSNV version: //') + END_VERSIONS + """ +} diff --git a/modules/ksumngs/nf-modules/cliquesnv/consensusillumina/meta.yml b/modules/ksumngs/nf-modules/cliquesnv/consensusillumina/meta.yml new file mode 100644 index 00000000..825e9949 --- /dev/null +++ b/modules/ksumngs/nf-modules/cliquesnv/consensusillumina/meta.yml @@ -0,0 +1,41 @@ +name: cliquesnv_consensusillumina +description: Utility method to calculate consensus string base from reconstruction of Intra-Host Viral Populations with Illumina reads +keywords: + - viral + - consensus + - illumina +tools: + - cliquesnv: + description: "Scalable Reconstruction of Intra-Host Viral Populations from NGS Reads" + homepage: https://github.com/vtsyvina/CliqueSNV + doi: "10.1093/nar/gkab576" + licence: ["MIT"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: Sorted bam file to call variants from + pattern: "*.bam" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - fasta: + type: file + description: Consensus sequence in FASTA format + pattern: "*.{fasta}" + +authors: + - "@MillironX" diff --git a/modules/ksumngs/nf-modules/cliquesnv/illumina/meta.yml b/modules/ksumngs/nf-modules/cliquesnv/illumina/meta.yml index 0d4bf3dd..e3fff7b5 100644 --- a/modules/ksumngs/nf-modules/cliquesnv/illumina/meta.yml +++ b/modules/ksumngs/nf-modules/cliquesnv/illumina/meta.yml @@ -9,7 +9,7 @@ tools: description: Scalable Reconstruction of Intra-Host Viral Populations from NGS Reads homepage: https://github.com/vtsyvina/CliqueSNV doi: "10.1093/nar/gkab576 " - licence: ['MIT'] + licence: ["MIT"] input: - meta: diff --git a/modules/ksumngs/nf-modules/cliquesnv/illuminavc/meta.yml b/modules/ksumngs/nf-modules/cliquesnv/illuminavc/meta.yml index 4781f02e..114db03a 100644 --- a/modules/ksumngs/nf-modules/cliquesnv/illuminavc/meta.yml +++ b/modules/ksumngs/nf-modules/cliquesnv/illuminavc/meta.yml @@ -9,7 +9,7 @@ tools: description: Scalable Reconstruction of Intra-Host Viral Populations from NGS Reads homepage: https://github.com/vtsyvina/CliqueSNV doi: "10.1093/nar/gkab576 " - licence: ['MIT'] + licence: ["MIT"] input: - meta: diff --git a/modules/ksumngs/nf-modules/edirect/efetch/meta.yml b/modules/ksumngs/nf-modules/edirect/efetch/meta.yml index 960abd68..e9af68bf 100644 --- a/modules/ksumngs/nf-modules/edirect/efetch/meta.yml +++ b/modules/ksumngs/nf-modules/edirect/efetch/meta.yml @@ -8,7 +8,7 @@ tools: description: Entrez Direct (EDirect) is an advanced method for accessing the NCBI's set of interconnected databases (publication, sequence, structure, gene, variation, expression, etc.) from a UNIX terminal window. Functions take search terms from command-line arguments. Individual operations are combined to build multi-step queries. Record retrieval and formatting normally complete the process. 
homepage: https://eutils.ncbi.nlm.nih.gov/ documentation: https://www.ncbi.nlm.nih.gov/books/NBK179288/ - licence: ['PUBLIC DOMAIN'] + licence: ["PUBLIC DOMAIN"] input: - search: diff --git a/modules/ksumngs/nf-modules/edirect/esearch/meta.yml b/modules/ksumngs/nf-modules/edirect/esearch/meta.yml index 596ce640..680b7780 100644 --- a/modules/ksumngs/nf-modules/edirect/esearch/meta.yml +++ b/modules/ksumngs/nf-modules/edirect/esearch/meta.yml @@ -8,7 +8,7 @@ tools: description: Entrez Direct (EDirect) is an advanced method for accessing the NCBI's set of interconnected databases (publication, sequence, structure, gene, variation, expression, etc.) from a UNIX terminal window. Functions take search terms from command-line arguments. Individual operations are combined to build multi-step queries. Record retrieval and formatting normally complete the process. homepage: https://eutils.ncbi.nlm.nih.gov/ documentation: https://www.ncbi.nlm.nih.gov/books/NBK179288/ - licence: ['PUBLIC DOMAIN'] + licence: ["PUBLIC DOMAIN"] input: - query: diff --git a/modules/ksumngs/nf-modules/kraken2/meta.yml b/modules/ksumngs/nf-modules/kraken2/meta.yml index e820dd83..df9983a2 100644 --- a/modules/ksumngs/nf-modules/kraken2/meta.yml +++ b/modules/ksumngs/nf-modules/kraken2/meta.yml @@ -1,4 +1,4 @@ -name: kraken2_kraken2 +name: kraken2 description: Classifies metagenomic sequence data keywords: - classify diff --git a/modules/ksumngs/nf-modules/krakentools/extract/meta.yml b/modules/ksumngs/nf-modules/krakentools/extract/meta.yml index 859a1b63..fc6356b5 100644 --- a/modules/ksumngs/nf-modules/krakentools/extract/meta.yml +++ b/modules/ksumngs/nf-modules/krakentools/extract/meta.yml @@ -9,7 +9,7 @@ tools: description: KrakenTools is a suite of scripts to be used for post-analysis of Kraken/KrakenUniq/Kraken2/Bracken results. Please cite the relevant paper if using KrakenTools with any of the listed programs. homepage: https://ccb.jhu.edu/software/krakentools/ tool_dev_url: https://github.com/jenniferlu717/KrakenTools - licence: ['GPL v3'] + licence: ["GPL v3"] input: - meta: @@ -25,8 +25,8 @@ input: type: file description: Kraken output describing the classification of each reads - kreport: - type: file - description: Kraken report with aggregate classification stats + type: file + description: Kraken report with aggregate classification stats - taxids: type: string description: Space-separated list of taxonomic ids to extract diff --git a/modules/ksumngs/nf-modules/krakentools/kreport2krona/meta.yml b/modules/ksumngs/nf-modules/krakentools/kreport2krona/meta.yml index 4342de89..aa4de6a9 100644 --- a/modules/ksumngs/nf-modules/krakentools/kreport2krona/meta.yml +++ b/modules/ksumngs/nf-modules/krakentools/kreport2krona/meta.yml @@ -1,32 +1,27 @@ name: krakentools_kreport2krona -## TODO nf-core: Add a description of the module and list keywords -description: write your description here +description: Takes a Kraken report file and prints out a krona-compatible TEXT file keywords: - - sort + - kraken + - krona + - metagenomics + - visualization tools: - krakentools: - ## TODO nf-core: Add a description and other details for the software below description: KrakenTools is a suite of scripts to be used for post-analysis of Kraken/KrakenUniq/Kraken2/Bracken results. Please cite the relevant paper if using KrakenTools with any of the listed programs. 
- homepage: None - documentation: None - tool_dev_url: None - doi: "" - licence: ['GPL v3'] + homepage: https://github.com/jenniferlu717/KrakenTools + licence: ["GPL v3"] -## TODO nf-core: Add a description of all of the variables used as input input: - meta: type: map description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - ## TODO nf-core: Delete / customise this example input - - bam: + - kreport: type: file - description: BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" + description: Kraken report + pattern: "*.{txt,kreport}" -## TODO nf-core: Add a description of all of the variables used as output output: - meta: type: map @@ -37,11 +32,10 @@ output: type: file description: File containing software versions pattern: "versions.yml" - ## TODO nf-core: Delete / customise this example output - - bam: + - krona: type: file - description: Sorted BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" + description: Krona text file converted from Kraken report + pattern: "*.{krona}" authors: - "@MillironX" diff --git a/modules/ksumngs/nf-modules/krona/importtext/meta.yml b/modules/ksumngs/nf-modules/krona/importtext/meta.yml index 9bf434ba..12817e01 100644 --- a/modules/ksumngs/nf-modules/krona/importtext/meta.yml +++ b/modules/ksumngs/nf-modules/krona/importtext/meta.yml @@ -8,7 +8,7 @@ tools: - krona: description: Krona Tools is a set of scripts to create Krona charts from several Bioinformatics tools as well as from text and XML files. homepage: https://github.com/marbl/Krona/wiki - licence: ['BSD'] + licence: ["BSD"] input: - krona: diff --git a/modules/ksumngs/nf-modules/minimap2/align/main.nf b/modules/ksumngs/nf-modules/minimap2/align/main.nf index 40267fa3..254db8e4 100644 --- a/modules/ksumngs/nf-modules/minimap2/align/main.nf +++ b/modules/ksumngs/nf-modules/minimap2/align/main.nf @@ -9,17 +9,21 @@ process MINIMAP2_ALIGN { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? 'bioconda::minimap2=2.21' : null) + conda (params.enable_conda ? 'bioconda::minimap2=2.21 bioconda::samtools=1.12' : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/minimap2:2.21--h5bf99c6_0' : - 'quay.io/biocontainers/minimap2:2.21--h5bf99c6_0' }" + 'https://depot.galaxyproject.org/singularity/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:1679e915ddb9d6b4abda91880c4b48857d471bd8-0' : + 'quay.io/biocontainers/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:1679e915ddb9d6b4abda91880c4b48857d471bd8-0' }" input: tuple val(meta), path(reads), path(reference) + val bam_format + val cigar_paf_format + val cigar_bam output: - tuple val(meta), path("*.paf"), emit: paf - path "versions.yml" , emit: versions + tuple val(meta), path("*.paf"), optional: true, emit: paf + tuple val(meta), path("*.bam"), optional: true, emit: bam + path "versions.yml", emit: versions when: task.ext.when == null || task.ext.when @@ -28,13 +32,19 @@ process MINIMAP2_ALIGN { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def input_reads = meta.single_end ? "$reads" : "${reads[0]} ${reads[1]}" + def bam_output = bam_format ? "-a | samtools sort | samtools view -@ ${task.cpus} -b -h -o ${prefix}.bam" : "-o ${prefix}.paf" + def cigar_paf = cigar_paf_format && !bam_format ? "-c" : '' + def set_cigar_bam = cigar_bam && bam_format ? 
"-L" : '' """ minimap2 \\ $args \\ -t $task.cpus \\ $reference \\ $input_reads \\ - > ${prefix}.paf + $cigar_paf \\ + $set_cigar_bam \\ + $bam_output + cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/ksumngs/nf-modules/minimap2/align/meta.yml b/modules/ksumngs/nf-modules/minimap2/align/meta.yml index e8a3f316..f5f888f1 100644 --- a/modules/ksumngs/nf-modules/minimap2/align/meta.yml +++ b/modules/ksumngs/nf-modules/minimap2/align/meta.yml @@ -29,6 +29,17 @@ input: type: file description: | Reference database in FASTA format. + - bam_format: + type: boolean + description: Specify that output should be in BAM format + - cigar_paf_format: + type: boolean + description: Specify that output CIGAR should be in PAF format + - cigar_bam: + type: boolean + description: | + Write CIGAR with >65535 ops at the CG tag. This is recommended when + doing XYZ (https://github.com/lh3/minimap2#working-with-65535-cigar-operations) output: - meta: type: map @@ -39,10 +50,17 @@ output: type: file description: Alignment in PAF format pattern: "*.paf" + - bam: + type: file + description: Alignment in BAM format + pattern: "*.bam" - versions: type: file description: File containing software versions pattern: "versions.yml" authors: - "@heuermh" + - "@sofstam" + - "@sateeshperi" + - "@jfy133" - "@MillironX" diff --git a/modules/ksumngs/nf-modules/nanofilt/meta.yml b/modules/ksumngs/nf-modules/nanofilt/meta.yml index da1c439a..c5546c86 100644 --- a/modules/ksumngs/nf-modules/nanofilt/meta.yml +++ b/modules/ksumngs/nf-modules/nanofilt/meta.yml @@ -8,7 +8,7 @@ tools: homepage: https://gigabaseorgigabyte.wordpress.com/2017/06/05/trimming-and-filtering-oxford-nanopore-sequencing-reads/ tool_dev_url: https://github.com/wdecoster/nanofilt doi: "10.1093/bioinformatics/bty149" - licence: ['MIT'] + licence: ["MIT"] input: - meta: @@ -40,6 +40,5 @@ output: description: File with stats on filtered reads pattern: "*.{log}" - authors: - "@MillironX" diff --git a/modules/ksumngs/nf-modules/nanostat/main.nf b/modules/ksumngs/nf-modules/nanostat/main.nf index 8b2543d9..d666c006 100644 --- a/modules/ksumngs/nf-modules/nanostat/main.nf +++ b/modules/ksumngs/nf-modules/nanostat/main.nf @@ -11,7 +11,7 @@ process NANOSTAT { tuple val(meta), path(reads) output: - tuple val(meta), path("*.log"), emit: log + tuple val(meta), path("*_NanoStats"), emit: log path "versions.yml" , emit: versions when: @@ -33,7 +33,7 @@ process NANOSTAT { -t ${task.cpus} \\ --${analysis_flag} ${reads} \\ ${args} \\ - > ${prefix}_nanostat.log + > ${prefix}_NanoStats cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/ksumngs/nf-modules/nanostat/meta.yml b/modules/ksumngs/nf-modules/nanostat/meta.yml index 2f5718e5..a447b719 100644 --- a/modules/ksumngs/nf-modules/nanostat/meta.yml +++ b/modules/ksumngs/nf-modules/nanostat/meta.yml @@ -8,7 +8,7 @@ tools: description: Calculate statistics for Oxford Nanopore sequencing data and alignments homepage: https://github.com/wdecoster/nanostat doi: "10.1093/bioinformatics/bty149" - licence: ['MIT'] + licence: ["MIT"] input: - meta: @@ -36,6 +36,5 @@ output: description: Read/alignment statistics report pattern: "*.{log}" - authors: - "@MillironX" diff --git a/modules/ksumngs/nf-modules/raxmlng/bootstrap/meta.yml b/modules/ksumngs/nf-modules/raxmlng/bootstrap/meta.yml index e5f4d973..bc3a48f6 100644 --- a/modules/ksumngs/nf-modules/raxmlng/bootstrap/meta.yml +++ b/modules/ksumngs/nf-modules/raxmlng/bootstrap/meta.yml @@ -6,7 +6,7 @@ keywords: - maximum likelihood tools: - 
raxmlng: - description: 'RAxML Next Generation: faster, easier-to-use and more flexible' + description: "RAxML Next Generation: faster, easier-to-use and more flexible" homepage: https://github.com/amkozlov/raxml-ng documentation: https://github.com/amkozlov/raxml-ng/wiki tool_dev_url: https://github.com/amkozlov/raxml-ng diff --git a/modules/ksumngs/nf-modules/raxmlng/parse/meta.yml b/modules/ksumngs/nf-modules/raxmlng/parse/meta.yml index ddb50c7d..44553d82 100644 --- a/modules/ksumngs/nf-modules/raxmlng/parse/meta.yml +++ b/modules/ksumngs/nf-modules/raxmlng/parse/meta.yml @@ -8,7 +8,7 @@ keywords: - maximum likelihood tools: - raxmlng: - description: 'RAxML Next Generation: faster, easier-to-use and more flexible' + description: "RAxML Next Generation: faster, easier-to-use and more flexible" homepage: https://github.com/amkozlov/raxml-ng documentation: https://github.com/amkozlov/raxml-ng/wiki tool_dev_url: https://github.com/amkozlov/raxml-ng diff --git a/modules/ksumngs/nf-modules/raxmlng/search/meta.yml b/modules/ksumngs/nf-modules/raxmlng/search/meta.yml index 2d7f592b..16065172 100644 --- a/modules/ksumngs/nf-modules/raxmlng/search/meta.yml +++ b/modules/ksumngs/nf-modules/raxmlng/search/meta.yml @@ -6,7 +6,7 @@ keywords: - maximum likelihood tools: - raxmlng: - description: 'RAxML Next Generation: faster, easier-to-use and more flexible' + description: "RAxML Next Generation: faster, easier-to-use and more flexible" homepage: https://github.com/amkozlov/raxml-ng documentation: https://github.com/amkozlov/raxml-ng/wiki tool_dev_url: https://github.com/amkozlov/raxml-ng diff --git a/modules/ksumngs/nf-modules/raxmlng/support/meta.yml b/modules/ksumngs/nf-modules/raxmlng/support/meta.yml index 2f4f879e..ea2d15ff 100644 --- a/modules/ksumngs/nf-modules/raxmlng/support/meta.yml +++ b/modules/ksumngs/nf-modules/raxmlng/support/meta.yml @@ -6,7 +6,7 @@ keywords: - maximum likelihood tools: - raxmlng: - description: 'RAxML Next Generation: faster, easier-to-use and more flexible' + description: "RAxML Next Generation: faster, easier-to-use and more flexible" homepage: https://github.com/amkozlov/raxml-ng documentation: https://github.com/amkozlov/raxml-ng/wiki tool_dev_url: https://github.com/amkozlov/raxml-ng diff --git a/modules/ksumngs/nf-modules/trimmomatic/main.nf b/modules/ksumngs/nf-modules/trimmomatic/main.nf index e2c0068d..2078e03b 100644 --- a/modules/ksumngs/nf-modules/trimmomatic/main.nf +++ b/modules/ksumngs/nf-modules/trimmomatic/main.nf @@ -34,7 +34,7 @@ process TRIMMOMATIC { ${reads} \\ ${trimmed} \\ ${args} \\ - 2> ${prefix}.trimmomatic.log + 2> >(tee ${prefix}.log >&2) cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/ksumngs/nf-modules/trimmomatic/meta.yml b/modules/ksumngs/nf-modules/trimmomatic/meta.yml index 5a1d49aa..739eddc4 100644 --- a/modules/ksumngs/nf-modules/trimmomatic/meta.yml +++ b/modules/ksumngs/nf-modules/trimmomatic/meta.yml @@ -8,7 +8,7 @@ tools: homepage: http://www.usadellab.org/cms/?page=trimmomatic tool_dev_url: https://github.com/usadellab/Trimmomatic doi: "10.1093/bioinformatics/btu170" - licence: ['GPL v3'] + licence: ["GPL v3"] input: - meta: diff --git a/modules/local/haplink/consensus.nf b/modules/local/haplink/consensus.nf new file mode 100644 index 00000000..98ff8daf --- /dev/null +++ b/modules/local/haplink/consensus.nf @@ -0,0 +1,33 @@ +process HAPLINK_CONSENSUS { + tag "$meta.id" + label 'process_low' + + container 'quay.io/millironx/haplink:0.7.0' + + input: + tuple val(meta), path(variantcalls), 
path(reference) + + output: + tuple val(meta), path("*.fasta"), emit: fasta + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + haplink consensus \\ + --reference ${reference} \\ + --variants ${variantcalls} \\ + --prefix ${prefix} \\ + --output ${prefix}.consensus.fasta \\ + ${args} --julia-args -t${task.cpus} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + haplink: \$(haplink --version) + END_VERSIONS + """ +} diff --git a/modules/local/modules/haplink/haplotypes/main.nf b/modules/local/haplink/haplotypes.nf similarity index 87% rename from modules/local/modules/haplink/haplotypes/main.nf rename to modules/local/haplink/haplotypes.nf index 8668ee3c..691d7caf 100644 --- a/modules/local/modules/haplink/haplotypes/main.nf +++ b/modules/local/haplink/haplotypes.nf @@ -2,7 +2,7 @@ process HAPLINK_HAPLOTYPES { tag "$meta.id" label 'process_high' - container 'quay.io/millironx/haplink:0.6.1' + container 'quay.io/millironx/haplink:0.7.0' input: tuple val(meta), path(bam), path(vcf), path(reference) @@ -23,8 +23,7 @@ process HAPLINK_HAPLOTYPES { --variants "${vcf}" \\ --reference "${reference}" \\ --output "${prefix}.haplotypes.yaml" \\ - ${args} \\ - --julia-args -t${task.cpus} + ${args} --julia-args -t${task.cpus} cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/modules/haplink/sequences/main.nf b/modules/local/haplink/sequences.nf similarity index 93% rename from modules/local/modules/haplink/sequences/main.nf rename to modules/local/haplink/sequences.nf index 4537263a..ba61e385 100644 --- a/modules/local/modules/haplink/sequences/main.nf +++ b/modules/local/haplink/sequences.nf @@ -2,7 +2,7 @@ process HAPLINK_SEQUENCES { tag "$meta.id" label 'process_low' - container 'quay.io/millironx/haplink:0.6.1' + container 'quay.io/millironx/haplink:0.7.0' input: tuple val(meta), file(yaml), file(reference) diff --git a/modules/local/modules/haplink/variants/main.nf b/modules/local/haplink/variants.nf similarity index 86% rename from modules/local/modules/haplink/variants/main.nf rename to modules/local/haplink/variants.nf index 385159cd..44d35c8d 100644 --- a/modules/local/modules/haplink/variants/main.nf +++ b/modules/local/haplink/variants.nf @@ -2,7 +2,7 @@ process HAPLINK_VARIANTS { tag "$meta.id" label 'process_medium' - container 'quay.io/millironx/haplink:0.6.1' + container 'quay.io/millironx/haplink:0.7.0' input: tuple val(meta), file(bam), file(bai), file(reference) @@ -22,8 +22,7 @@ process HAPLINK_VARIANTS { --bam ${bam} \\ --reference ${reference} \\ --output ${prefix}.vcf \\ - ${args} \\ - --julia-args -t${task.cpus} + ${args} --julia-args -t${task.cpus} cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/haplotype-yaml2tsv.nf b/modules/local/haplotype-yaml2tsv.nf deleted file mode 100644 index e0fe41d0..00000000 --- a/modules/local/haplotype-yaml2tsv.nf +++ /dev/null @@ -1,30 +0,0 @@ -process HAPLOTYPE_YAML2TSV { - tag "$meta.id" - label 'process_low' - - conda (params.enable_conda ? "conda-forge::pyaml=15.8.2" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/pyaml:15.8.2--py36_0' : - 'quay.io/biocontainers/pyaml:15.8.2--py36_0' }" - - input: - tuple val(meta), path(yaml) - - output: - tuple val(meta), path("*.tsv"), emit: tsv - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def prefix = task.ext.prefix ?: "${meta.id}" - """ - haplotype-parser ${prefix} ${yaml} ${prefix}.tsv - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - python: \$(python --version | sed 's/Python //g') - END_VERSIONS - """ -} diff --git a/modules/local/haplotypeconvert.nf b/modules/local/haplotypeconvert.nf new file mode 100644 index 00000000..7ae58d02 --- /dev/null +++ b/modules/local/haplotypeconvert.nf @@ -0,0 +1,41 @@ +process HAPLOTYPECONVERT { + tag "$meta.id" + label 'process_low' + + container 'quay.io/millironx/biojulia:1.6.6-2.0.5-9877308' + + input: + tuple val(meta), val(strain), val(ncbi), path(consensus), path(haplotype_fasta), path(haplotype_yaml) + path(reference) + + output: + tuple val(meta), path("*.yaml"), emit: yaml + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def prefix = task.ext.prefix ?: "${meta.id}" + def fasta = haplotype_fasta ?: 'nothing.fasta' + def yaml = haplotype_yaml ?: 'nothing.yaml' + """ + haplotypestandardizer \\ + ${meta.id} \\ + ${reference} \\ + ${strain} \\ + ${ncbi} \\ + ${consensus} \\ + ${yaml} \\ + ${fasta} \\ + ${prefix}.haplotypes.std.yaml + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + julia: \$(julia -v | awk '{print \$3}') + "BioAlignments.jl": \$(julia -e 'using Pkg, UUIDs; println(string(Pkg.dependencies()[UUID("00701ae9-d1dc-5365-b64a-a3a3ebf5695e")].version))') + "FASTX.jl": \$(julia -e 'using Pkg, UUIDs; println(string(Pkg.dependencies()[UUID("c2308a5c-f048-11e8-3e8a-31650f418d12")].version))') + "YAML.jl": \$(julia -e 'using Pkg, UUIDs; println(string(Pkg.dependencies()[UUID("ddb6d928-2868-570f-bddf-ab3f9cf99eb6")].version))') + END_VERSIONS + """ +} diff --git a/modules/local/igv.nf b/modules/local/igv.nf new file mode 100644 index 00000000..f58cea91 --- /dev/null +++ b/modules/local/igv.nf @@ -0,0 +1,33 @@ +process IGV { + tag "$samplelist" + label 'process_low' + + container 'quay.io/millironx/juliapro:1.6.6-2ed3693' + input: + path(samplelist, stageAs: 'samples.txt') + path(igvjs, stageAs: 'igv.js') + path(template, stageAs: 'template.html') + + output: + path "*_mqc.html", emit: mqc_html + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + """ + igvgen \\ + ${samplelist} \\ + ${igvjs} \\ + ${template} \\ + igv_mqc.html + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + julia: \$(julia -v | awk '{print \$3}') + "JSON3.jl": \$(julia -e 'using Pkg, UUIDs; println(string(Pkg.dependencies()[UUID("0f8b85d8-7281-11e9-16c2-39a750bddbf1")].version))') + "Mustache.jl": \$(julia -e 'using Pkg, UUIDs; println(string(Pkg.dependencies()[UUID("ffc61752-8dc7-55ee-8c37-f3e9cdd09e70")].version))') + END_VERSIONS + """ +} diff --git a/modules/local/json2yaml.nf b/modules/local/json2yaml.nf index 6d26c89e..5f6e1269 100644 --- a/modules/local/json2yaml.nf +++ b/modules/local/json2yaml.nf @@ -24,7 +24,8 @@ process JSON2YAML { cat <<-END_VERSIONS > versions.yml "${task.process}": - python: \$(python --version | sed 's/Python //g') + python: \$(python --version 2>&1 | sed 's/Python //g' | sed 's/ ::.*//g') + pyyaml: \$(printf "import yaml\\nprint(yaml.__version__)" | python) END_VERSIONS """ } 
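The pyyaml probe added above feeds a two-line script to python on stdin via printf; Nextflow unescapes the \\n to \n before bash runs printf, which then emits a real newline between the two statements. The same pattern can serve as a version probe for any importable package. A minimal sketch with a hypothetical package name somelib, assuming the module's container actually ships it:

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        somelib: \$(printf "import somelib\\nprint(somelib.__version__)" | python)
    END_VERSIONS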
diff --git a/modules/local/phylotreejs.nf b/modules/local/phylotreejs.nf new file mode 100644 index 00000000..1a051cdc --- /dev/null +++ b/modules/local/phylotreejs.nf @@ -0,0 +1,39 @@ +process PHYLOTREEJS { + tag "${newick}" + label 'process_low' + + container 'quay.io/millironx/juliapro:1.6.6-2ed3693' + + input: + path(newick, stageAs: 'tree.nwk') + path(template, stageAs: 'template.html') + path(css, stageAs: 'styles.css') + path(d3, stageAs: 'd3.js') + path(underscore, stageAs: 'underscore.js') + path(phylotree, stageAs: 'phylotree.js') + + output: + path "*_mqc.html", emit: mqc_html + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + """ + phylotreegen \\ + ${newick} \\ + ${template} \\ + phylotree_mqc.html \\ + ${css} \\ + ${d3} \\ + ${underscore} \\ + ${phylotree} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + julia: \$(julia -v | awk '{print \$3}') + "Mustache.jl": \$(julia -e 'using Pkg, UUIDs; println(string(Pkg.dependencies()[UUID("ffc61752-8dc7-55ee-8c37-f3e9cdd09e70")].version))') + END_VERSIONS + """ +} diff --git a/modules/local/rename-haplotypes.nf b/modules/local/rename-haplotypes.nf deleted file mode 100644 index 50a67b1c..00000000 --- a/modules/local/rename-haplotypes.nf +++ /dev/null @@ -1,28 +0,0 @@ -process RENAME_HAPLOTYPES { - tag "$meta.id" - label 'process_low' - - conda (params.enable_conda ? "conda-forge::r-phylotools=0.2.2" : null) - container 'docker.io/biocontainers/phylotools:v0.2.4_cv1' - - input: - tuple val(meta), path(fasta, stageAs: 'input.fasta') - - output: - tuple val(meta), path("*.fasta"), emit: fasta - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def prefix = task.ext.prefix ?: "${meta.id}" - """ - renamehapseqs input.fasta ${prefix} ${prefix}.fasta - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - r-base: \$(echo \$(R --version 2>&1) | sed 's/^.*R version //; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/local/rename-ncbi.nf b/modules/local/rename-ncbi.nf deleted file mode 100644 index cba00df0..00000000 --- a/modules/local/rename-ncbi.nf +++ /dev/null @@ -1,28 +0,0 @@ -process RENAME_NCBI { - tag "$fasta" - label 'process_low' - - conda (params.enable_conda ? 
"conda-forge::r-phylotools=0.2.2" : null) - container 'docker.io/biocontainers/phylotools:v0.2.4_cv1' - - input: - path(fasta) - path(tsv) - - output: - path "renamed.fasta", emit: fasta - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - """ - renamerefseqs ${fasta} ${tsv} renamed.fasta - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - r-base: \$(echo \$(R --version 2>&1) | sed 's/^.*R version //; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/local/sequencetable.nf b/modules/local/sequencetable.nf index e9b5df14..e9ec23cb 100644 --- a/modules/local/sequencetable.nf +++ b/modules/local/sequencetable.nf @@ -1,42 +1,42 @@ process SEQUENCETABLE { - tag "$tsv" + tag "$haplotypes" label 'process_low' cache false - container 'quay.io/millironx/biojulia:1.6.6-1.1.4-9409225' + container 'quay.io/millironx/biojulia:1.6.6-2.0.5-9877308' input: - path tsv - path sam - path reference - path tree - path multiqc - path krona + path(haplotypes, stageAs: 'haplotypes.yml') + path(reference, stageAs: 'reference.fasta') + path(template, stageAs: 'template.html') + path(toolmeta, stageAs: 'tool.yml') + path(freezetable_js, stageAs: 'freezetable.jquery.js') output: - path "*.html", emit: html - path "versions.yml" , emit: versions + path "*_mqc.html", emit: mqc_html + path "versions.yml", emit: versions when: task.ext.when == null || task.ext.when script: - multiqc_flag = multiqc ? '--multiqc' : '--no-multiqc' - krona_flag = krona ? '--krona' : '--no-krona' - tree_flag = tree ? "--newick ${tree}" : '' """ - sequence-table.jl \\ - ${tsv} \\ - ${sam} \\ + sequencetable \\ + ${haplotypes} \\ ${reference} \\ - ${multiqc_flag} \\ - ${krona_flag} \\ - ${tree_flag} \\ - > seq-table.partial.html + ${template} \\ + ${toolmeta} \\ + ${freezetable_js} \\ + sequencetable_mqc.html cat <<-END_VERSIONS > versions.yml "${task.process}": julia: \$(julia -v | awk '{print \$3}') + "EzXML.jl": \$(julia -e 'using Pkg, UUIDs; println(string(Pkg.dependencies()[UUID("8f5d6c58-4d21-5cfd-889c-e3ad7ee6a615")].version))') + "FASTX.jl": \$(julia -e 'using Pkg, UUIDs; println(string(Pkg.dependencies()[UUID("c2308a5c-f048-11e8-3e8a-31650f418d12")].version))') + "Kelpie.jl": \$(julia -e 'using Pkg, UUIDs; println(string(Pkg.dependencies()[UUID("1b112299-d6bc-44e2-912a-478f25731460")].version))') + "Mustache.jl": \$(julia -e 'using Pkg, UUIDs; println(string(Pkg.dependencies()[UUID("ffc61752-8dc7-55ee-8c37-f3e9cdd09e70")].version))') + "YAML.jl": \$(julia -e 'using Pkg, UUIDs; println(string(Pkg.dependencies()[UUID("ddb6d928-2868-570f-bddf-ab3f9cf99eb6")].version))') END_VERSIONS """ } diff --git a/modules/nf-core/modules/cat/cat/main.nf b/modules/nf-core/modules/cat/cat/main.nf deleted file mode 100644 index 4ee44599..00000000 --- a/modules/nf-core/modules/cat/cat/main.nf +++ /dev/null @@ -1,48 +0,0 @@ -process CAT_CAT { - label 'process_low' - - conda (params.enable_conda ? "conda-forge::pigz=2.3.4" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/pigz:2.3.4' : - 'quay.io/biocontainers/pigz:2.3.4' }" - - input: - path files_in - val file_out - - output: - path "${file_out}*" , emit: file_out - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def args2 = task.ext.args2 ?: '' - def file_list = files_in.collect { it.toString() } - - // | input | output | command1 | command2 | - // |-----------|------------|----------|----------| - // | gzipped | gzipped | cat | | - // | ungzipped | ungzipped | cat | | - // | gzipped | ungzipped | zcat | | - // | ungzipped | gzipped | cat | pigz | - - def in_zip = file_list[0].endsWith('.gz') - def out_zip = file_out.endsWith('.gz') - def command1 = (in_zip && !out_zip) ? 'zcat' : 'cat' - def command2 = (!in_zip && out_zip) ? "| pigz -c -p $task.cpus $args2" : '' - """ - $command1 \\ - $args \\ - ${file_list.join(' ')} \\ - $command2 \\ - > $file_out - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) - END_VERSIONS - """ -} diff --git a/modules/nf-core/modules/cat/cat/meta.yml b/modules/nf-core/modules/cat/cat/meta.yml deleted file mode 100644 index e0a6361d..00000000 --- a/modules/nf-core/modules/cat/cat/meta.yml +++ /dev/null @@ -1,34 +0,0 @@ -name: cat_cat -description: A module for concatenation of gzipped or uncompressed files -keywords: - - concatenate - - gzip - - cat -tools: - - cat: - description: Just concatenation - homepage: None - documentation: https://man7.org/linux/man-pages/man1/cat.1.html - tool_dev_url: None - licence: ["GPL-3.0-or-later"] -input: - - files_in: - type: file - description: List of compressed / uncompressed files - pattern: "*" - - file_out: - type: value - description: Full name of output file with or without .gz extension - -output: - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - file_out: - type: file - description: Concatenated file. Will be gzipped if file_out ends with ".gz" - pattern: "${file_out}" - -authors: - - "@erikrikarddaniel" diff --git a/modules/nf-core/modules/fastqc/main.nf b/modules/nf-core/modules/fastqc/main.nf index ed6b8c50..05730368 100644 --- a/modules/nf-core/modules/fastqc/main.nf +++ b/modules/nf-core/modules/fastqc/main.nf @@ -44,4 +44,16 @@ process FASTQC { END_VERSIONS """ } + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.html + touch ${prefix}.zip + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) + END_VERSIONS + """ } diff --git a/modules/nf-core/modules/ivar/consensus/main.nf b/modules/nf-core/modules/ivar/consensus/main.nf deleted file mode 100644 index db6301e9..00000000 --- a/modules/nf-core/modules/ivar/consensus/main.nf +++ /dev/null @@ -1,46 +0,0 @@ -process IVAR_CONSENSUS { - tag "$meta.id" - label 'process_medium' - - conda (params.enable_conda ? "bioconda::ivar=1.3.1" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/ivar:1.3.1--h089eab3_0' : - 'quay.io/biocontainers/ivar:1.3.1--h089eab3_0' }" - - input: - tuple val(meta), path(bam) - path fasta - val save_mpileup - - output: - tuple val(meta), path("*.fa") , emit: fasta - tuple val(meta), path("*.qual.txt"), emit: qual - tuple val(meta), path("*.mpileup") , optional:true, emit: mpileup - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def args2 = task.ext.args2 ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def mpileup = save_mpileup ? "| tee ${prefix}.mpileup" : "" - """ - samtools \\ - mpileup \\ - --reference $fasta \\ - $args2 \\ - $bam \\ - $mpileup \\ - | ivar \\ - consensus \\ - $args \\ - -p $prefix - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - ivar: \$(echo \$(ivar version 2>&1) | sed 's/^.*iVar version //; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/modules/ivar/consensus/meta.yml b/modules/nf-core/modules/ivar/consensus/meta.yml deleted file mode 100644 index fb562603..00000000 --- a/modules/nf-core/modules/ivar/consensus/meta.yml +++ /dev/null @@ -1,56 +0,0 @@ -name: ivar_consensus -description: Generate a consensus sequence from a BAM file using iVar -keywords: - - amplicon sequencing - - consensus - - fasta -tools: - - ivar: - description: | - iVar - a computational package that contains functions broadly useful for viral amplicon-based sequencing. - homepage: https://github.com/andersen-lab/ivar - documentation: https://andersen-lab.github.io/ivar/html/manualpage.html - licence: ["GPL-3.0-or-later"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - bam: - type: file - description: A sorted (with samtools sort) and trimmed (with iVar trim) bam file - pattern: "*.bam" - - fasta: - type: file - description: The reference sequence used for mapping and generating the BAM file - pattern: "*.fa" - - save_mpileup: - type: boolean - description: Save mpileup file generated by ivar consensus - patter: "*.mpileup" -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - fasta: - type: file - description: iVar generated consensus sequence - pattern: "*.fa" - - qual: - type: file - description: iVar generated quality file - pattern: "*.qual.txt" - - mpileup: - type: file - description: mpileup output from samtools mpileup [OPTIONAL] - pattern: "*.mpileup" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@andersgs" - - "@drpatelh" diff --git a/modules/nf-core/modules/mafft/meta.yml b/modules/nf-core/modules/mafft/meta.yml index 10c7f0c2..66bb10b9 100644 --- a/modules/nf-core/modules/mafft/meta.yml +++ b/modules/nf-core/modules/mafft/meta.yml @@ -10,7 +10,7 @@ tools: documentation: https://mafft.cbrc.jp/alignment/software/manual/manual.html tool_dev_url: https://mafft.cbrc.jp/alignment/software/source.html doi: "10.1093/nar/gkf436" - licence: ['BSD'] + licence: ["BSD"] input: - meta: diff --git a/modules/nf-core/modules/minimap2/align/main.nf b/modules/nf-core/modules/minimap2/align/main.nf index fe06f14d..08ac6eef 100644 --- a/modules/nf-core/modules/minimap2/align/main.nf +++ b/modules/nf-core/modules/minimap2/align/main.nf @@ -2,18 +2,22 @@ process MINIMAP2_ALIGN { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? 
'bioconda::minimap2=2.21' : null) + conda (params.enable_conda ? 'bioconda::minimap2=2.21 bioconda::samtools=1.12' : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/minimap2:2.21--h5bf99c6_0' : - 'quay.io/biocontainers/minimap2:2.21--h5bf99c6_0' }" + 'https://depot.galaxyproject.org/singularity/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:1679e915ddb9d6b4abda91880c4b48857d471bd8-0' : + 'quay.io/biocontainers/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:1679e915ddb9d6b4abda91880c4b48857d471bd8-0' }" input: tuple val(meta), path(reads) path reference + val bam_format + val cigar_paf_format + val cigar_bam output: - tuple val(meta), path("*.paf"), emit: paf - path "versions.yml" , emit: versions + tuple val(meta), path("*.paf"), optional: true, emit: paf + tuple val(meta), path("*.bam"), optional: true, emit: bam + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -22,13 +26,19 @@ process MINIMAP2_ALIGN { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def input_reads = meta.single_end ? "$reads" : "${reads[0]} ${reads[1]}" + def bam_output = bam_format ? "-a | samtools sort | samtools view -@ ${task.cpus} -b -h -o ${prefix}.bam" : "-o ${prefix}.paf" + def cigar_paf = cigar_paf_format && !bam_format ? "-c" : '' + def set_cigar_bam = cigar_bam && bam_format ? "-L" : '' """ minimap2 \\ $args \\ -t $task.cpus \\ $reference \\ $input_reads \\ - > ${prefix}.paf + $cigar_paf \\ + $set_cigar_bam \\ + $bam_output + cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/modules/minimap2/align/meta.yml b/modules/nf-core/modules/minimap2/align/meta.yml index 89e24283..991b39a0 100644 --- a/modules/nf-core/modules/minimap2/align/meta.yml +++ b/modules/nf-core/modules/minimap2/align/meta.yml @@ -29,6 +29,17 @@ input: type: file description: | Reference database in FASTA format. + - bam_format: + type: boolean + description: Specify that output should be in BAM format + - cigar_paf_format: + type: boolean + description: Specify that output CIGAR should be in PAF format + - cigar_bam: + type: boolean + description: | + Write CIGAR with >65535 ops at the CG tag. 
This is recommended when + working with alignments that contain more than 65535 CIGAR operations (https://github.com/lh3/minimap2#working-with-65535-cigar-operations) output: - meta: type: map @@ -39,9 +50,16 @@ output: type: file description: Alignment in PAF format pattern: "*.paf" + - bam: + type: file + description: Alignment in BAM format + pattern: "*.bam" - versions: type: file description: File containing software versions pattern: "versions.yml" authors: - "@heuermh" + - "@sofstam" + - "@sateeshperi" + - "@jfy133" diff --git a/modules/nf-core/modules/multiqc/main.nf b/modules/nf-core/modules/multiqc/main.nf index 1264aac1..ae019dbf 100644 --- a/modules/nf-core/modules/multiqc/main.nf +++ b/modules/nf-core/modules/multiqc/main.nf @@ -28,4 +28,16 @@ process MULTIQC { multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" ) END_VERSIONS """ + + stub: + """ + touch multiqc_data + touch multiqc_plots + touch multiqc_report.html + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" ) + END_VERSIONS + """ } diff --git a/modules/nf-core/modules/samtools/faidx/main.nf b/modules/nf-core/modules/samtools/faidx/main.nf index 7732a4ec..fdce7d9b 100644 --- a/modules/nf-core/modules/samtools/faidx/main.nf +++ b/modules/nf-core/modules/samtools/faidx/main.nf @@ -2,10 +2,10 @@ process SAMTOOLS_FAIDX { tag "$fasta" label 'process_low' - conda (params.enable_conda ? "bioconda::samtools=1.15" : null) + conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.15--h1170115_1' : - 'quay.io/biocontainers/samtools:1.15--h1170115_1' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' : + 'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }" input: tuple val(meta), path(fasta) @@ -29,4 +29,14 @@ samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') END_VERSIONS """ + + stub: + """ + touch ${fasta}.fai + cat <<-END_VERSIONS > versions.yml + + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ } diff --git a/modules/nf-core/modules/samtools/fastq/main.nf b/modules/nf-core/modules/samtools/fastq/main.nf deleted file mode 100644 index 8d9b9d08..00000000 --- a/modules/nf-core/modules/samtools/fastq/main.nf +++ /dev/null @@ -1,37 +0,0 @@ -process SAMTOOLS_FASTQ { - tag "$meta.id" - label 'process_low' - - conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' : - 'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }" - - input: - tuple val(meta), path(bam) - - output: - tuple val(meta), path("*.fastq.gz"), emit: fastq - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def endedness = meta.single_end ?
"-0 ${prefix}.fastq.gz" : "-1 ${prefix}_1.fastq.gz -2 ${prefix}_2.fastq.gz" - """ - samtools \\ - fastq \\ - $args \\ - --threads ${task.cpus-1} \\ - $endedness \\ - $bam - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/modules/samtools/fastq/meta.yml b/modules/nf-core/modules/samtools/fastq/meta.yml deleted file mode 100644 index 41055cfb..00000000 --- a/modules/nf-core/modules/samtools/fastq/meta.yml +++ /dev/null @@ -1,43 +0,0 @@ -name: samtools_fastq -description: Converts a SAM/BAM/CRAM file to FASTQ -keywords: - - bam - - sam - - cram - - fastq -tools: - - samtools: - description: | - SAMtools is a set of utilities for interacting with and post-processing - short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. - These files are generated as output by short read aligners like BWA. - homepage: http://www.htslib.org/ - documentation: hhttp://www.htslib.org/doc/samtools.html - doi: 10.1093/bioinformatics/btp352 - licence: ["MIT"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - bam: - type: file - description: BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - fastq: - type: file - description: compressed FASTQ file - pattern: "*.fastq.gz" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@suzannejin" diff --git a/modules/nf-core/modules/samtools/index/main.nf b/modules/nf-core/modules/samtools/index/main.nf index e41cdcc8..e04e63e8 100644 --- a/modules/nf-core/modules/samtools/index/main.nf +++ b/modules/nf-core/modules/samtools/index/main.nf @@ -2,10 +2,10 @@ process SAMTOOLS_INDEX { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::samtools=1.15" : null) + conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.15--h1170115_1' : - 'quay.io/biocontainers/samtools:1.15--h1170115_1' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' : + 'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }" input: tuple val(meta), path(input) @@ -33,4 +33,16 @@ process SAMTOOLS_INDEX { samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') END_VERSIONS """ + + stub: + """ + touch ${input}.bai + touch ${input}.crai + touch ${input}.csi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ } diff --git a/modules/nf-core/modules/samtools/sort/main.nf b/modules/nf-core/modules/samtools/sort/main.nf deleted file mode 100644 index 0e2de8ba..00000000 --- a/modules/nf-core/modules/samtools/sort/main.nf +++ /dev/null @@ -1,31 +0,0 @@ -process SAMTOOLS_SORT { - tag "$meta.id" - label 'process_medium' - - conda (params.enable_conda ? "bioconda::samtools=1.15" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/samtools:1.15--h1170115_1' : - 'quay.io/biocontainers/samtools:1.15--h1170115_1' }" - - input: - tuple val(meta), path(bam) - - output: - tuple val(meta), path("*.bam"), emit: bam - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - if ("$bam" == "${prefix}.bam") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" - """ - samtools sort $args -@ $task.cpus -o ${prefix}.bam -T $prefix $bam - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/modules/samtools/sort/meta.yml b/modules/nf-core/modules/samtools/sort/meta.yml deleted file mode 100644 index a820c55a..00000000 --- a/modules/nf-core/modules/samtools/sort/meta.yml +++ /dev/null @@ -1,44 +0,0 @@ -name: samtools_sort -description: Sort SAM/BAM/CRAM file -keywords: - - sort - - bam - - sam - - cram -tools: - - samtools: - description: | - SAMtools is a set of utilities for interacting with and post-processing - short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. - These files are generated as output by short read aligners like BWA. - homepage: http://www.htslib.org/ - documentation: hhttp://www.htslib.org/doc/samtools.html - doi: 10.1093/bioinformatics/btp352 - licence: ["MIT"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - bam: - type: file - description: BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - bam: - type: file - description: Sorted BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@drpatelh" - - "@ewels" diff --git a/nextflow.config b/nextflow.config index 0f92596e..64da3e95 100644 --- a/nextflow.config +++ b/nextflow.config @@ -1,30 +1,279 @@ /* -==================================================================================== - YAVSAP Nextflow config file -==================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + yavsap Nextflow config file +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Default config options for all compute environments ------------------------------------------------------------------------------------- +---------------------------------------------------------------------------------------- */ -// Process metadata -manifest { - name = 'yavsap' - author = 'Thomas A. Christensen II' - homePage = 'https://ksumngs.github.io/yavsap' - description = 'Intra-sample viral population analysis' - mainScript = 'main.nf' - nextflowVersion = '!>=21.10.6' - version = '0.7.1' - recurseSubmodules = true +// Global default params, used in configs +params { + // Input options + input = '.' 
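
The `params` block that opens here carries every user-facing default for the pipeline. In practice these are overridden from a small user-side config (or individual `--param` flags) rather than by editing `nextflow.config` itself; a minimal sketch with hypothetical paths, passed as `nextflow run ksumngs/yavsap -c my_site.config`:

```groovy
// my_site.config -- hypothetical user override file; each name here
// shadows a default from the params block in nextflow.config.
params {
    platform   = 'nanopore'                // required by the schema; no usable default
    input      = '/data/run01/fastq_pass'  // folder of .fastq.gz files or a TSV samplesheet
    kraken2_db = '/refs/kraken2/viral'     // required for host read filtering
    outdir     = 'results'                 // required by the schema
}
```
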
+ platform = null + paired = (params.platform == 'illumina') + interleaved = false + seed = 42 + + // Reference genome options + genome = 'NC_001437.1' // JEV RefSeq + genome_list = 'jev' + + // Kraken 2 options + kraken2_db = null + keep_taxid = '0 10239' // 0 = Unclassified, 10239 = Viral + + // Read trimming options (shared) + trim_minlen = 100 + trim_headcrop = 0 + + // Illumina trimming options (Trimmomatic) + trim_adapters = 'NexteraPE-PE.fa' + trim_mismatches = 2 + trim_pclip = 30 + trim_clip = 10 + trim_winsize = 50 + trim_winqual = 15 + trim_leading = 15 + trim_trailing = 15 + trim_crop = 0 + + // ONT-specific trimming options (Filtlong) + trim_maxlen = 0 + trim_meanqual = 7 + trim_mingc = 0 + trim_maxgc = 0 + trim_tailcrop = 0 + + // Variant calling options + variant_quality = 12 + variant_depth = 10 + variant_position = 0.1 + variant_frequency = 0.05 + variant_significance = 1e-3 + + // Haplotyping options + haplotype_significance = 0.05 + haplotype_depth = 10 + haplotype_frequency = 0.05 + haplotype_method = 'ml-template' + haplotype_overlap_min = 0 + haplotype_overlap_max = 100 + haplotype_iterations = 1000 + cliquesnv_method = 'accurate' + + // Phylogenetics options + mafft_method = '--auto' + phylogenetic_bootstraps = 1000 + phylogenetic_bootstrap_cutoff = 0.03 + phylogenetic_model = 'GTR+G' + + // Step-skipping options + skip_trimming = false + skip_qc = false + skip_filtering = false + skip_haplotype = false + skip_phylogenetics = false + + // Remote resource options + freezetable_js = 'https://cdn.jsdelivr.net/npm/jquery-freeze-table@1.3.0/dist/js/freeze-table.min.js' + igv_js = 'https://cdn.jsdelivr.net/npm/igv@2.11.0/dist/igv.js' + phylotree_css = 'https://cdn.jsdelivr.net/npm/phylotree@1.0.13/dist/phylotree.css' + d3_js = 'https://cdn.jsdelivr.net/npm/d3@5.16.0/dist/d3.min.js' + underscore_js = 'https://cdn.jsdelivr.net/npm/underscore@1.13.2/underscore-umd.min.js' + phylotree_js = 'https://cdn.jsdelivr.net/npm/phylotree@1.0.13/dist/phylotree.min.js' + + // MultiQC options + multiqc_config = null + multiqc_title = null + max_multiqc_email_size = '25.MB' + + // Boilerplate options + outdir = null + tracedir = "${params.outdir}/pipeline_info" + publish_dir_mode = 'copy' + email = null + email_on_fail = null + plaintext_email = false + monochrome_logs = false + breed = 'plain' + help = false + validate_params = true + show_hidden_params = false + schema_ignore_params = 'genomes' + enable_conda = false + + // Config options + custom_config_version = 'master' + custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" + config_profile_description = null + config_profile_contact = null + config_profile_url = null + config_profile_name = null + + // Max resource options + // Defaults only, expecting to be overwritten + max_memory = '128.GB' + max_cpus = 16 + max_time = '240.h' + +} + +// Load base.config by default for all pipelines +includeConfig 'conf/base.config' + +// Load nf-core custom profiles from different Institutions +try { + includeConfig "${params.custom_config_base}/nfcore_custom.config" +} catch (Exception e) { + System.err.println("WARNING: Could not load nf-core/config profiles: ${params.custom_config_base}/nfcore_custom.config") +} + +// Load nf-core/yavsap custom profiles from different institutions. +// Warning: Uncomment only if a pipeline-specific institutional config already exists on nf-core/configs!
+// try { +// includeConfig "${params.custom_config_base}/pipeline/yavsap.config" +// } catch (Exception e) { +// System.err.println("WARNING: Could not load nf-core/config/yavsap profiles: ${params.custom_config_base}/pipeline/yavsap.config") +// } + + +profiles { + debug { process.beforeScript = 'echo $HOSTNAME' } + conda { + params.enable_conda = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + docker { + docker.enabled = true + docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } + gh { + params { + max_cpus = 2 + max_memory = '6.GB' + max_time = '6.h' + } + } + test { includeConfig 'conf/test.config' } + test_full { includeConfig 'conf/test_full.config' } + test_interleaved { includeConfig 'conf/test_interleaved.config' } // Illumina PE Interleaved + test_se { includeConfig 'conf/test_se.config' } // Illumina SE + test_nanopore { includeConfig 'conf/test_nanopore.config' } // Nanopore +} + +// Export these variables to prevent local Python/R libraries from conflicting with those in the container +// The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container. +// See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable. + +env { + PYTHONNOUSERSITE = 1 + R_PROFILE_USER = "/.Rprofile" + R_ENVIRON_USER = "/.Renviron" + JULIA_DEPOT_PATH = "\$PWD:/usr/local/share/julia" } // Capture exit codes from upstream processes when piping process.shell = ['/bin/bash', '-euo', 'pipefail'] -includeConfig 'conf/parameters.config' -includeConfig 'conf/profiles.config' -includeConfig 'conf/containers.config' -includeConfig 'conf/resources.config' -includeConfig 'conf/environment.config' -includeConfig 'conf/trace.config' +def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') +timeline { + enabled = true + file = "${params.tracedir}/execution_timeline_${trace_timestamp}.html" +} +report { + enabled = true + file = "${params.tracedir}/execution_report_${trace_timestamp}.html" +} +trace { + enabled = true + file = "${params.tracedir}/execution_trace_${trace_timestamp}.txt" +} +dag { + enabled = true + file = "${params.tracedir}/pipeline_dag_${trace_timestamp}.svg" +} + +manifest { + name = 'yavsap' + author = 'Thomas A. 
Christensen II' + homePage = 'https://ksumngs.github.io/yavsap' + description = 'Identification and analysis of viral haplotypes in metagenomic NGS reads' + mainScript = 'main.nf' + nextflowVersion = '!>=21.10.3' + version = '0.8.0' + recurseSubmodules = true +} + +// Load modules.config for DSL2 module specific options includeConfig 'conf/modules.config' + +// Function to ensure that resource requirements don't go beyond +// a maximum limit +def check_max(obj, type) { + if (type == 'memory') { + try { + if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) + return params.max_memory as nextflow.util.MemoryUnit + else + return obj + } catch (all) { + println " ### ERROR ### Max memory '${params.max_memory}' is not valid! Using default value: $obj" + return obj + } + } else if (type == 'time') { + try { + if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1) + return params.max_time as nextflow.util.Duration + else + return obj + } catch (all) { + println " ### ERROR ### Max time '${params.max_time}' is not valid! Using default value: $obj" + return obj + } + } else if (type == 'cpus') { + try { + return Math.min( obj, params.max_cpus as int ) + } catch (all) { + println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! Using default value: $obj" + return obj + } + } +} diff --git a/nextflow_schema.json b/nextflow_schema.json index 58404bcf..3f5e670f 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -2,7 +2,7 @@ "$schema": "http://json-schema.org/draft-07/schema", "$id": "https://raw.githubusercontent.com/yavsap/master/nextflow_schema.json", "title": "yavsap pipeline parameters", - "description": "Intra-sample viral population analysis", + "description": "Identification and analysis of viral haplotypes in metagenomic NGS reads", "type": "object", "definitions": { "input_output_options": { @@ -10,22 +10,16 @@ "type": "object", "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", + "required": ["platform", "outdir"], "properties": { "input": { "type": "string", "format": "file-path", - "description": "Path the folder containing input reads.", + "description": "Path to the folder containing input reads or a TSV samplesheet", "help_text": "For Illumina (paired-end) reads, the file names must be identical until the ending underscore with a read number, e.g. 'sample1_S10_L001_R1_001.fastq.gz' and 'sample1_S10_L001_R2_001.fastq.gz'. The read number must be designated using either '_1' and '_2' or '_R1' and '_R2'. For Nanopore reads, each fastq file is assumed to be a unique sample, so, e.g. 'FAP01234_pass_barcode05_abcd01234_0.fastq.gz' and 'FAP01234_pass_barcode05_abcd01234_1.fastq.gz' are assumed to be different samples even though they are from the same barcode. All read files must be gzipped, and have the extension '.fastq.gz' or '.fq.gz'.", "fa_icon": "fas fa-folder", "default": "." }, - "sra": { - "type": "boolean", - "hidden": true, - "description": "Pull input reads from NCBI SRA", - "fa_icon": "fas fa-cloud-download-alt", - "help_text": "This flag switches the meaning of `--input` to mean an NCBI Short Read Archive (SRA) accession number, then pulls the associated files directly from NCBI and runs the pipeline on them. To use this flag, you **must** have an [NCBI API key](https://ncbiinsights.ncbi.nlm.nih.gov/2017/11/02/new-api-keys-for-the-e-utilities/) and it **must** be exported in the shell environment as `NCBI_API_KEY`, e.g. `NCBI_API_KEY=0123456789abcdef`. 
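
The `check_max` function above only clamps a requested value against `params.max_cpus`/`max_memory`/`max_time`; it has no effect unless per-process resource requests are routed through it. A sketch of the conventional nf-core wiring (this repository's actual `conf/base.config` may differ in its numbers):

```groovy
// conf/base.config (sketch): funnel every request through check_max so the
// params.max_* values act as hard ceilings, even when task.attempt
// escalates the request on an automatic retry.
process {
    cpus   = { check_max( 2    * task.attempt, 'cpus'   ) }
    memory = { check_max( 6.GB * task.attempt, 'memory' ) }
    time   = { check_max( 4.h  * task.attempt, 'time'   ) }
}
```
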
This feature is currenly broken due to an upstream issue in Nextflow, but should be fixed by Nextflow version 21.10.0." - }, "platform": { "type": "string", "fa_icon": "fas fa-tram", @@ -43,13 +37,6 @@ "fa_icon": "fas fa-random", "description": "Whether paired-end reads interleaved into a single fastq file" }, - "samplesheet": { - "type": "string", - "default": "None", - "fa_icon": "fas fa-file-csv", - "description": "Path to a tab-separated file containing a table of samplenames and reads files", - "help_text": "For more info, see {ref}`Using a Samplesheet as Input`" - }, "seed": { "type": "integer", "fa_icon": "fas fa-seedling", @@ -59,57 +46,16 @@ "outdir": { "type": "string", "description": "Path to the output directory where the results will be saved.", - "default": "./results", "fa_icon": "fas fa-folder-open" }, - "publish_dir_mode": { - "type": "string", - "default": "copy", - "fa_icon": "fab fa-leanpub", - "description": "How to create results files", - "enum": [ - "symlink", - "relink", - "link", - "copy", - "copyNoFollow", - "move" - ], - "hidden": true - }, - "tracedir": { + "email": { "type": "string", - "description": "Directory to keep pipeline Nextflow logs and reports.", - "default": "${params.outdir}/pipeline_info", - "fa_icon": "fas fa-cogs", - "hidden": true - }, - "help": { - "type": "boolean", - "description": "Display help text.", - "fa_icon": "fas fa-question-circle", - "hidden": true - }, - "show_hidden_params": { - "type": "boolean", - "fa_icon": "fas fa-eye", - "description": "Show all params when using `--help`", - "hidden": true - }, - "custom_config_version": { - "type": "string", - "description": "Git commit id for Institutional configs.", - "default": "master", - "fa_icon": "fas fa-history" - }, - "custom_config_base": { - "type": "string", - "description": "Base directory for Institutional configs.", - "default": "https://raw.githubusercontent.com/nf-core/configs/master", - "fa_icon": "fas fa-globe" + "description": "Email address for completion summary.", + "fa_icon": "fas fa-envelope", + "help_text": "Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. If set in your user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run.", + "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$" } - }, - "required": ["platform"] + } }, "reference_genome_options": { "title": "Reference genome options", @@ -121,7 +67,7 @@ "type": "string", "description": "NCBI accession number of reference genome", "fa_icon": "fas fa-book", - "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`. \n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details.", + "help_text": "The NCBI Genbank accession.version number of the reference genome to call strains from", "default": "NC_001437.1" }, "genome_list": { @@ -141,7 +87,7 @@ "kraken2_db": { "type": "string", "description": "Path to a Kraken2 database", - "help_text": "The path to a [Kraken2 database](https://github.com/DerrickWood/kraken2/wiki/Manual#custom-databases) that will be used to filter out host reads in the pipeline. 
This path will be automatically mounted into the container environments if a containerized profile is used.\n\nCorresponds to the [--db](https://github.com/DerrickWood/kraken2/wiki/Manual#classification) option of Kraken2.", + "help_text": "The path to a [Kraken2 database](https://github.com/DerrickWood/kraken2/wiki/Manual#custom-databases) that will be used to filter out host reads in the pipeline. This path will be automatically mounted into the container environments if a containerized profile is used. Corresponds to the [--db](https://github.com/DerrickWood/kraken2/wiki/Manual#classification) option of Kraken2.", "fa_icon": "fas fa-database", "default": "None" }, @@ -150,7 +96,7 @@ "default": "0 10239", "fa_icon": "fas fa-dog", "description": "Taxonomic IDs to keep and analyze", - "help_text": "A space-separated list (use quotes on the command line), of the taxonomic ids to keep based on Kraken2's classification.\n\nDefaults to keeping all unclassified reads and all viral reads. Note that this requires the host to be present in the Kraken2 database. When dealing with animals and the databases available from `kraken2-build`, this is not the case, and this parameter should be modified." + "help_text": "A space-separated list (use quotes on the command line), of the taxonomic ids to keep based on Kraken2's classification. Defaults to keeping all unclassified reads and all viral reads. Note that this requires the host to be present in the Kraken2 database. When dealing with animals and the databases available from `kraken2-build`, this is not the case, and this parameter should be modified." } }, "required": ["kraken2_db"], @@ -168,40 +114,35 @@ "default": 100, "fa_icon": "fas fa-ruler-horizontal", "description": "Minimum length of reads", - "help_text": "Corresponds to the [MINLEN](http://www.usadellab.org/cms/?page=trimmomatic)\noption of Trimmomatic for Illumina reads.\n\nCorresponds to the [--length](https://github.com/wdecoster/nanofilt/#usage)\noption of NanoFilt for Nanopore reads." + "help_text": "Corresponds to the [MINLEN](http://www.usadellab.org/cms/?page=trimmomatic) option of Trimmomatic for Illumina reads. Corresponds to the [--length](https://github.com/wdecoster/nanofilt/#usage) option of NanoFilt for Nanopore reads." }, "trim_maxlen": { "type": "integer", "default": 0, "fa_icon": "fas fa-ruler-vertical", "description": "Maximum length of reads", - "help_text": "Only applies to Nanopore reads.\n\nCorresponds to the [--maxlength](https://github.com/wdecoster/nanofilt/#usage) option of\nNanoFilt." + "help_text": "Only applies to Nanopore reads. Corresponds to the [--maxlength](https://github.com/wdecoster/nanofilt/#usage) option of NanoFilt." }, "trim_adapters": { "type": "string", "default": "NexteraPE-PE.fa", "fa_icon": "fas fa-align-right", "description": "Sequences to be removed during trimming", - "help_text": "Only applies to Illumina reads. Corresponds to the first\n[ILLUMINACLIP](http://www.usadellab.org/cms/?page=trimmomatic) option of\nTrimmomatic. If left blank (i.e. `--trim_adapters ''`), then adapter trimming is\ndisabled. Custom adapters cannot be used, and the parameter corresponds to\none of the prebuilt sequence files provided with Trimmomatic.\n", - "enum": [ - "NexteraPE-PE.fa", - "TruSeq2-PE.fa", - "TruSeq3-PE-2.fa", - "TruSeq3-PE.fa" - ] + "help_text": "Only applies to Illumina reads. Corresponds to the first [ILLUMINACLIP](http://www.usadellab.org/cms/?page=trimmomatic) option of Trimmomatic. If left blank (i.e. 
`--trim_adapters ''`), then adapter trimming is disabled. Custom adapters cannot be used, and the parameter corresponds to one of the prebuilt sequence files provided with Trimmomatic. ", + "enum": ["NexteraPE-PE.fa", "TruSeq2-PE.fa", "TruSeq3-PE-2.fa", "TruSeq3-PE.fa"] }, "trim_mismatches": { "type": "integer", "default": 2, "description": "Max number of base mismatches to allow an adapter match", - "help_text": "Only applies to Illumina reads. Corresponds to the second\n[ILLUMINACLIP](http://www.usadellab.org/cms/?page=trimmomatic) option of\nTrimmomatic.", + "help_text": "Only applies to Illumina reads. Corresponds to the second [ILLUMINACLIP](http://www.usadellab.org/cms/?page=trimmomatic) option of Trimmomatic.", "fa_icon": "fab fa-buromobelexperte" }, "trim_pclip": { "type": "integer", "default": 30, "description": "How accurate the match between adapter ligated reads must be for paired-end palindrome read alignment", - "help_text": "Only applies to Illumina reads. Corresponds to the third\n[ILLUMINACLIP](http://www.usadellab.org/cms/?page=trimmomatic) option of\nTrimmomatic.", + "help_text": "Only applies to Illumina reads. Corresponds to the third [ILLUMINACLIP](http://www.usadellab.org/cms/?page=trimmomatic) option of Trimmomatic.", "fa_icon": "fas fa-clipboard" }, "trim_clip": { @@ -209,48 +150,48 @@ "default": 10, "fa_icon": "far fa-clipboard", "description": "How accurate the match between any adapter must be against a read", - "help_text": "Only applies to Illumina reads. Corresponds to the final\n[ILLUMINACLIP](http://www.usadellab.org/cms/?page=trimmomatic) option of\nTrimmomatic." + "help_text": "Only applies to Illumina reads. Corresponds to the final [ILLUMINACLIP](http://www.usadellab.org/cms/?page=trimmomatic) option of Trimmomatic." }, "trim_winsize": { "type": "integer", "default": 50, "fa_icon": "fab fa-windows", "description": "Number of bases to average quality across", - "help_text": "Only applies to Illumina reads. If set to `0`, then sliding window trimming is disabled.\nCorresponds to the first\n[SLIDINGWINDOW](http://www.usadellab.org/cms/?page=trimmomatic) option of\nTrimmomatic." + "help_text": "Only applies to Illumina reads. If set to `0`, then sliding window trimming is disabled. Corresponds to the first [SLIDINGWINDOW](http://www.usadellab.org/cms/?page=trimmomatic) option of Trimmomatic." }, "trim_winqual": { "type": "integer", "default": 15, "fa_icon": "fas fa-tachometer-alt", "description": "Required average window base quality", - "help_text": "Only applies to Illumina reads. If set to `0`, then sliding window trimming is disabled.\nCorresponds to the second\n[SLIDINGWINDOW](http://www.usadellab.org/cms/?page=trimmomatic) option of\nTrimmomatic." + "help_text": "Only applies to Illumina reads. If set to `0`, then sliding window trimming is disabled. Corresponds to the second [SLIDINGWINDOW](http://www.usadellab.org/cms/?page=trimmomatic) option of Trimmomatic." }, "trim_leading": { "type": "integer", "default": 15, "fa_icon": "fas fa-angle-double-left", "description": "Minimum quality of bases in leading end of read", - "help_text": "Only applies to Illumina reads. If set to `0`, LEADING trimming is disabled.\nCorresponds to the\n[LEADING](http://www.usadellab.org/cms/?page=trimmomatic) option of\nTrimmomatic." + "help_text": "Only applies to Illumina reads. If set to `0`, LEADING trimming is disabled. Corresponds to the [LEADING](http://www.usadellab.org/cms/?page=trimmomatic) option of Trimmomatic." 
}, "trim_trailing": { "type": "integer", "default": 15, "fa_icon": "fas fa-angle-double-right", "description": "Minimum quality of bases in trailing end of read", - "help_text": "Only applies to Illumina reads. If set to `0`, TRAILING trimming is disabled.\nCorresponds to the [TRAILING](http://www.usadellab.org/cms/?page=trimmomatic) option of\nTrimmomatic." + "help_text": "Only applies to Illumina reads. If set to `0`, TRAILING trimming is disabled. Corresponds to the [TRAILING](http://www.usadellab.org/cms/?page=trimmomatic) option of Trimmomatic." }, "trim_headcrop": { "type": "integer", "default": 0, "description": "Number of bases to remove from start of read", - "help_text": "Corresponds to the [HEADCROP](http://www.usadellab.org/cms/?page=trimmomatic)\noption of Trimmomatic for Illumina reads. If set to `0`, then HEADCROP trimming is disabled.\n\nCorresponds to the [--headcrop](https://github.com/wdecoster/nanofilt/#usage)\noption of NanoFilt for Nanopore reads.", + "help_text": "Corresponds to the [HEADCROP](http://www.usadellab.org/cms/?page=trimmomatic) option of Trimmomatic for Illumina reads. If set to `0`, then HEADCROP trimming is disabled. Corresponds to the [--headcrop](https://github.com/wdecoster/nanofilt/#usage) option of NanoFilt for Nanopore reads.", "fa_icon": "fas fa-angle-up" }, "trim_crop": { "type": "integer", "default": 0, "description": "Number of bases to keep from start of read", - "help_text": "Only applies to Illumina reads. If set to `0`, CROP trimming is disabled.\nCorresponds to the\n[CROP](http://www.usadellab.org/cms/?page=trimmomatic) option of Trimmomatic.", + "help_text": "Only applies to Illumina reads. If set to `0`, CROP trimming is disabled. Corresponds to the [CROP](http://www.usadellab.org/cms/?page=trimmomatic) option of Trimmomatic.", "fa_icon": "fas fa-angle-down" }, "trim_meanqual": { @@ -258,28 +199,50 @@ "default": 7, "fa_icon": "fas fa-tachometer-alt", "description": "Minimum average base quality of entire reads", - "help_text": "Applies only to ONT reads. Corresponds to the\n[--quality](https://github.com/wdecoster/nanofilt/#usage) option of NanoFilt." + "help_text": "Applies only to ONT reads. Corresponds to the [--quality](https://github.com/wdecoster/nanofilt/#usage) option of NanoFilt." }, "trim_mingc": { "type": "integer", "default": 0, "fa_icon": "fas fa-dna", "description": "Minimum GC count of reads", - "help_text": "Only applies to ONT reads. Corresponds to the\n[--minGC](https://github.com/wdecoster/nanofilt/#usage) option of NanoFilt." + "help_text": "Only applies to ONT reads. Corresponds to the [--minGC](https://github.com/wdecoster/nanofilt/#usage) option of NanoFilt." }, "trim_maxgc": { "type": "integer", "default": 0, "fa_icon": "fas fa-dna", "description": "Maximum GC count of reads", - "help_text": "Only applies to ONT reads. Corresponds to the\n[--maxGC](https://github.com/wdecoster/nanofilt/#usage) option of NanoFilt." + "help_text": "Only applies to ONT reads. Corresponds to the [--maxGC](https://github.com/wdecoster/nanofilt/#usage) option of NanoFilt." }, "trim_tailcrop": { "type": "integer", "default": 0, "fa_icon": "fas fa-angle-down", "description": "Number of bases to remove from the end of each read", - "help_text": "Only applies to ONT reads. Corresponds to the\n[--tailcrop](https://github.com/wdecoster/nanofilt/#usage) option of NanoFilt." + "help_text": "Only applies to ONT reads. Corresponds to the [--tailcrop](https://github.com/wdecoster/nanofilt/#usage) option of NanoFilt." 
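
The Illumina trimming parameters above map one-to-one onto Trimmomatic steps, with `0` (or an empty adapter name) disabling the corresponding step. A hypothetical sketch of how such a step string could be assembled; this is illustrative only, not the pipeline's actual process code:

```groovy
// Illustrative only: build a Trimmomatic step list from the params above.
// A zero value (or an empty adapter name) drops the corresponding step.
def trimmomaticSteps(params) {
    def steps = []
    if (params.trim_adapters)
        steps << "ILLUMINACLIP:${params.trim_adapters}:${params.trim_mismatches}:${params.trim_pclip}:${params.trim_clip}"
    if (params.trim_winsize > 0 && params.trim_winqual > 0)
        steps << "SLIDINGWINDOW:${params.trim_winsize}:${params.trim_winqual}"
    if (params.trim_leading  > 0) steps << "LEADING:${params.trim_leading}"
    if (params.trim_trailing > 0) steps << "TRAILING:${params.trim_trailing}"
    if (params.trim_headcrop > 0) steps << "HEADCROP:${params.trim_headcrop}"
    if (params.trim_crop     > 0) steps << "CROP:${params.trim_crop}"
    steps << "MINLEN:${params.trim_minlen}"
    return steps.join(' ')
}
```

With the defaults above this would yield `ILLUMINACLIP:NexteraPE-PE.fa:2:30:10 SLIDINGWINDOW:50:15 LEADING:15 TRAILING:15 MINLEN:100`.
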
+ }, + "multiqc_title": { + "type": "string", + "hidden": true, + "fa_icon": "fas fa-signature", + "description": "Custom title for the MultiQC report." + }, + "multiqc_config": { + "type": "string", + "format": "file-path", + "mimetype": "text/plain", + "description": "Custom config file to supply to MultiQC.", + "fa_icon": "fas fa-cog", + "hidden": true + }, + "max_multiqc_email_size": { + "type": "string", + "description": "File size limit when attaching MultiQC reports to summary emails.", + "default": "25.MB", + "fa_icon": "fas fa-file-upload", + "hidden": true, + "help_text": "If file generated by pipeline exceeds the threshold, it will not be attached." } } }, @@ -453,7 +416,7 @@ "type": "object", "fa_icon": "fab fa-acquisitions-incorporated", "description": "Set the top limit for requested resources for any single job.", - "help_text": "If you are running on a smaller system, a pipeline step requesting more resources than are available may cause the Nextflow to stop the run with an error. These options allow you to cap the maximum resources requested by any single job so that the pipeline will run on your system.\n\nNote that you can not _increase_ the resources requested by any job using these options. For that you will need your own configuration file. See [the nf-core website](https://nf-co.re/usage/configuration) for details.", + "help_text": "If you are running on a smaller system, a pipeline step requesting more resources than are available may cause the Nextflow to stop the run with an error. These options allow you to cap the maximum resources requested by any single job so that the pipeline will run on your system. Note that you can not _increase_ the resources requested by any job using these options. For that you will need your own configuration file. See [the nf-core website](https://nf-co.re/usage/configuration) for details.", "properties": { "max_cpus": { "type": "integer", @@ -480,14 +443,185 @@ "pattern": "^(\\d+\\.?\\s*(s|m|h|day)\\s*)+$", "hidden": true, "help_text": "Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. `--max_time '2.h'`" + } + } + }, + "generic_options": { + "title": "Generic options", + "type": "object", + "fa_icon": "fas fa-file-import", + "description": "Less common options for the pipeline, typically set in a config file", + "help_text": "These options are common to all nf-core pipelines and allow you to customize some of the core preferences for how the pipeline runs. Typically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`.", + "properties": { + "help": { + "type": "boolean", + "description": "Display help text.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, + "publish_dir_mode": { + "type": "string", + "default": "copy", + "fa_icon": "fab fa-leanpub", + "description": "How to create results files", + "enum": ["symlink", "relink", "link", "copy", "copyNoFollow", "move"], + "hidden": true + }, + "email_on_fail": { + "type": "string", + "description": "Email address for completion summary, only when pipeline fails.", + "fa_icon": "fas fa-exclamation-triangle", + "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$", + "hidden": true, + "help_text": "This works exactly as with `--email`, except emails are only sent if the workflow is not successful." 
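
As the `max_job_request_options` help text above stresses, these caps can only lower a job's request, never raise it. A hypothetical workstation-sized override, equivalent to passing `--max_cpus 4 --max_memory '8.GB' --max_time '12.h'` on the command line:

```groovy
// Hypothetical ceilings for a small workstation; any process asking for
// more is clamped down by check_max rather than failing the scheduler.
params {
    max_cpus   = 4
    max_memory = '8.GB'  // Nextflow memory-unit string
    max_time   = '12.h'  // Nextflow duration string
}
```
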
+ }, + "plaintext_email": { + "type": "boolean", + "description": "Send plain-text email instead of HTML.", + "fa_icon": "fas fa-remove-format", + "hidden": true, + "help_text": "Set to receive plain-text e-mails instead of HTML formatted." + }, + "monochrome_logs": { + "type": "boolean", + "description": "Do not use colored log outputs.", + "fa_icon": "fas fa-palette", + "hidden": true, + "help_text": "Set to disable colorful command line output and live life in glorious monochrome." + }, + "breed": { + "type": "string", + "hidden": true + }, + "tracedir": { + "type": "string", + "description": "Directory to keep pipeline Nextflow logs and reports.", + "default": "${params.outdir}/pipeline_info", + "fa_icon": "fas fa-cogs", + "hidden": true }, "enable_conda": { "type": "boolean", "hidden": true, "description": "Run this workflow with Conda. You can also use '-profile conda' instead of providing this parameter.", "fa_icon": "fas fa-bacon" + }, + "validate_params": { + "type": "boolean", + "description": "Boolean whether to validate parameters against the schema at runtime", + "default": true, + "fa_icon": "fas fa-check-square", + "hidden": true + }, + "show_hidden_params": { + "type": "boolean", + "fa_icon": "far fa-eye-slash", + "description": "Show all params when using `--help`", + "hidden": true, + "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters." + } + } + }, + "institutional_config_options": { + "title": "Institutional config options", + "type": "object", + "fa_icon": "fas fa-university", + "description": "Parameters used to describe centralised config profiles. These should not be edited.", + "help_text": "The centralised nf-core configuration profiles use a handful of pipeline parameters to describe themselves. This information is then printed to the Nextflow log when you run a pipeline. You should not need to change these values when you run a pipeline.", + "properties": { + "custom_config_version": { + "type": "string", + "description": "Git commit id for Institutional configs.", + "default": "master", + "hidden": true, + "fa_icon": "fas fa-users-cog", + "help_text": "Provide git commit id for custom Institutional configs hosted at `nf-core/configs`. This was implemented for reproducibility purposes. Default: `master`. ```bash ## Download and use config file with following git commit id --custom_config_version d52db660777c4bf36546ddb188ec530c3ada1b96 ```" + }, + "custom_config_base": { + "type": "string", + "format": "directory-path", + "description": "Base directory for Institutional configs.", + "default": "https://raw.githubusercontent.com/nf-core/configs/master", + "hidden": true, + "help_text": "If you're running offline, nextflow will not be able to fetch the institutional config files from the internet. If you don't need them, then this is not a problem. If you do need them, you should download the files from the repo and tell nextflow where to find them with the `custom_config_base` option. 
For example: ```bash ## Download and unzip the config files cd /path/to/my/configs wget https://github.com/nf-core/configs/archive/master.zip unzip master.zip ## Run the pipeline cd /path/to/my/data nextflow run /path/to/pipeline/ --custom_config_base /path/to/my/configs/configs-master/ ``` > Note that the nf-core/tools helper package has a `download` command to download all required pipeline files + singularity containers + institutional configs in one go for you, to make this process easier.", + "fa_icon": "fas fa-users-cog" + }, + "config_profile_name": { + "type": "string", + "description": "Institutional config name.", + "hidden": true, + "fa_icon": "fas fa-users-cog" + }, + "config_profile_description": { + "type": "string", + "description": "Institutional config description.", + "hidden": true, + "fa_icon": "fas fa-users-cog" + }, + "config_profile_contact": { + "type": "string", + "description": "Institutional config contact information.", + "hidden": true, + "fa_icon": "fas fa-users-cog" + }, + "config_profile_url": { + "type": "string", + "description": "Institutional config URL link.", + "hidden": true, + "fa_icon": "fas fa-users-cog" } } + }, + "remote_presentation_resource_options": { + "title": "Remote presentation resource options", + "type": "object", + "description": "", + "default": "", + "properties": { + "freezetable_js": { + "type": "string", + "default": "https://cdn.jsdelivr.net/npm/jquery-freeze-table@1.3.0/dist/js/freeze-table.min.js", + "fa_icon": "fas fa-igloo", + "description": "JavaScript file to help with haplotype alignment table", + "hidden": true + }, + "igv_js": { + "type": "string", + "default": "https://cdn.jsdelivr.net/npm/igv@2.11.0/dist/igv.js", + "fa_icon": "fas fa-align-left", + "hidden": true, + "description": "JavaScript file to show alignments in a browser" + }, + "phylotree_css": { + "type": "string", + "default": "https://cdn.jsdelivr.net/npm/phylotree@1.0.13/dist/phylotree.css", + "fa_icon": "fas fa-code-branch", + "hidden": true, + "description": "CSS file for phylogenetic trees" + }, + "d3_js": { + "type": "string", + "default": "https://cdn.jsdelivr.net/npm/d3@5.16.0/dist/d3.min.js", + "fa_icon": "fas fa-database", + "hidden": true, + "description": "JavaScript dependency for phylotree.js" + }, + "underscore_js": { + "type": "string", + "default": "https://cdn.jsdelivr.net/npm/underscore@1.13.2/underscore-umd.min.js", + "fa_icon": "fas fa-underline", + "hidden": true, + "description": "JavaScript dependency for phylotree.js" + }, + "phylotree_js": { + "type": "string", + "default": "https://cdn.jsdelivr.net/npm/phylotree@1.0.13/dist/phylotree.min.js", + "fa_icon": "fas fa-code-branch", + "hidden": true, + "description": "JavaScript to show phylogenetic trees in a browser" + } + }, + "fa_icon": "fas fa-globe" } }, "allOf": [ @@ -517,6 +651,15 @@ }, { "$ref": "#/definitions/max_job_request_options" + }, + { + "$ref": "#/definitions/generic_options" + }, + { + "$ref": "#/definitions/institutional_config_options" + }, + { + "$ref": "#/definitions/remote_presentation_resource_options" } ] } diff --git a/subworkflows/alignment.nf b/subworkflows/alignment.nf deleted file mode 100644 index 137044b6..00000000 --- a/subworkflows/alignment.nf +++ /dev/null @@ -1,31 +0,0 @@ -include { MINIMAP2_ALIGN } from '../modules/nf-core/modules/minimap2/align/main.nf' -include { SAMTOOLS_SORT } from '../modules/nf-core/modules/samtools/sort/main.nf' -include { SAMTOOLS_INDEX } from '../modules/nf-core/modules/samtools/index/main.nf' - -workflow ALIGNMENT { - 
take: - reads - reference - - main: - versions = Channel.empty() - - // Realign reads to the reference genome - // Note: Normally, minimap2 outputs paf, but we have forced it to output sam via - // ext.args in modules.config - MINIMAP2_ALIGN(reads, reference) - SAMTOOLS_SORT(MINIMAP2_ALIGN.out.paf) - SAMTOOLS_INDEX(SAMTOOLS_SORT.out.bam) - - SAMTOOLS_SORT.out.bam.set{ bam } - SAMTOOLS_INDEX.out.bai.set{ bai } - - versions = versions.mix(MINIMAP2_ALIGN.out.versions) - versions = versions.mix(SAMTOOLS_SORT.out.versions) - versions = versions.mix(SAMTOOLS_INDEX.out.versions) - - emit: - bam - bai - versions -} diff --git a/subworkflows/closest-reference.nf b/subworkflows/closest-reference.nf deleted file mode 100644 index 69656a63..00000000 --- a/subworkflows/closest-reference.nf +++ /dev/null @@ -1,106 +0,0 @@ -include { BLAST_BLASTN } from '../modules/nf-core/modules/blast/blastn/main.nf' -include { BLAST_MAKEBLASTDB } from '../modules/nf-core/modules/blast/makeblastdb/main.nf' -include { CUSTOM_ALIGNMENT } from './custom-alignment.nf' -include { EDIRECT_EFETCH } from '../modules/ksumngs/nf-modules/edirect/efetch/main.nf' -include { EDIRECT_ESEARCH } from '../modules/ksumngs/nf-modules/edirect/esearch/main.nf' -include { IVAR_CONSENSUS } from '../modules/nf-core/modules/ivar/consensus/main.nf' -include { SAMTOOLS_FASTQ } from '../modules/nf-core/modules/samtools/fastq/main.nf' - -workflow CLOSEST_REFERENCE { - take: - reads - reference - genome_list - - main: - versions = Channel.empty() - - // Transform the TSV genome list into an edirect query - genomeQuery = genome_list - .first() - .readLines() - .collect{ it.split('\t')[1] } - .join(' OR ') - - // Search NCBI for the accession numbers - EDIRECT_ESEARCH(genomeQuery, 'nucleotide') - - // Download the matching genomes in fasta format - EDIRECT_EFETCH(EDIRECT_ESEARCH.out.xml, 'fasta', '') - EDIRECT_EFETCH.out.txt.set{ genome_fasta } - - // Make a BLAST database out of the strain reference genomes - BLAST_MAKEBLASTDB(genome_fasta) - - // Get the consensus sequence of each sample - IVAR_CONSENSUS(reads, reference, false) - IVAR_CONSENSUS.out.fasta.set{ consensus_fasta } - - // BLAST the consensus sequence against all of the reference genomes - BLAST_BLASTN( - consensus_fasta, - BLAST_MAKEBLASTDB.out.db - ) - - BLAST_BLASTN.out.txt - .map{[ - it[0], - it[1].readLines()[0] - ]} - .set{ accession } - - // Create a channel with strain genome information - // [accession, strain] - Channel - .fromPath(genome_list) - .splitCsv(sep: '\t') - .map{ [ it[1], it[0] ] } - .set{ GenomeTable } - - // Get the strain name of each sample's closest BLAST hit - // [meta, strain name] - accession - .map{ [it[1], it[0]] } - .combine(GenomeTable, by: 0) - .map{ [it[1], it[2]] } - .set{ strain } - - // Create a channel containing every strain's genome in fasta format - // [accession, fasta] - genome_fasta - .splitFasta(file: true) - .map{ [it.readLines()[0].split(' ')[0].replace('>', ''), it] } - .set{ GenomeFastas } - - // Get the genome of each sample's closest BLAST hit in fasta format - accession - .combine(GenomeFastas.map{ [ it[1], it[0] ] }, by: 1) - .map{ [ it[1], it[2] ] } - .set{ fasta } - - // Convert the aligned reads back into fastq format (unalign them?) 
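
The tuple index shuffling in the strain lookup above is easy to misread, so here is a worked trace of one element through the chain, using the default JEV reference accession and an otherwise made-up sample:

```groovy
// Made-up values tracing one sample through the strain lookup above.
//
// accession element:   [ [id:'sample1'], 'NC_001437.1' ]
// GenomeTable element: [ 'NC_001437.1', 'JEV-prototype' ]
//
// .map{ [it[1], it[0]] }        // -> [ 'NC_001437.1', [id:'sample1'] ]   (accession moved first)
// .combine(GenomeTable, by: 0)  // -> [ 'NC_001437.1', [id:'sample1'], 'JEV-prototype' ]
// .map{ [it[1], it[2]] }        // -> [ [id:'sample1'], 'JEV-prototype' ] (meta, strain)
```
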
- SAMTOOLS_FASTQ(reads) - - // Align the reads to their new reference genome - CUSTOM_ALIGNMENT(SAMTOOLS_FASTQ.out.fastq.join(fasta)) - CUSTOM_ALIGNMENT.out.bam.set{ bam } - CUSTOM_ALIGNMENT.out.bai.set{ bai } - - versions = versions.mix(EDIRECT_ESEARCH.out.versions) - versions = versions.mix(EDIRECT_EFETCH.out.versions) - versions = versions.mix(BLAST_MAKEBLASTDB.out.versions) - versions = versions.mix(IVAR_CONSENSUS.out.versions) - versions = versions.mix(BLAST_BLASTN.out.versions) - versions = versions.mix(SAMTOOLS_FASTQ.out.versions) - versions = versions.mix(CUSTOM_ALIGNMENT.out.versions) - - emit: - accession - strain - fasta - bam - bai - genome_fasta - consensus_fasta - versions -} diff --git a/subworkflows/custom-alignment.nf b/subworkflows/custom-alignment.nf deleted file mode 100644 index 3f1272de..00000000 --- a/subworkflows/custom-alignment.nf +++ /dev/null @@ -1,31 +0,0 @@ -include { MINIMAP2_ALIGN } from '../modules/ksumngs/nf-modules/minimap2/align/main.nf' -include { SAMTOOLS_SORT } from '../modules/nf-core/modules/samtools/sort/main.nf' -include { SAMTOOLS_INDEX } from '../modules/nf-core/modules/samtools/index/main.nf' - -workflow CUSTOM_ALIGNMENT { - take: - reads - - main: - versions = Channel.empty() - - // Realign reads to the reference genome - // Note: Normally, minimap2 outputs paf, but we have forced it to output sam via - // ext.args in modules.config - MINIMAP2_ALIGN(reads) - SAMTOOLS_SORT(MINIMAP2_ALIGN.out.paf) - SAMTOOLS_INDEX(SAMTOOLS_SORT.out.bam) - - SAMTOOLS_SORT.out.bam.set{ bam } - SAMTOOLS_INDEX.out.bai.set{ bai } - - versions = versions.mix(MINIMAP2_ALIGN.out.versions) - versions = versions.mix(SAMTOOLS_SORT.out.versions) - versions = versions.mix(SAMTOOLS_INDEX.out.versions) - - - emit: - bam - bai - versions -} diff --git a/subworkflows/filtering.nf b/subworkflows/filtering.nf deleted file mode 100644 index c9a10d57..00000000 --- a/subworkflows/filtering.nf +++ /dev/null @@ -1,56 +0,0 @@ -#!/usr/bin/env nextflow -nextflow.enable.dsl = 2 - -include { KRAKEN2 } from '../modules/ksumngs/nf-modules/kraken2/main.nf' -include { KRAKENTOOLS_EXTRACT } from '../modules/ksumngs/nf-modules/krakentools/extract/main.nf' -include { KRAKENTOOLS_KREPORT2KRONA } from '../modules/ksumngs/nf-modules/krakentools/kreport2krona/main.nf' -include { KRONA_IMPORTTEXT } from '../modules/ksumngs/nf-modules/krona/importtext/main.nf' - -workflow FILTERING { - take: - reads - kraken2_db - filter - - main: - versions = Channel.empty() - - KRAKEN2(reads, kraken2_db) - - KRAKEN2.out.kreport.set{ log_out } - - if (filter == 'classified') { - KRAKEN2.out.classified.set{ filtered } - } - else if ( filter == 'unclassified') { - KRAKEN2.out.unclassified.set{ filtered } - } - else { - KRAKENTOOLS_EXTRACT( - reads - .join(KRAKEN2.out.kraken) - .join(KRAKEN2.out.kreport), - filter - ) - KRAKENTOOLS_EXTRACT.out.fastq.set{ filtered } - versions = versions.mix(KRAKENTOOLS_EXTRACT.out.versions) - } - - KRAKENTOOLS_KREPORT2KRONA(KRAKEN2.out.kreport) - KRONA_IMPORTTEXT( - KRAKENTOOLS_KREPORT2KRONA.out.krona - .map{ it.drop(1) } - .collect() - ) - KRONA_IMPORTTEXT.out.html.set{ krona } - - versions = versions.mix(KRAKEN2.out.versions) - versions = versions.mix(KRONA_IMPORTTEXT.out.versions) - versions = versions.mix(KRAKENTOOLS_KREPORT2KRONA.out.versions) - - emit: - filtered - log_out - krona - versions -} diff --git a/subworkflows/haplotype.nf b/subworkflows/haplotype.nf deleted file mode 100644 index 086601ad..00000000 --- a/subworkflows/haplotype.nf +++ /dev/null @@ -1,70 +0,0 @@ 
-#!/usr/bin/env nextflow -nextflow.enable.dsl = 2 - -include { CLIQUESNV_ILLUMINA } from '../modules/ksumngs/nf-modules/cliquesnv/illumina/main.nf' -include { CLIQUESNV_ILLUMINAVC } from '../modules/ksumngs/nf-modules/cliquesnv/illuminavc/main.nf' -include { HAPLINK_HAPLOTYPES } from '../modules/local/modules/haplink/haplotypes/main.nf' -include { HAPLINK_SEQUENCES } from '../modules/local/modules/haplink/sequences/main.nf' -include { HAPLINK_VARIANTS } from '../modules/local/modules/haplink/variants/main.nf' -include { JSON2YAML } from '../modules/local/json2yaml.nf' -include { PHYLOGENETIC_TREE } from './phylogenetics.nf' - -workflow HAPLOTYPING { - take: - alignments - references - - main: - versions = Channel.empty() - - if (params.platform == 'illumina') { - // Drop the BAM index: CliqueSNV doesn't need it - alignments - .map{ it.dropRight(1) } - .set{ UnindexedAlignments } - - // Do variant calling - CLIQUESNV_ILLUMINAVC(UnindexedAlignments) - CLIQUESNV_ILLUMINAVC.out.vcf.set{ vcf } - - // Do haplotype calling - CLIQUESNV_ILLUMINA(UnindexedAlignments) - CLIQUESNV_ILLUMINA.out.fasta.set{ fasta } - - // Convert haplotyp JSON to YAML - JSON2YAML(CLIQUESNV_ILLUMINA.out.json) - JSON2YAML.out.yaml.set{ yaml } - - versions = versions.mix(CLIQUESNV_ILLUMINAVC.out.versions) - versions = versions.mix(CLIQUESNV_ILLUMINA.out.versions) - versions = versions.mix(JSON2YAML.out.versions) - } - else { - HAPLINK_VARIANTS(alignments.join(references)) - HAPLINK_VARIANTS.out.vcf.set{ vcf } - - HAPLINK_HAPLOTYPES( - alignments - .map{ it.dropRight(1) } - .join(HAPLINK_VARIANTS.out.vcf) - .join(references) - ) - HAPLINK_HAPLOTYPES.out.yaml.set{ yaml } - - HAPLINK_SEQUENCES( - HAPLINK_HAPLOTYPES.out.yaml - .join(references) - ) - HAPLINK_SEQUENCES.out.fasta.set{ fasta } - - versions = versions.mix(HAPLINK_VARIANTS.out.versions) - versions = versions.mix(HAPLINK_HAPLOTYPES.out.versions) - versions = versions.mix(HAPLINK_SEQUENCES.out.versions) - } - - emit: - vcf - yaml - fasta - versions -} diff --git a/subworkflows/local/closest-reference.nf b/subworkflows/local/closest-reference.nf new file mode 100644 index 00000000..1883e45a --- /dev/null +++ b/subworkflows/local/closest-reference.nf @@ -0,0 +1,41 @@ +include { BLAST_BLASTN } from '../../modules/nf-core/modules/blast/blastn/main.nf' +include { BLAST_MAKEBLASTDB } from '../../modules/nf-core/modules/blast/makeblastdb/main.nf' + +workflow CLOSEST_REFERENCE { + take: + consensus_fasta + genome_strain + genome_fasta + + main: + versions = Channel.empty() + + // Make a BLAST database out of the strain reference genomes + BLAST_MAKEBLASTDB(genome_fasta.map{ it[1] }.collectFile(name: 'genomes.fasta')) + versions = versions.mix(BLAST_MAKEBLASTDB.out.versions) + + // BLAST the consensus sequence against all of the reference genomes + BLAST_BLASTN(consensus_fasta, BLAST_MAKEBLASTDB.out.db.first()) + BLAST_BLASTN.out.txt.map{ [it[0], it[1].readLines()[0]] }.set{ accession } + versions = versions.mix(BLAST_BLASTN.out.versions) + + // Get the strain name of each sample's closest BLAST hit + // [meta, strain name] + accession + .map{ [it[1], it[0]] } + .combine(genome_strain, by: 0) + .map{ [it[1], it[2]] } + .set{ strain } + + // Get the genome of each sample's closest BLAST hit in fasta format + accession + .combine(genome_fasta.map{ [it[1], it[0]] }, by: 1) + .map{ [it[1], it[2]] } + .set{ fasta } + + emit: + accession + strain + fasta + versions +} diff --git a/subworkflows/local/consensus.nf b/subworkflows/local/consensus.nf new file mode 100644 index 
00000000..257639ca --- /dev/null +++ b/subworkflows/local/consensus.nf @@ -0,0 +1,36 @@ +include { CLIQUESNV_CONSENSUSILLUMINA } from '../../modules/ksumngs/nf-modules/cliquesnv/consensusillumina/main.nf' +include { HAPLINK_CONSENSUS } from '../../modules/local/haplink/consensus' +include { HAPLINK_VARIANTS } from '../../modules/local/haplink/variants' + +workflow CONSENSUS { + take: + bam + bai + reference + + main: + versions = Channel.empty() + + if (params.platform == 'illumina') { + CLIQUESNV_CONSENSUSILLUMINA(bam) + CLIQUESNV_CONSENSUSILLUMINA.out.fasta.set{ fasta } + versions = versions.mix(CLIQUESNV_CONSENSUSILLUMINA.out.versions) + } + else if (params.platform == 'nanopore') { + BamPlusReference = bam + .join(bai) + .combine(reference) + + HAPLINK_VARIANTS(BamPlusReference) + HAPLINK_VARIANTS.out.vcf.set{ vcf } + versions = versions.mix(HAPLINK_VARIANTS.out.versions) + + HAPLINK_CONSENSUS(vcf.combine(reference)) + HAPLINK_CONSENSUS.out.fasta.set{ fasta } + versions = versions.mix(HAPLINK_CONSENSUS.out.versions) + } + + emit: + fasta + versions +} diff --git a/subworkflows/local/filtering.nf b/subworkflows/local/filtering.nf new file mode 100644 index 00000000..6029f6d4 --- /dev/null +++ b/subworkflows/local/filtering.nf @@ -0,0 +1,79 @@ +#!/usr/bin/env nextflow +nextflow.enable.dsl = 2 + +include { KRAKEN2 } from '../../modules/ksumngs/nf-modules/kraken2/main.nf' +include { KRAKEN2_DBPREPARATION } from '../../modules/local/kraken2/dbpreparation.nf' +include { KRAKENTOOLS_EXTRACT } from '../../modules/ksumngs/nf-modules/krakentools/extract/main.nf' +include { KRAKENTOOLS_KREPORT2KRONA } from '../../modules/ksumngs/nf-modules/krakentools/kreport2krona/main.nf' +include { KRONA_IMPORTTEXT } from '../../modules/ksumngs/nf-modules/krona/importtext/main.nf' + +workflow FILTERING { + take: + reads + kraken2_db + filter + + main: + versions = Channel.empty() + + // + // Kraken2 database conversion: + // Kraken2 uses a directory of .k2d files as a sequence database + // + KrakenDb = file(kraken2_db, checkIfExists: true) + if (!KrakenDb.isDirectory()) { + if (KrakenDb.getExtension() == 'k2d') { + // The user got confused, and passed a database file, we'll try to + // correct it for them + log.warn "WARNING: ${params.kraken2_db} appears to be a file that is a *part* of a Kraken2 database." + log.warn " Kraken databases are folders that contain multiple files." + log.warn " YAVSAP will attempt to use the parent directory as the database, but it might fail!" 
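
The database-preparation branch that begins above (and concludes just below with the tarball fallback) accepts `--kraken2_db` in three forms; a summary sketch with hypothetical paths:

```groovy
// Hypothetical values for --kraken2_db and how the branch handles them:
params.kraken2_db = '/refs/kraken2/viral'           // directory of .k2d files: used as-is
params.kraken2_db = '/refs/kraken2/viral/hash.k2d'  // lone .k2d file: parent directory substituted, with a warning
params.kraken2_db = '/refs/kraken2/viral.tar.gz'    // anything else: passed to KRAKEN2_DBPREPARATION to unpack
```
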
+ KrakenDb = KrakenDb.getParent() + } + else { + // We'll assume this is a tarballed database + KRAKEN2_DBPREPARATION(KrakenDb) + KrakenDb = KRAKEN2_DBPREPARATION.out.db + versions = versions.mix(KRAKEN2_DBPREPARATION.out.versions) + } + } + + KRAKEN2(reads, KrakenDb) + + KRAKEN2.out.kreport.set{ log_out } + + if (filter == 'classified') { + KRAKEN2.out.classified.set{ filtered } + } + else if (filter == 'unclassified') { + KRAKEN2.out.unclassified.set{ filtered } + } + else { + KRAKENTOOLS_EXTRACT( + reads + .join(KRAKEN2.out.kraken) + .join(KRAKEN2.out.kreport), + filter + ) + KRAKENTOOLS_EXTRACT.out.fastq.set{ filtered } + versions = versions.mix(KRAKENTOOLS_EXTRACT.out.versions) + } + + KRAKENTOOLS_KREPORT2KRONA(KRAKEN2.out.kreport) + KRONA_IMPORTTEXT( + KRAKENTOOLS_KREPORT2KRONA.out.krona + .map{ it.drop(1) } + .collect() + ) + KRONA_IMPORTTEXT.out.html.set{ krona } + + versions = versions.mix(KRAKEN2.out.versions) + versions = versions.mix(KRONA_IMPORTTEXT.out.versions) + versions = versions.mix(KRAKENTOOLS_KREPORT2KRONA.out.versions) + + emit: + filtered + log_out + krona + versions +} diff --git a/subworkflows/local/genomes.nf b/subworkflows/local/genomes.nf new file mode 100644 index 00000000..cc3afcaf --- /dev/null +++ b/subworkflows/local/genomes.nf @@ -0,0 +1,52 @@ +include { EDIRECT_EFETCH } from '../../modules/ksumngs/nf-modules/edirect/efetch/main.nf' +include { EDIRECT_ESEARCH } from '../../modules/ksumngs/nf-modules/edirect/esearch/main.nf' + +workflow GENOME_DOWNLOAD { + take: + genome_list + + main: + versions = Channel.empty() + + // Find the strain genomes list + genomeFile = file("${genome_list}", type: 'file') + if (!genomeFile.toFile().exists()) { + genomeFile = file( + "${workflow.projectDir}/genomes/${genome_list}.tsv", + checkIfExists: true, + type: 'file' + ) + } + + // Transform the genome list into a channel + Channel + .fromPath(genomeFile) + .splitCsv(sep: '\t') + .map{ [it[1], it[0]] } + .set{ strain } + + // Transform the TSV genome list into an edirect query + genomeQuery = genomeFile + .readLines() + .collect{ it.split('\t')[1] } + .join(' OR ') + + // Search NCBI for the accession numbers + EDIRECT_ESEARCH(genomeQuery, 'nucleotide') + versions = versions.mix(EDIRECT_ESEARCH.out.versions) + + // Download the matching genomes in fasta format + EDIRECT_EFETCH(EDIRECT_ESEARCH.out.xml, 'fasta', '') + // Key each genome by its accession number (the first word of its fasta header) + EDIRECT_EFETCH + .out + .txt + .splitFasta(file: true) + .map{ [it.readLines()[0].split(' ')[0].replace('>', ''), it] } + .set{ fasta } + versions = versions.mix(EDIRECT_EFETCH.out.versions) + + emit: + strain // [accession, strain] + fasta // [accession, fasta] + versions +} diff --git a/subworkflows/local/haplotype.nf b/subworkflows/local/haplotype.nf new file mode 100644 index 00000000..7213528c --- /dev/null +++ b/subworkflows/local/haplotype.nf @@ -0,0 +1,41 @@ +#!/usr/bin/env nextflow +nextflow.enable.dsl = 2 + +include { CLIQUESNV_ILLUMINA } from '../../modules/ksumngs/nf-modules/cliquesnv/illumina/main.nf' +include { HAPLINK_HAPLOTYPES } from '../../modules/local/haplink/haplotypes' +include { HAPLINK_SEQUENCES } from '../../modules/local/haplink/sequences' +include { JSON2YAML } from '../../modules/local/json2yaml.nf' + +workflow HAPLOTYPING { + take: + bam + vcf + reference + + main: + versions = Channel.empty() + + if (params.platform == 'illumina') { + CLIQUESNV_ILLUMINA(bam) + CLIQUESNV_ILLUMINA.out.fasta.set{ fasta } + versions = versions.mix(CLIQUESNV_ILLUMINA.out.versions) + + JSON2YAML(CLIQUESNV_ILLUMINA.out.json) + JSON2YAML.out.yaml.set{ yaml
} + versions = versions.mix(JSON2YAML.out.versions) + } + else { + HAPLINK_HAPLOTYPES(bam.join(vcf).join(reference)) + HAPLINK_HAPLOTYPES.out.yaml.set{ yaml } + versions = versions.mix(HAPLINK_HAPLOTYPES.out.versions) + + HAPLINK_SEQUENCES(HAPLINK_HAPLOTYPES.out.yaml.join(reference)) + HAPLINK_SEQUENCES.out.fasta.set{ fasta } + versions = versions.mix(HAPLINK_SEQUENCES.out.versions) + } + + emit: + yaml + fasta + versions +} diff --git a/subworkflows/ingest.nf b/subworkflows/local/ingest.nf similarity index 95% rename from subworkflows/ingest.nf rename to subworkflows/local/ingest.nf index 0e57c05b..5372855a 100644 --- a/subworkflows/ingest.nf +++ b/subworkflows/local/ingest.nf @@ -1,8 +1,8 @@ #!/usr/bin/env nextflow nextflow.enable.dsl = 2 -include { CAT_FASTQ } from '../modules/ksumngs/nf-modules/cat/fastq/main.nf' -include { SEQKIT_SPLIT2 } from '../modules/nf-core/modules/seqkit/split2/main.nf' +include { CAT_FASTQ } from '../../modules/ksumngs/nf-modules/cat/fastq/main.nf' +include { SEQKIT_SPLIT2 } from '../../modules/nf-core/modules/seqkit/split2/main.nf' /// summary: | /// Take reads from the input folder or a samplesheet and reformat them to be diff --git a/subworkflows/local/phylogenetics.nf b/subworkflows/local/phylogenetics.nf new file mode 100644 index 00000000..87912e87 --- /dev/null +++ b/subworkflows/local/phylogenetics.nf @@ -0,0 +1,115 @@ +#!/usr/bin/env nextflow +nextflow.enable.dsl = 2 + +include { MAFFT } from '../../modules/nf-core/modules/mafft/main.nf' +include { RAXMLNG_BOOTSTRAP } from '../../modules/ksumngs/nf-modules/raxmlng/bootstrap/main.nf' +include { RAXMLNG_PARSE } from '../../modules/ksumngs/nf-modules/raxmlng/parse/main.nf' +include { RAXMLNG_SEARCH } from '../../modules/ksumngs/nf-modules/raxmlng/search/main.nf' +include { RAXMLNG_SUPPORT } from '../../modules/ksumngs/nf-modules/raxmlng/support/main.nf' + +/// summary: Create a phylogenetic tree from haplotypes, consensus sequences, and strain genomes +/// input: +/// - name: haplotype_fasta +/// type: channel +/// description: Per-sample haplotype sequences in fasta format +/// - name: consensus_fasta +/// type: channel +/// description: Per-sample consensus sequences in fasta format +/// - name: genome_fasta +/// type: channel +/// description: Strain reference genomes in fasta format, keyed by accession +/// - name: genome_strain +/// type: channel +/// description: Strain names keyed by accession +/// output: +/// - name: tree +/// type: path +/// description: Annotated support tree +workflow PHYLOGENETIC_TREE { + take: + haplotype_fasta + consensus_fasta + genome_fasta + genome_strain + + main: + versions = Channel.empty() + + // This beautiful one-liner deserves the documentation of a full-blown process. + // In fact, it cuts out the need for a class of finicky R processes, + // so here it is step-by-step: + // 1. Split the id'ed fasta haplotypes into record objects + // 2. Remove any consensus sequences (more of a concern with HapLink) + // 3. Reassign an id to HapLink haplotypes based on the SHA1 hash that + // HapLink assigns to each haplotype + // 4. Reassign an id to CliqueSNV haplotypes based on the ID_NUM_FREQ + // extended tag that CliqueSNV assigns to each haplotype + // 5. Create a fasta string with all the sequences + // 6. Create a file out of it + haplotype_fasta + .splitFasta(record: [id: true, sequence: true], elem: 1) + .filter{ !it[1].id.toLowerCase().contains('consensus') } + .map{ [it[0], [id:it[1].id, sequence:it[1].sequence]] } + .map{ + [ + it[0], + [ + id: it[1].id ==~ /^[0-9a-f]{8}$/ ? + "${it[0].id}_haplotype_${it[1].id}" : it[1].id, + sequence: it[1].sequence + ] + ] + } + .map{ + [ + it[0], + [ + id: it[1].id ==~ /^.+_[0-9]+_[0-1]\.[0-9]{2}$/ ?
+ "${it[0].id}_haplotype_${it[1].id.split('_')[1]}" : it[1].id, + sequence: it[1].sequence + ] + ] + } + .map{ ">${it[1].id}\n${it[1].sequence}" } + .set{ ch_renamed_haplotype } + + consensus_fasta + .splitFasta(record: [sequence: true], elem: 1) + .map{ ">${it[0].id}_consensus\n${it[1].sequence}" } + .set{ ch_renamed_consensus } + + genome_strain // [accession, strain] + .join(genome_fasta) // [accession, strain, fasta] + .map{ it.drop(1) } // [strain, fasta] + .map{ [it[0].contains('ROOT') ? 'ROOT' : it[0], it[1]] } + .splitFasta(record: [sequence: true], elem: 1) // [strain, [sequence]] + .map{ ">${it[0]}\n${it[1].sequence}" } + .set{ ch_renamed_genome } + + ch_renamed_haplotype + .mix(ch_renamed_consensus) + .mix(ch_renamed_genome) + .collectFile(name: 'sequences.fasta') + .map{ [[id: 'collective', single_end: null, strandedness: null], it] } + .set{ ch_all_sequences } + + MAFFT(ch_all_sequences) + versions = versions.mix(MAFFT.out.versions) + + RAXMLNG_PARSE(MAFFT.out.fas.map{ it[1] }) + versions = versions.mix(RAXMLNG_PARSE.out.versions) + + RAXMLNG_SEARCH(RAXMLNG_PARSE.out.rba) + versions = versions.mix(RAXMLNG_SEARCH.out.versions) + + RAXMLNG_BOOTSTRAP(RAXMLNG_PARSE.out.rba) + versions = versions.mix(RAXMLNG_BOOTSTRAP.out.versions) + + RAXMLNG_SUPPORT(RAXMLNG_SEARCH.out.best_tree, RAXMLNG_BOOTSTRAP.out.bootstraps) + versions = versions.mix(RAXMLNG_SUPPORT.out.versions) + + tree = RAXMLNG_SUPPORT.out.support + + emit: + tree + versions +} diff --git a/subworkflows/local/presentation.nf b/subworkflows/local/presentation.nf new file mode 100644 index 00000000..2392902e --- /dev/null +++ b/subworkflows/local/presentation.nf @@ -0,0 +1,93 @@ +include { HAPLOTYPECONVERT } from '../../modules/local/haplotypeconvert' +include { IGV } from '../../modules/local/igv' +include { PHYLOTREEJS } from '../../modules/local/phylotreejs' +include { SEQUENCETABLE } from '../../modules/local/sequencetable' + +workflow PRESENTATION { + take: + bam + reference + strain + accession + consensus + haplotype_fasta + haplotype_yaml + tree + + main: + versions = Channel.empty() + + HAPLOTYPECONVERT( + strain + .join(accession) + .join(consensus) + .join(haplotype_fasta, remainder: true) + .join(haplotype_yaml, remainder: true) + .map{ it[4] ? it : [it[0], it[1], it[2], it[3], [], it[5]] } + .map{ it[5] ? 
it : [it[0], it[1], it[2], it[3], it[4], []] }, + reference + ) + HAPLOTYPECONVERT + .out + .yaml + .map{ it[1] } + .collectFile(name: 'collated_haplotypes.yml', newLine: true) + .set{ ch_collected_haplotypes } + versions = versions.mix(HAPLOTYPECONVERT.out.versions) + + freezetable_js = file(params.freezetable_js, checkIfExists: true) + sequencetable_template = file( + "${workflow.projectDir}/assets/kelpie_mqc.html", checkIfExists: true + ) + tool_meta = [] + if (params.platform == 'illumina') { + tool_meta = file( + "${workflow.projectDir}/assets/cliquesnv_info.yml", checkIfExists: true + ) + } + else if (params.platform == 'nanopore') { + tool_meta = file( + "${workflow.projectDir}/assets/haplink_info.yml", checkIfExists: true + ) + } + SEQUENCETABLE( + ch_collected_haplotypes, + reference, + sequencetable_template, + tool_meta, + freezetable_js + ) + SEQUENCETABLE.out.mqc_html.set{ seqtable } + versions = versions.mix(SEQUENCETABLE.out.versions) + + igv_js = file(params.igv_js, checkIfExists: true) + igv_template = file("${workflow.projectDir}/assets/igv_mqc.html", checkIfExists: true) + IGV( + bam + .map{ "${it[0].id}" } + .collectFile(name: 'samplenames.txt', newLine: true), + igv_js, + igv_template + ) + IGV.out.mqc_html.set{ igv } + versions = versions.mix(IGV.out.versions) + + phylotree_css = file(params.phylotree_css, checkIfExists: true) + d3_js = file(params.d3_js, checkIfExists: true) + underscore_js = file(params.underscore_js, checkIfExists: true) + phylotree_js = file(params.phylotree_js, checkIfExists: true) + phylotree_template = file( + "${workflow.projectDir}/assets/phylotree_mqc.html", checkIfExists: true + ) + PHYLOTREEJS( + tree, phylotree_template, phylotree_css, d3_js, underscore_js, phylotree_js + ) + PHYLOTREEJS.out.mqc_html.set{ phylotree } + versions = versions.mix(PHYLOTREEJS.out.versions) + + emit: + seqtable + igv + phylotree + versions +} diff --git a/subworkflows/qc.nf b/subworkflows/local/qc.nf similarity index 74% rename from subworkflows/qc.nf rename to subworkflows/local/qc.nf index 2a11a11b..95e27ca9 100644 --- a/subworkflows/qc.nf +++ b/subworkflows/local/qc.nf @@ -1,9 +1,9 @@ #!/usr/bin/env nextflow nextflow.enable.dsl = 2 -include { FASTQC } from '../modules/nf-core/modules/fastqc/main.nf' -include { NANOSTAT } from '../modules/ksumngs/nf-modules/nanostat/main.nf' -include { SEQTK_MERGEPE } from '../modules/nf-core/modules/seqtk/mergepe/main.nf' +include { FASTQC } from '../../modules/nf-core/modules/fastqc/main.nf' +include { NANOSTAT } from '../../modules/ksumngs/nf-modules/nanostat/main.nf' +include { SEQTK_MERGEPE } from '../../modules/nf-core/modules/seqtk/mergepe/main.nf' /// summary: | /// Perform context-sensitive QC on fastq reads diff --git a/subworkflows/reference.nf b/subworkflows/local/reference.nf similarity index 66% rename from subworkflows/reference.nf rename to subworkflows/local/reference.nf index 6d3e472f..7d38df8f 100644 --- a/subworkflows/reference.nf +++ b/subworkflows/local/reference.nf @@ -1,11 +1,11 @@ #!/usr/bin/env nextflow nextflow.enable.dsl = 2 -include { EDIRECT_EFETCH } from '../modules/ksumngs/nf-modules/edirect/efetch/main.nf' -include { EDIRECT_ESEARCH } from '../modules/ksumngs/nf-modules/edirect/esearch/main.nf' -include { SAMTOOLS_FAIDX } from '../modules/nf-core/modules/samtools/faidx/main.nf' +include { EDIRECT_EFETCH } from '../../modules/ksumngs/nf-modules/edirect/efetch/main.nf' +include { EDIRECT_ESEARCH } from '../../modules/ksumngs/nf-modules/edirect/esearch/main.nf' +include { SAMTOOLS_FAIDX } from 
'../../modules/nf-core/modules/samtools/faidx/main.nf' -workflow GENOME_DOWNLOAD { +workflow REFERENCE_DOWNLOAD { main: versions = Channel.empty() diff --git a/subworkflows/trimming.nf b/subworkflows/local/trimming.nf similarity index 79% rename from subworkflows/trimming.nf rename to subworkflows/local/trimming.nf index b8988a2c..9670f560 100644 --- a/subworkflows/trimming.nf +++ b/subworkflows/local/trimming.nf @@ -1,8 +1,8 @@ #!/usr/bin/env nextflow nextflow.enable.dsl = 2 -include { NANOFILT } from '../modules/ksumngs/nf-modules/nanofilt/main.nf' -include { TRIMMOMATIC } from '../modules/ksumngs/nf-modules/trimmomatic/main.nf' +include { NANOFILT } from '../../modules/ksumngs/nf-modules/nanofilt/main.nf' +include { TRIMMOMATIC } from '../../modules/ksumngs/nf-modules/trimmomatic/main.nf' workflow TRIMMING { take: diff --git a/subworkflows/local/variants.nf b/subworkflows/local/variants.nf new file mode 100644 index 00000000..c2af115e --- /dev/null +++ b/subworkflows/local/variants.nf @@ -0,0 +1,29 @@ +include { CLIQUESNV_ILLUMINAVC } from '../../modules/ksumngs/nf-modules/cliquesnv/illuminavc/main.nf' +include { HAPLINK_VARIANTS } from '../../modules/local/haplink/variants' + +workflow VARIANTS { + take: + bam + bai + reference + + main: + versions = Channel.empty() + vcf = Channel.empty() + + if (params.platform == 'illumina') { + CLIQUESNV_ILLUMINAVC(bam) + CLIQUESNV_ILLUMINAVC.out.vcf.set{ vcf } + versions = versions.mix(CLIQUESNV_ILLUMINAVC.out.versions.first()) + } + + if (params.platform == 'nanopore') { + HAPLINK_VARIANTS(bam.join(bai).join(reference)) + HAPLINK_VARIANTS.out.vcf.set{ vcf } + versions = versions.mix(HAPLINK_VARIANTS.out.versions.first()) + } + + emit: + vcf + versions +} diff --git a/subworkflows/phylogenetics.nf b/subworkflows/phylogenetics.nf deleted file mode 100644 index 053e1c85..00000000 --- a/subworkflows/phylogenetics.nf +++ /dev/null @@ -1,91 +0,0 @@ -#!/usr/bin/env nextflow -nextflow.enable.dsl = 2 - -include { CAT_CAT } from '../modules/nf-core/modules/cat/cat/main.nf' -include { CAT_FASTQ } from '../modules/ksumngs/nf-modules/cat/fastq/main.nf' -include { MAFFT } from '../modules/nf-core/modules/mafft/main.nf' -include { RAXMLNG_BOOTSTRAP } from '../modules/ksumngs/nf-modules/raxmlng/bootstrap/main.nf' -include { RAXMLNG_PARSE } from '../modules/ksumngs/nf-modules/raxmlng/parse/main.nf' -include { RAXMLNG_SEARCH } from '../modules/ksumngs/nf-modules/raxmlng/search/main.nf' -include { RAXMLNG_SUPPORT } from '../modules/ksumngs/nf-modules/raxmlng/support/main.nf' -include { RENAME_HAPLOTYPES } from '../modules/local/rename-haplotypes.nf' -include { RENAME_NCBI } from '../modules/local/rename-ncbi.nf' - -/// summary: Create a phylogenetic tree -/// input: -/// - tuple: -/// - name: prefix -/// type: val(String) -/// description: Sample identifier -/// - name: Alignment -/// type: file -/// description: | -/// Plain-text multi-alignment file. 
RAxML-NG supports FASTA, PHYLIP, and -/// CATG formats -/// output: -/// - tuple: -/// - type: val(String) -/// description: Sample identifier -/// - type: path -/// description: Annotated support tree -workflow PHYLOGENETIC_TREE { - take: - sequences - consensus - genomes - genome_table - - main: - versions = Channel.empty() - - RENAME_NCBI(genomes, genome_table) - - CAT_FASTQ( - sequences - .join(consensus) - .map{ [ - ['id':it[0].id, 'single_end': true, 'strandedness': null], - [it[1], it[2]] - ] }, - false - ) - - RENAME_HAPLOTYPES(CAT_FASTQ.out.reads) - - CAT_CAT( - RENAME_NCBI.out.fasta - .mix(RENAME_HAPLOTYPES.out.fasta.map{ it.drop(1) }) - .collect(), - 'genotypes.fasta' - ) - - MAFFT( - CAT_CAT.out.file_out.map{ [ - ['id': 'collective', 'single_end': null, 'strandedness': null], - it - ] } - ) - - RAXMLNG_PARSE(MAFFT.out.fas.map{ it[1] }) - - RAXMLNG_SEARCH(RAXMLNG_PARSE.out.rba) - RAXMLNG_BOOTSTRAP(RAXMLNG_PARSE.out.rba) - - RAXMLNG_SUPPORT(RAXMLNG_SEARCH.out.best_tree, RAXMLNG_BOOTSTRAP.out.bootstraps) - - tree = RAXMLNG_SUPPORT.out.support - - versions = versions.mix(RENAME_NCBI.out.versions) - versions = versions.mix(CAT_FASTQ.out.versions) - versions = versions.mix(RENAME_HAPLOTYPES.out.versions) - versions = versions.mix(CAT_CAT.out.versions) - versions = versions.mix(MAFFT.out.versions) - versions = versions.mix(RAXMLNG_PARSE.out.versions) - versions = versions.mix(RAXMLNG_SEARCH.out.versions) - versions = versions.mix(RAXMLNG_BOOTSTRAP.out.versions) - versions = versions.mix(RAXMLNG_SUPPORT.out.versions) - - emit: - tree - versions -} diff --git a/subworkflows/presentation.nf b/subworkflows/presentation.nf deleted file mode 100644 index 79d49ccb..00000000 --- a/subworkflows/presentation.nf +++ /dev/null @@ -1,89 +0,0 @@ -include { HAPLOTYPE_YAML2TSV } from '../modules/local/haplotype-yaml2tsv.nf' -include { MINIMAP2_ALIGN } from '../modules/nf-core/modules/minimap2/align/main.nf' -include { SEQUENCETABLE } from '../modules/local/sequencetable.nf' - -workflow PRESENTATION { - take: - bam - bai - reference_fasta - reference_fai - consensus_fasta - accession - strain - haplotype_yaml - haplotype_fasta - tree - multiqc - krona - - main: - versions = Channel.empty() - - HAPLOTYPE_YAML2TSV(haplotype_yaml) - HAPLOTYPE_YAML2TSV.out.tsv - .map{ it[1] } - .collectFile(name: 'haplotypes.tsv') - .set{ haplotype_tsv } - - versions = versions.mix(HAPLOTYPE_YAML2TSV.out.versions) - - consensus_fasta - .mix(haplotype_fasta) - .map{ it[1] } - .collectFile(name: 'haplotypes.fasta') - .map{ [ [ 'id': 'haplotypes', 'single_end': true ], it ] } - .set{ haplotype_sequences } - - MINIMAP2_ALIGN(haplotype_sequences, reference_fasta) - - MINIMAP2_ALIGN.out.paf.map{ it[1] }.set{ haplotype_alignment } - - versions = versions.mix(MINIMAP2_ALIGN.out.versions) - - accession - .join(strain) - .map{ [ - it[0].id, - it[1], - it[2] - ] } - .combine(haplotype_tsv.splitCsv(sep: '\t'), by: 0) - .set{ strain_table } - - ECHO2TSV(strain_table) - - ECHO2TSV.out - .collectFile(name: 'haplotype_strains.tsv') - .set{ haplotype_strains } - - SEQUENCETABLE( - haplotype_strains, - haplotype_alignment, - reference_fasta, - tree, - multiqc, - krona - ) - - versions = versions.mix(SEQUENCETABLE.out.versions) - - emit: - versions -} - -process ECHO2TSV { - label 'process_low' - - input: - tuple val(sample), val(accession), val(strain), val(name), val(frequency) - - output: - path "*.tsv" - - script: - """ - echo "${sample}\t${accession}\t${strain}\t${name}\t${frequency}" \\ - > ${sample}_${accession}.tsv - """ -} diff 
--git a/workflows/yavsap.nf b/workflows/yavsap.nf new file mode 100644 index 00000000..df3c4c5e --- /dev/null +++ b/workflows/yavsap.nf @@ -0,0 +1,314 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + VALIDATE INPUTS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +def summary_params = NfcoreSchema.paramsSummaryMap(workflow, params) + +// Validate input parameters +WorkflowYavsap.initialise(params, log) + +// Check input path parameters to see if they exist +def checkPathParamList = [ + params.input, + params.multiqc_config, + params.kraken2_db, + params.freezetable_js, + params.igv_js, + params.phylotree_css, + params.d3_js, + params.underscore_js, + params.phylotree_js +] +for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } + +// Check mandatory parameters +if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input not specified!' } + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + CONFIG FILES +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +ch_multiqc_config = file("$projectDir/assets/multiqc_config.yml", checkIfExists: true) +ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multiqc_config) : Channel.empty() + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT LOCAL MODULES/SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// +// SUBWORKFLOW: Consisting of a mix of local and nf-core/modules +// +include { CLOSEST_REFERENCE } from '../subworkflows/local/closest-reference' +include { CONSENSUS } from '../subworkflows/local/consensus' +include { FILTERING } from '../subworkflows/local/filtering.nf' +include { GENOME_DOWNLOAD } from '../subworkflows/local/genomes' +include { HAPLOTYPING } from '../subworkflows/local/haplotype.nf' +include { PHYLOGENETIC_TREE } from '../subworkflows/local/phylogenetics.nf' +include { PRESENTATION } from '../subworkflows/local/presentation.nf' +include { QC } from '../subworkflows/local/qc.nf' +include { READS_INGEST } from '../subworkflows/local/ingest.nf' +include { REFERENCE_DOWNLOAD } from '../subworkflows/local/reference' +include { TRIMMING } from '../subworkflows/local/trimming.nf' +include { VARIANTS } from '../subworkflows/local/variants' + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT NF-CORE MODULES/SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// +// MODULES: Installed from nf-core/modules, ksumngs/nf-modules, and local modules +// +include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/modules/custom/dumpsoftwareversions/main.nf' +include { KRAKEN2_DBPREPARATION } from '../modules/local/kraken2/dbpreparation.nf' +include { MINIMAP2_ALIGN as MINIMAP2_REALIGN } from '../modules/ksumngs/nf-modules/minimap2/align/main' +include { MINIMAP2_ALIGN } from '../modules/nf-core/modules/minimap2/align/main' +include { MULTIQC } from '../modules/nf-core/modules/multiqc/main.nf' +include { SAMTOOLS_INDEX as SAMTOOLS_REINDEX } from '../modules/nf-core/modules/samtools/index/main' +include { SAMTOOLS_INDEX } from '../modules/nf-core/modules/samtools/index/main' + +/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + RUN MAIN WORKFLOW +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// Info required for completion email and summary +def multiqc_report = [] + +workflow YAVSAP { + + ch_versions = Channel.empty() + + // + // SUBWORKFLOW: Read in samplesheet, validate and stage input files + // + READS_INGEST() + READS_INGEST.out.sample_info.set{ ch_reads } + ch_versions = ch_versions.mix(READS_INGEST.out.versions) + + // + // SUBWORKFLOW: Run read QC + // + ch_qc = Channel.empty() + if (!params.skip_qc) { + QC(ch_reads) + QC.out.report.set{ ch_qc } + ch_versions = ch_versions.mix(QC.out.versions) + } + + // + // SUBWORKFLOW: Trim reads + // + ch_reads.set{ ch_trimmed } + ch_trimlog = Channel.empty() + if (!params.skip_trimming) { + TRIMMING(ch_reads) + TRIMMING.out.fastq.set{ ch_trimmed } + TRIMMING.out.log_out.set{ ch_trimlog } + ch_versions = ch_versions.mix(TRIMMING.out.versions) + } + + // + // SUBWORKFLOW: Kraken2 host read filtering + // + ch_trimmed.set{ ch_filtered } + ch_krona = Channel.empty() + ch_kreport = Channel.empty() + if (!params.skip_filtering) { + FILTERING(ch_trimmed, "${params.kraken2_db}", "${params.keep_taxid}") + FILTERING.out.filtered.set{ ch_filtered } + FILTERING.out.krona.set{ ch_krona } + FILTERING.out.log_out.set{ ch_kreport } + ch_versions = ch_versions.mix(FILTERING.out.versions) + } + + // + // SUBWORKFLOW: Download reference genome from NCBI + // + REFERENCE_DOWNLOAD() + REFERENCE_DOWNLOAD.out.fasta.set{ ch_reference_fasta } + ch_versions = ch_versions.mix(REFERENCE_DOWNLOAD.out.versions) + + // + // MODULE: Align reads into BAM format using minimap2 + // + MINIMAP2_ALIGN(ch_filtered, ch_reference_fasta, true, false, false) + MINIMAP2_ALIGN.out.bam.set{ ch_bam } + ch_versions = ch_versions.mix(MINIMAP2_ALIGN.out.versions.first()) + + // + // MODULE: Index BAM reads using Samtools + // + SAMTOOLS_INDEX(ch_bam) + SAMTOOLS_INDEX.out.bai.set{ ch_bai } + ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) + + // + // SUBWORKFLOW: Find the consensus sequence, falling back to the reference + // sequence in case of failure + // + CONSENSUS(ch_bam, ch_bai, ch_reference_fasta) + CONSENSUS + .out + .fasta + .concat(ch_bam.map{ it[0] }.combine(ch_reference_fasta)) + .unique{ it[0].id } + .set{ ch_consensus_fasta } + ch_versions = ch_versions.mix(CONSENSUS.out.versions) + + // + // SUBWORKFLOW: Download and reformat the strain reference genomes + // + GENOME_DOWNLOAD("${params.genome_list}") + GENOME_DOWNLOAD.out.strain.set{ ch_genome_strain } + GENOME_DOWNLOAD.out.fasta.set{ ch_genome_fasta } + ch_versions = ch_versions.mix(GENOME_DOWNLOAD.out.versions) + + // + // SUBWORKFLOW: Find the closest strain to each consensus sequence, falling + // back to the reference genome and an 'UNDEFINED' strain designation in + // case of failure + // + CLOSEST_REFERENCE(ch_consensus_fasta, ch_genome_strain, ch_genome_fasta) + CLOSEST_REFERENCE + .out + .accession + .concat(ch_consensus_fasta.map{ it[0] }.combine(Channel.of(params.genome))) + .unique{ it[0].id } + .set{ ch_closest_accession } + CLOSEST_REFERENCE + .out + .strain + .concat(ch_consensus_fasta.map{ it[0] }.combine(Channel.of('UNDEFINED'))) + .unique{ it[0].id } + .set{ ch_closest_strain } + CLOSEST_REFERENCE + .out + .fasta + .concat(ch_consensus_fasta.map{ it[0] }.combine(ch_reference_fasta)) + .unique{ it[0].id } + .set{ ch_closest_reference } + ch_versions = 
ch_versions.mix(CLOSEST_REFERENCE.out.versions) + + // + // MODULE: Realign reads into BAM format using minimap2 + // + MINIMAP2_REALIGN(ch_filtered.join(ch_closest_reference), true, false, false) + MINIMAP2_REALIGN.out.bam.set{ ch_realigned_bam } + ch_versions = ch_versions.mix(MINIMAP2_REALIGN.out.versions.first()) + + // + // MODULE: Index new BAM reads using Samtools + // + SAMTOOLS_REINDEX(ch_realigned_bam) + SAMTOOLS_REINDEX.out.bai.set{ ch_realigned_bai } + ch_versions = ch_versions.mix(SAMTOOLS_REINDEX.out.versions.first()) + + // + // SUBWORKFLOW: Variant calling + // + VARIANTS(ch_realigned_bam, ch_realigned_bai, ch_closest_reference) + VARIANTS.out.vcf.set{ ch_vcf } + ch_versions = ch_versions.mix(VARIANTS.out.versions.first()) + + // + // SUBWORKFLOW: Haplotype calling + // + ch_closest_strain.map{ [it[0], []] }.set{ ch_haplotype_fasta } + ch_closest_strain.map{ [it[0], []] }.set{ ch_haplotype_yaml } + if (!params.skip_haplotype) { + HAPLOTYPING(ch_realigned_bam, ch_vcf, ch_closest_reference) + HAPLOTYPING.out.fasta.set{ ch_haplotype_fasta } + HAPLOTYPING.out.yaml.set { ch_haplotype_yaml } + ch_versions = ch_versions.mix(HAPLOTYPING.out.versions) + } + + // + // SUBWORKFLOW: Phylogenetics + // + ch_tree = Channel.empty() + if (!params.skip_haplotype && !params.skip_phylogenetics) { + PHYLOGENETIC_TREE( + ch_haplotype_fasta, + ch_consensus_fasta, + ch_genome_fasta, + ch_genome_strain + ) + PHYLOGENETIC_TREE.out.tree.set{ ch_tree } + ch_versions = ch_versions.mix(PHYLOGENETIC_TREE.out.versions) + } + + // + // SUBWORKFLOW: Fancy presentations + // + PRESENTATION( + ch_bam, + ch_reference_fasta, + ch_closest_strain, + ch_closest_accession, + ch_consensus_fasta, + ch_haplotype_fasta, + ch_haplotype_yaml, + ch_tree + ) + PRESENTATION.out.seqtable.set{ ch_seqtable_mqc } + PRESENTATION.out.igv.set{ ch_igv_mqc } + PRESENTATION.out.phylotree.set{ ch_phylotree_mqc } + ch_versions = ch_versions.mix(PRESENTATION.out.versions) + + // + // MODULE: Get the versions of each bioinformatics tool + // + CUSTOM_DUMPSOFTWAREVERSIONS ( + ch_versions.unique().collectFile(name: 'collated_versions.yml') + ) + + // + // MODULE: MultiQC + // + workflow_summary = WorkflowYavsap.paramsSummaryMultiqc(workflow, summary_params) + ch_workflow_summary = Channel.value(workflow_summary) + + ch_multiqc_files = Channel.empty() + ch_multiqc_files = ch_multiqc_files.mix(Channel.from(ch_multiqc_config)) + ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_custom_config.collect().ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) + ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect()) + ch_multiqc_files = ch_multiqc_files.mix(ch_qc.collect{it[1]}.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(ch_trimlog.collect{it[1]}.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(ch_kreport.collect{it[1]}.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(ch_seqtable_mqc.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(ch_igv_mqc.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(ch_phylotree_mqc.ifEmpty([])) + + MULTIQC (ch_multiqc_files.collect()) + multiqc_report = MULTIQC.out.report.toList() + ch_versions = ch_versions.mix(MULTIQC.out.versions) +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + COMPLETION EMAIL AND SUMMARY +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +workflow.onComplete { + if 
(params.email || params.email_on_fail) { + NfcoreTemplate.email(workflow, params, summary_params, projectDir, log, multiqc_report) + } + NfcoreTemplate.summary(workflow, params, log) +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + THE END +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/
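A note on the genome list format consumed by GENOME_DOWNLOAD above: the genome_list parameter is resolved either as a path to a TSV file or as the name of a list bundled under genomes/ in the pipeline. Each row is parsed with splitCsv(sep: '\t') and remapped to [accession, strain], so the expected layout is a strain label, a tab, then an NCBI accession, and any strain label containing 'ROOT' is collapsed to plain 'ROOT' in the phylogenetic tree output. A minimal sketch of such a file (the strain labels and accessions below are illustrative only, not a list shipped with the pipeline):

    JEV_GIII_ROOT	NC_001437.1
    JEV_GI	AB000001.1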