diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index ea27a58..4ecfbfe 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -2,6 +2,7 @@ "name": "nfcore", "image": "nfcore/gitpod:latest", "remoteUser": "gitpod", + "runArgs": ["--privileged"], // Configure tool-specific properties. "customizations": { diff --git a/.editorconfig b/.editorconfig index b6b3190..9b99008 100644 --- a/.editorconfig +++ b/.editorconfig @@ -22,3 +22,11 @@ indent_size = unset [/assets/email*] indent_size = unset + +# ignore Readme +[README.md] +indent_style = unset + +# ignore python +[*.{py}] +indent_style = unset diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 6d9461b..25e2fd5 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -9,7 +9,9 @@ Please use the pre-filled template to save time. However, don't be put off by this template - other more general issues and suggestions are welcome! Contributions to the code are even more welcome ;) -> If you need help using or modifying nf-core/readsimulator then the best place to ask is on the nf-core Slack [#readsimulator](https://nfcore.slack.com/channels/readsimulator) channel ([join our Slack here](https://nf-co.re/join/slack)). +:::info +If you need help using or modifying nf-core/readsimulator then the best place to ask is on the nf-core Slack [#readsimulator](https://nfcore.slack.com/channels/readsimulator) channel ([join our Slack here](https://nf-co.re/join/slack)). +::: ## Contribution workflow @@ -25,6 +27,9 @@ If you're not used to this workflow with git, you can start with some [docs from ## Tests +You can optionally test your changes by running the pipeline locally. Then it is recommended to use the `debug` profile to +receive warnings about process selectors and other debug info. Example: `nextflow run . -profile debug,test,docker --outdir <OUTDIR>`. 
+ When you create a pull request with changes, [GitHub Actions](https://github.com/features/actions) will run automatic tests. Typically, pull-requests are only fully reviewed when these tests are passing, though of course we can help out before then. diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 3ce6b23..440b520 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -19,6 +19,7 @@ Learn more about contributing: [CONTRIBUTING.md](https://github.com/nf-core/read - [ ] If necessary, also make a PR on the nf-core/readsimulator _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. - [ ] Make sure your code lints (`nf-core lint`). - [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir <OUTDIR>`). +- [ ] Check for unexpected warnings in debug mode (`nextflow run . -profile debug,test,docker --outdir <OUTDIR>`). - [ ] Usage Documentation in `docs/usage.md` is updated. - [ ] Output Documentation in `docs/output.md` is updated. - [ ] `CHANGELOG.md` is updated. 
diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml index 9555e9f..bd01c3d 100644 --- a/.github/workflows/awsfulltest.yml +++ b/.github/workflows/awsfulltest.yml @@ -31,7 +31,7 @@ jobs: } profiles: test_full - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: name: Tower debug log file path: | diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml index 1e0d22e..ed0d641 100644 --- a/.github/workflows/awstest.yml +++ b/.github/workflows/awstest.yml @@ -25,7 +25,7 @@ jobs: } profiles: test - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: name: Tower debug log file path: | diff --git a/.github/workflows/branch.yml b/.github/workflows/branch.yml index 8454e5b..c2775b1 100644 --- a/.github/workflows/branch.yml +++ b/.github/workflows/branch.yml @@ -19,7 +19,7 @@ jobs: # NOTE - this doesn't currently work if the PR is coming from a fork, due to limitations in GitHub actions secrets - name: Post PR comment if: failure() - uses: mshick/add-pr-comment@v1 + uses: mshick/add-pr-comment@v2 with: message: | ## This PR is against the `master` branch :x: diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9d9993b..947c874 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,43 +1,124 @@ -name: nf-core CI # This workflow runs the pipeline with the minimal test dataset to check that it completes without any syntax errors +name: nf-core CI on: - push: - branches: - - dev pull_request: release: types: [published] + merge_group: + types: + - checks_requested + branches: + - master + - dev env: NXF_ANSI_LOG: false + NFT_VER: "0.8.3" + NFT_WORKDIR: "~" + NFT_DIFF: "pdiff" + NFT_DIFF_ARGS: "--line-numbers --expand-tabs=2" concurrency: group: "${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}" cancel-in-progress: true jobs: + changes: + name: Check for changes + runs-on: ubuntu-latest + outputs: + # Expose matched 
filters as job 'tags' output variable + tags: ${{ steps.filter.outputs.changes }} + steps: + - uses: actions/checkout@v3 + - name: Combine all tags.yml files + id: get_username + run: find . -name "tags.yml" -not -path "./.github/*" -exec cat {} + > .github/tags.yml + - name: debug + run: cat .github/tags.yml + - uses: dorny/paths-filter@v2 + id: filter + with: + filters: ".github/tags.yml" + + define_nxf_versions: + name: Choose nextflow versions to test against depending on target branch + runs-on: ubuntu-latest + outputs: + matrix: ${{ steps.nxf_versions.outputs.matrix }} + steps: + - id: nxf_versions + run: | + if [[ "${{ github.event_name }}" == "pull_request" && "${{ github.base_ref }}" == "dev" && "${{ matrix.NXF_VER }}" != "latest-everything" ]]; then + echo matrix='["latest-everything"]' | tee -a $GITHUB_OUTPUT + else + echo matrix='["latest-everything", "23.04.0"]' | tee -a $GITHUB_OUTPUT + fi + test: - name: Run pipeline with test data - # Only run on push if this is the nf-core dev branch (merged PRs) - if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/readsimulator') }}" + name: ${{ matrix.tags }} ${{ matrix.profile }} NF ${{ matrix.NXF_VER }} + needs: [changes, define_nxf_versions] + if: needs.changes.outputs.tags != '[]' runs-on: ubuntu-latest strategy: + fail-fast: false matrix: - NXF_VER: - - "23.04.0" - - "latest-everything" + NXF_VER: ${{ fromJson(needs.define_nxf_versions.outputs.matrix) }} + tags: ["${{ fromJson(needs.changes.outputs.tags) }}"] + profile: + - "docker" + steps: - name: Check out pipeline code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Install Nextflow uses: nf-core/setup-nextflow@v1 with: version: "${{ matrix.NXF_VER }}" - - name: Run pipeline with test data - # TODO nf-core: You can customise CI pipeline run tests as required - # For example: adding multiple test runs with different parameters - # Remember that you can parallelise this by using 
strategy.matrix + - uses: actions/setup-python@v4 + with: + python-version: "3.11" + architecture: "x64" + + - name: Install pdiff to see diff between nf-test snapshots + run: | + python -m pip install --upgrade pip + pip install pdiff + + - name: Cache nf-test installation + id: cache-software + uses: actions/cache@v3 + with: + path: | + /usr/local/bin/nf-test + /home/runner/.nf-test/nf-test.jar + key: ${{ runner.os }}-${{ env.NFT_VER }}-nftest + + - name: Install nf-test + if: steps.cache-software.outputs.cache-hit != 'true' + run: | + wget -qO- https://code.askimed.com/install/nf-test | bash + sudo mv nf-test /usr/local/bin/ + + - name: Run nf-test run: | - nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results + nf-test test --verbose --tag ${{ matrix.tags }} --profile test,"${{ matrix.profile }}" --junitxml=test.xml --tap=test.tap + + - uses: pcolby/tap-summary@v1 + with: + path: >- + test.tap + + - name: Output log on failure + if: failure() + run: | + sudo apt install bat > /dev/null + batcat --decorations=always --color=always ${{ github.workspace }}/.nf-test/tests/*/meta/nextflow.log + + - name: Publish Test Report + uses: mikepenz/action-junit-report@v3 + if: always() # always run even if the previous step fails + with: + report_paths: test.xml diff --git a/.github/workflows/clean-up.yml b/.github/workflows/clean-up.yml index 694e90e..e37cfda 100644 --- a/.github/workflows/clean-up.yml +++ b/.github/workflows/clean-up.yml @@ -10,7 +10,7 @@ jobs: issues: write pull-requests: write steps: - - uses: actions/stale@v7 + - uses: actions/stale@v9 with: stale-issue-message: "This issue has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment otherwise this issue will be closed in 20 days." stale-pr-message: "This PR has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment if it is still useful." 
diff --git a/.github/workflows/download_pipeline.yml b/.github/workflows/download_pipeline.yml new file mode 100644 index 0000000..8a33004 --- /dev/null +++ b/.github/workflows/download_pipeline.yml @@ -0,0 +1,67 @@ +name: Test successful pipeline download with 'nf-core download' + +# Run the workflow when: +# - dispatched manually +# - when a PR is opened or reopened to master branch +# - the head branch of the pull request is updated, i.e. if fixes for a release are pushed last minute to dev. +on: + workflow_dispatch: + pull_request: + types: + - opened + branches: + - master + pull_request_target: + branches: + - master + +env: + NXF_ANSI_LOG: false + +jobs: + download: + runs-on: ubuntu-latest + steps: + - name: Install Nextflow + uses: nf-core/setup-nextflow@v1 + + - uses: actions/setup-python@v5 + with: + python-version: "3.11" + architecture: "x64" + - uses: eWaterCycle/setup-singularity@v7 + with: + singularity-version: 3.8.3 + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install git+https://github.com/nf-core/tools.git@dev + + - name: Get the repository name and current branch set as environment variable + run: | + echo "REPO_LOWERCASE=${GITHUB_REPOSITORY,,}" >> ${GITHUB_ENV} + echo "REPOTITLE_LOWERCASE=$(basename ${GITHUB_REPOSITORY,,})" >> ${GITHUB_ENV} + echo "REPO_BRANCH=${GITHUB_REF#refs/heads/}" >> ${GITHUB_ENV} + + - name: Download the pipeline + env: + NXF_SINGULARITY_CACHEDIR: ./ + run: | + nf-core download ${{ env.REPO_LOWERCASE }} \ + --revision ${{ env.REPO_BRANCH }} \ + --outdir ./${{ env.REPOTITLE_LOWERCASE }} \ + --compress "none" \ + --container-system 'singularity' \ + --container-library "quay.io" -l "docker.io" -l "ghcr.io" \ + --container-cache-utilisation 'amend' \ + --download-configuration + + - name: Inspect download + run: tree ./${{ env.REPOTITLE_LOWERCASE }} + + - name: Run the downloaded pipeline + env: + NXF_SINGULARITY_CACHEDIR: ./ + NXF_SINGULARITY_HOME_MOUNT: true + run: nextflow run 
./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) -stub -profile test,singularity --outdir ./results diff --git a/.github/workflows/fix-linting.yml b/.github/workflows/fix-linting.yml index 4c48270..f5b18fa 100644 --- a/.github/workflows/fix-linting.yml +++ b/.github/workflows/fix-linting.yml @@ -4,7 +4,7 @@ on: types: [created] jobs: - deploy: + fix-linting: # Only run if comment is on a PR with the main repo, and if it contains the magic keywords if: > contains(github.event.comment.html_url, '/pull/') && @@ -13,10 +13,17 @@ jobs: runs-on: ubuntu-latest steps: # Use the @nf-core-bot token to check out so we can push later - - uses: actions/checkout@v3 + - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 with: token: ${{ secrets.nf_core_bot_auth_token }} + # indication that the linting is being fixed + - name: React on comment + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + comment-id: ${{ github.event.comment.id }} + reactions: eyes + # Action runs on the issue comment, so we don't get the PR by default # Use the gh cli to check out the PR - name: Checkout Pull Request @@ -24,32 +31,59 @@ jobs: env: GITHUB_TOKEN: ${{ secrets.nf_core_bot_auth_token }} - - uses: actions/setup-node@v3 + # Install and run pre-commit + - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5 + with: + python-version: 3.11 - - name: Install Prettier - run: npm install -g prettier @prettier/plugin-php + - name: Install pre-commit + run: pip install pre-commit - # Check that we actually need to fix something - - name: Run 'prettier --check' - id: prettier_status - run: | - if prettier --check ${GITHUB_WORKSPACE}; then - echo "result=pass" >> $GITHUB_OUTPUT - else - echo "result=fail" >> $GITHUB_OUTPUT - fi + - name: Run pre-commit + id: pre-commit + run: pre-commit run --all-files + continue-on-error: true - - name: Run 'prettier --write' - if: 
steps.prettier_status.outputs.result == 'fail' - run: prettier --write ${GITHUB_WORKSPACE} + # indication that the linting has finished + - name: react if linting finished succesfully + if: steps.pre-commit.outcome == 'success' + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + comment-id: ${{ github.event.comment.id }} + reactions: "+1" - name: Commit & push changes - if: steps.prettier_status.outputs.result == 'fail' + id: commit-and-push + if: steps.pre-commit.outcome == 'failure' run: | git config user.email "core@nf-co.re" git config user.name "nf-core-bot" git config push.default upstream git add . git status - git commit -m "[automated] Fix linting with Prettier" + git commit -m "[automated] Fix code linting" git push + + - name: react if linting errors were fixed + id: react-if-fixed + if: steps.commit-and-push.outcome == 'success' + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + comment-id: ${{ github.event.comment.id }} + reactions: hooray + + - name: react if linting errors were not fixed + if: steps.commit-and-push.outcome == 'failure' + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + comment-id: ${{ github.event.comment.id }} + reactions: confused + + - name: react if linting errors were not fixed + if: steps.commit-and-push.outcome == 'failure' + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + issue-number: ${{ github.event.issue.number }} + body: | + @${{ github.actor }} I tried to fix the linting errors, but it didn't work. Please fix them manually. + See [CI log](https://github.com/nf-core/readsimulator/actions/runs/${{ github.run_id }}) for more details. 
diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 888cb4b..81cd098 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -11,74 +11,35 @@ on: types: [published] jobs: - EditorConfig: + pre-commit: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - - uses: actions/setup-node@v3 - - - name: Install editorconfig-checker - run: npm install -g editorconfig-checker - - - name: Run ECLint check - run: editorconfig-checker -exclude README.md $(find .* -type f | grep -v '.git\|.py\|.md\|json\|yml\|yaml\|html\|css\|work\|.nextflow\|build\|nf_core.egg-info\|log.txt\|Makefile') - - Prettier: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - - uses: actions/setup-node@v3 - - - name: Install Prettier - run: npm install -g prettier - - - name: Run Prettier --check - run: prettier --check ${GITHUB_WORKSPACE} - - PythonBlack: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - - name: Check code lints with Black - uses: psf/black@stable - - # If the above check failed, post a comment on the PR explaining the failure - - name: Post PR comment - if: failure() - uses: mshick/add-pr-comment@v1 + - name: Set up Python 3.11 + uses: actions/setup-python@v5 with: - message: | - ## Python linting (`black`) is failing - - To keep the code consistent with lots of contributors, we run automated code consistency checks. - To fix this CI test, please run: - - * Install [`black`](https://black.readthedocs.io/en/stable/): `pip install black` - * Fix formatting errors in your pipeline: `black .` - - Once you push these changes the test should pass, and you can hide this comment :+1: + python-version: 3.11 + cache: "pip" - We highly recommend setting up Black in your code editor so that this formatting is done automatically on save. Ask about it on Slack for help! + - name: Install pre-commit + run: pip install pre-commit - Thanks again for your contribution! 
- repo-token: ${{ secrets.GITHUB_TOKEN }} - allow-repeats: false + - name: Run pre-commit + run: pre-commit run --all-files nf-core: runs-on: ubuntu-latest steps: - name: Check out pipeline code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Install Nextflow uses: nf-core/setup-nextflow@v1 - - uses: actions/setup-python@v4 + - uses: actions/setup-python@v5 with: - python-version: "3.8" + python-version: "3.11" architecture: "x64" - name: Install dependencies @@ -99,7 +60,7 @@ jobs: - name: Upload linting log file artifact if: ${{ always() }} - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: linting-logs path: | diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml index 0bbcd30..147bcd1 100644 --- a/.github/workflows/linting_comment.yml +++ b/.github/workflows/linting_comment.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Download lint results - uses: dawidd6/action-download-artifact@v2 + uses: dawidd6/action-download-artifact@v3 with: workflow: linting.yml workflow_conclusion: completed diff --git a/.github/workflows/release-announcements.yml b/.github/workflows/release-announcements.yml new file mode 100644 index 0000000..21ac3f0 --- /dev/null +++ b/.github/workflows/release-announcements.yml @@ -0,0 +1,68 @@ +name: release-announcements +# Automatic release toot and tweet announcements +on: + release: + types: [published] + workflow_dispatch: + +jobs: + toot: + runs-on: ubuntu-latest + steps: + - uses: rzr/fediverse-action@master + with: + access-token: ${{ secrets.MASTODON_ACCESS_TOKEN }} + host: "mstdn.science" # custom host if not "mastodon.social" (default) + # GitHub event payload + # https://docs.github.com/en/developers/webhooks-and-events/webhooks/webhook-events-and-payloads#release + message: | + Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! 
+ + Please see the changelog: ${{ github.event.release.html_url }} + + send-tweet: + runs-on: ubuntu-latest + + steps: + - uses: actions/setup-python@v5 + with: + python-version: "3.10" + - name: Install dependencies + run: pip install tweepy==4.14.0 + - name: Send tweet + shell: python + run: | + import os + import tweepy + + client = tweepy.Client( + access_token=os.getenv("TWITTER_ACCESS_TOKEN"), + access_token_secret=os.getenv("TWITTER_ACCESS_TOKEN_SECRET"), + consumer_key=os.getenv("TWITTER_CONSUMER_KEY"), + consumer_secret=os.getenv("TWITTER_CONSUMER_SECRET"), + ) + tweet = os.getenv("TWEET") + client.create_tweet(text=tweet) + env: + TWEET: | + Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! + + Please see the changelog: ${{ github.event.release.html_url }} + TWITTER_CONSUMER_KEY: ${{ secrets.TWITTER_CONSUMER_KEY }} + TWITTER_CONSUMER_SECRET: ${{ secrets.TWITTER_CONSUMER_SECRET }} + TWITTER_ACCESS_TOKEN: ${{ secrets.TWITTER_ACCESS_TOKEN }} + TWITTER_ACCESS_TOKEN_SECRET: ${{ secrets.TWITTER_ACCESS_TOKEN_SECRET }} + + bsky-post: + runs-on: ubuntu-latest + steps: + - uses: zentered/bluesky-post-action@v0.1.0 + with: + post: | + Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! 
+ + Please see the changelog: ${{ github.event.release.html_url }} + env: + BSKY_IDENTIFIER: ${{ secrets.BSKY_IDENTIFIER }} + BSKY_PASSWORD: ${{ secrets.BSKY_PASSWORD }} + # diff --git a/.gitignore b/.gitignore index 5124c9a..6cd4828 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,6 @@ results/ testing/ testing* *.pyc +.nf-tests/ +.nf-test/ +.nf-test.log diff --git a/.gitpod.yml b/.gitpod.yml index 25488dc..363d5b1 100644 --- a/.gitpod.yml +++ b/.gitpod.yml @@ -4,6 +4,9 @@ tasks: command: | pre-commit install --install-hooks nextflow self-update + - name: unset JAVA_TOOL_OPTIONS + command: | + unset JAVA_TOOL_OPTIONS vscode: extensions: # based on nf-core.nf-core-extensionpack diff --git a/.nf-core.yml b/.nf-core.yml index 3805dc8..005c4f3 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -1 +1,3 @@ repository_type: pipeline +lint: + actions_ci: false diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0c31cdb..af57081 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,5 +1,10 @@ repos: - repo: https://github.com/pre-commit/mirrors-prettier - rev: "v2.7.1" + rev: "v3.1.0" hooks: - id: prettier + - repo: https://github.com/editorconfig-checker/editorconfig-checker.python + rev: "2.7.3" + hooks: + - id: editorconfig-checker + alias: ec diff --git a/CHANGELOG.md b/CHANGELOG.md index ec9fbd7..e76994e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,12 +3,19 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## v1.0dev - [date] +## v1.0.0 - 2024-02-07 Initial release of nf-core/readsimulator, created with the [nf-core](https://nf-co.re/) template. 
### `Added` +- Option to simulate amplicon sequencing reads +- Option to simulate target capture sequencing reads +- Option to simulate metagenomic sequencing reads +- Option to simulate wholegenome sequencing reads +- Samplesheet creation after reads have been simulated +- ncbi-genome-download added to download reference database + ### `Fixed` ### `Dependencies` diff --git a/CITATIONS.md b/CITATIONS.md index 6b7717f..06f6127 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -10,14 +10,96 @@ ## Pipeline tools +- [ART](https://www.niehs.nih.gov/research/resources/software/biostatistics/art/index.cfm) + + > Weichun Huang, Leping Li, Jason R. Myers, Gabor T. Marth, ART: a next-generation sequencing read simulator, Bioinformatics, Volume 28, Issue 4, February 2012, Pages 593–594, https://doi.org/10.1093/bioinformatics/btr708 + +- [bedtools](https://pubmed.ncbi.nlm.nih.gov/20110278/) + + > Quinlan AR and Hall IM, 2010. BEDTools: a flexible suite of utilities for comparing genomic features. Bioinformatics. 26, 6, pp. 841–842. https://doi.org/10.1093/bioinformatics/btq033 + +- [Bowtie2](https://www.nature.com/articles/nmeth.1923) + + > Langmead, B., Salzberg, S. Fast gapped-read alignment with Bowtie 2. Nat Methods 9, 357–359 (2012). https://doi.org/10.1038/nmeth.1923 + +- [CapSim](https://academic.oup.com/bioinformatics/article/34/5/873/4575140) + + > Minh Duc Cao, Devika Ganesamoorthy, Chenxi Zhou, Lachlan J M Coin, Simulating the dynamics of targeted capture sequencing with CapSim, Bioinformatics, Volume 34, Issue 5, March 2018, Pages 873–874, https://doi.org/10.1093/bioinformatics/btx691 + +- [CRABS](https://onlinelibrary.wiley.com/doi/10.1111/1755-0998.13741) + + > Jeunen, G.-J., Dowle, E., Edgecombe, J., von Ammon, U., Gemmell, N. J., & Cross, H. (2022). crabs—A software program to generate curated reference databases for metabarcoding sequencing data. Molecular Ecology Resources, 00, 1– 14. 
https://doi.org/10.1111/1755-0998.13741 + - [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) - > Andrews, S. (2010). FastQC: A Quality Control Tool for High Throughput Sequence Data [Online]. Available online https://www.bioinformatics.babraham.ac.uk/projects/fastqc/. + > Andrews, S. (2010). FastQC: A Quality Control Tool for High Throughput Sequence Data [Online]. + +- [InSilicoSeq](https://academic.oup.com/bioinformatics/article/35/3/521/5055123) + + > Gourlé H, Karlsson-Lindsjö O, Hayer J and Bongcam-Rudloff E, Simulating Illumina data with InSilicoSeq. Bioinformatics (2018) doi:10.1093/bioinformatics/bty630 - [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/) > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924. +- [ncbi-genome-download](https://zenodo.org/records/8192486) + + > DOI: 10.5281/zenodo.8192432 + +- [Samtools](https://academic.oup.com/gigascience/article/10/2/giab008/6137722?login=false) + + > Twelve years of SAMtools and BCFtools. Petr Danecek, James K Bonfield, Jennifer Liddle, John Marshall, Valeriu Ohan, Martin O Pollard, Andrew Whitwham, Thomas Keane, Shane A McCarthy, Robert M Davies, Heng Li. GigaScience, Volume 10, Issue 2, February 2021, giab008, https://doi.org/10.1093/gigascience/giab008 + +- [Wgsim](https://github.com/lh3/wgsim) + +## Reference probe/baitset databases + +All reference probe databases are sourced from [ultraconserved](https://www.ultraconserved.org/) + +- [Tetrapods; 2,560 baits for 2,386 UCEs; version 1](https://academic.oup.com/sysbio/article/61/5/717/1735316) + + > Brant C. Faircloth, John E. McCormack, Nicholas G. Crawford, Michael G. Harvey, Robb T. Brumfield, Travis C. 
Glenn, Ultraconserved Elements Anchor Thousands of Genetic Markers Spanning Multiple Evolutionary Timescales, Systematic Biology, Volume 61, Issue 5, October 2012, Pages 717–726, https://doi.org/10.1093/sysbio/sys004 + +- [Tetrapods; 5,472 baits for 5,060 UCEs; version 1](https://royalsocietypublishing.org/doi/10.1098/rspb.2014.0823) + + > Sun Keping, Meiklejohn Kelly A., Faircloth Brant C., Glenn Travis C., Braun Edward L. and Kimball Rebecca T., 2014 The evolution of peafowl and other taxa with ocelli (eyespots): a phylogenomic approach. Proc. R. Soc. B. 281: 20140823. 20140823. http://doi.org/10.1098/rspb.2014.0823 + +- [Actinopterygians; 2,001 baits for 500 UCEs; version 1](https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0065923) + + > Faircloth BC, Sorenson L, Santini F, Alfaro ME (2013) A Phylogenomic Perspective on the Radiation of Ray-Finned Fishes Based upon Targeted Sequencing of Ultraconserved Elements (UCEs). PLOS ONE 8(6): e65923. https://doi.org/10.1371/journal.pone.0065923 + +- [Acanthomorphs; 2,628 baits for 1,314 UCEs; version 1](https://royalsocietypublishing.org/doi/10.1098/rspb.2015.1413) + + > McGee Matthew D., Faircloth Brant C., Borstein Samuel R., Zheng Jimmy, Darrin Hulsey C., Wainwright Peter C. and Alfaro Michael E.. 2016 Replicated divergence in cichlid radiations mirrors a major vertebrate innovation. Proc. R. Soc. B. 283: 20151413. 20151413. http://doi.org/10.1098/rspb.2015.1413 + +- [Arachnida; 14,799 baits for 1,120 UCEs; version 1](https://onlinelibrary.wiley.com/doi/10.1111/1755-0998.12621) + + > Starrett, J., Derkarabetian, S., Hedin, M., Bryson Jr. R. W., McCormack. J. E., & Faircloth. B. C. (2016). High phylogenetic utility of an Ultraconserved element probe set designed for Arachnida. Molecular Ecology Resources. 17(4), 812-823. https://doi.org/10.1111/1755-0998.12621 + +- [Coleoptera; 13,674 baits for 1,172 UCEs; version 1](https://resjournals.onlinelibrary.wiley.com/doi/10.1111/syen.12244) + + > Baca. S. 
M., Alexander. A., Gustafson. G. T., & Short. A. E. Z. (2017). Ultraconserved elements show utility in phylogenetic inference of Adephaga (Coleoptera) and suggest paraphyly of Hydradephaga. 42(4), 786-795. https://doi.org/10.1111/syen.12244 + +- [Diptera; 31,328 baits for 2,711 UCEs; version 1](https://besjournals.onlinelibrary.wiley.com/doi/10.1111/2041-210X.12754) + + > Faircloth. B. C. (2017). Identifying conserved genomic elements and designing universal bait sets to enrich them. Methods in Ecology and Evolution. 8(9), 1103-1112. https://doi.org/10.1111/2041-210X.12754 + +- [Hemiptera; 40,207 baits for 2,731 UCEs; version 1](https://besjournals.onlinelibrary.wiley.com/doi/10.1111/2041-210X.12754) + + > Faircloth. B. C. (2017). Identifying conserved genomic elements and designing universal bait sets to enrich them. Methods in Ecology and Evolution. 8(9), 1103-1112. https://doi.org/10.1111/2041-210X.12754 + +- [Hymenoptera; 2,749 baits for 1,510 UCEs; version 1](https://onlinelibrary.wiley.com/doi/10.1111/1755-0998.12328) + + > Faircloth. B. C., Branstetter. M. G., White. N. D., & Brady. S. G. (2014). Target enrichment of ultraconserved elements from arthropods provides a genomic perspective on relationships among Hymenoptera. Molecular Ecology Resources. 15(3), 489-501. https://doi.org/10.1111/1755-0998.12328 + +- [Hymenoptera; 31,829 baits for 2,590 UCEs; version 2](https://besjournals.onlinelibrary.wiley.com/doi/10.1111/2041-210X.12742) + + > Branstetter. M. G., Longino. J. T., Ward. P. S., & Faircloth. B. C. (2017). Enriching the ant tree of life: enhanced UCE bait set for genome-scale phylogenetics of ants and other Hymenoptera. 8(6), 768-776. https://doi.org/10.1111/2041-210X.12742 + +- [Anthozoa; 16,306 baits for 720 UCEs and 1,071 exons; version 1](https://onlinelibrary.wiley.com/doi/10.1111/1755-0998.12736) + + > Quattrini. A. M., Faircloth. B. C., Duenas. L. F., Bridge. T. C. L., Brugler. M. R., Calixto-Botia. I. F., DeLeo. D. M., Foret. S., Herrera. 
S., Lee. S. M. Y., Miller. D. J., Prada. C., Radis-Baptista. G., Ramirez-Portilla. C., Sanchez. J. A., Rodriguez. E., & McFadden. C. S. (2017). Universal target-enrichment baits for anthozoan (Cnidaria) phylogenomics: New approaches to long-standing problems. Molecular Ecology Resources. 18(2), 281-295. https://doi.org/10.1111/1755-0998.12736 + ## Software packaging/containerisation tools - [Anaconda](https://anaconda.com) diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index f4fd052..c089ec7 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -1,18 +1,20 @@ -# Code of Conduct at nf-core (v1.0) +# Code of Conduct at nf-core (v1.4) ## Our Pledge -In the interest of fostering an open, collaborative, and welcoming environment, we as contributors and maintainers of nf-core, pledge to making participation in our projects and community a harassment-free experience for everyone, regardless of: +In the interest of fostering an open, collaborative, and welcoming environment, we as contributors and maintainers of nf-core pledge to making participation in our projects and community a harassment-free experience for everyone, regardless of: - Age +- Ability - Body size +- Caste - Familial status - Gender identity and expression - Geographical location - Level of experience - Nationality and national origins - Native language -- Physical and neurological ability +- Neurodiversity - Race or ethnicity - Religion - Sexual identity and orientation @@ -22,80 +24,133 @@ Please note that the list above is alphabetised and is therefore not ranked in a ## Preamble -> Note: This Code of Conduct (CoC) has been drafted by the nf-core Safety Officer and been edited after input from members of the nf-core team and others. "We", in this document, refers to the Safety Officer and members of the nf-core core team, both of whom are deemed to be members of the nf-core community and are therefore required to abide by this Code of Conduct. 
This document will amended periodically to keep it up-to-date, and in case of any dispute, the most current version will apply. +:::note +This Code of Conduct (CoC) has been drafted by Renuka Kudva, Cris Tuñí, and Michael Heuer, with input from the nf-core Core Team and Susanna Marquez from the nf-core community. "We", in this document, refers to the Safety Officers and members of the nf-core Core Team, both of whom are deemed to be members of the nf-core community and are therefore required to abide by this Code of Conduct. This document will be amended periodically to keep it up-to-date. In case of any dispute, the most current version will apply. +::: -An up-to-date list of members of the nf-core core team can be found [here](https://nf-co.re/about). Our current safety officer is Renuka Kudva. +An up-to-date list of members of the nf-core core team can be found [here](https://nf-co.re/about). + +Our Safety Officers are Saba Nafees, Cris Tuñí, and Michael Heuer. nf-core is a young and growing community that welcomes contributions from anyone with a shared vision for [Open Science Policies](https://www.fosteropenscience.eu/taxonomy/term/8). Open science policies encompass inclusive behaviours and we strive to build and maintain a safe and inclusive environment for all individuals. -We have therefore adopted this code of conduct (CoC), which we require all members of our community and attendees in nf-core events to adhere to in all our workspaces at all times. Workspaces include but are not limited to Slack, meetings on Zoom, Jitsi, YouTube live etc. +We have therefore adopted this CoC, which we require all members of our community and attendees of nf-core events to adhere to in all our workspaces at all times. Workspaces include, but are not limited to, Slack, meetings on Zoom, gather.town, YouTube live etc. 
-Our CoC will be strictly enforced and the nf-core team reserve the right to exclude participants who do not comply with our guidelines from our workspaces and future nf-core activities. +Our CoC will be strictly enforced and the nf-core team reserves the right to exclude participants who do not comply with our guidelines from our workspaces and future nf-core activities. -We ask all members of our community to help maintain a supportive and productive workspace and to avoid behaviours that can make individuals feel unsafe or unwelcome. Please help us maintain and uphold this CoC. +We ask all members of our community to help maintain supportive and productive workspaces and to avoid behaviours that can make individuals feel unsafe or unwelcome. Please help us maintain and uphold this CoC. -Questions, concerns or ideas on what we can include? Contact safety [at] nf-co [dot] re +Questions, concerns, or ideas on what we can include? Contact members of the Safety Team on Slack or email safety [at] nf-co [dot] re. ## Our Responsibilities -The safety officer is responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behaviour. +Members of the Safety Team (the Safety Officers) are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behaviour. -The safety officer in consultation with the nf-core core team have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. 
+The Safety Team, in consultation with the nf-core core team, have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this CoC, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. -Members of the core team or the safety officer who violate the CoC will be required to recuse themselves pending investigation. They will not have access to any reports of the violations and be subject to the same actions as others in violation of the CoC. +Members of the core team or the Safety Team who violate the CoC will be required to recuse themselves pending investigation. They will not have access to any reports of the violations and will be subject to the same actions as others in violation of the CoC. -## When are where does this Code of Conduct apply? +## When and where does this Code of Conduct apply? -Participation in the nf-core community is contingent on following these guidelines in all our workspaces and events. This includes but is not limited to the following listed alphabetically and therefore in no order of preference: +Participation in the nf-core community is contingent on following these guidelines in all our workspaces and events, such as hackathons, workshops, bytesize, and collaborative workspaces on gather.town. These guidelines include, but are not limited to, the following (listed alphabetically and therefore in no order of preference): - Communicating with an official project email address. - Communicating with community members within the nf-core Slack channel. - Participating in hackathons organised by nf-core (both online and in-person events). -- Participating in collaborative work on GitHub, Google Suite, community calls, mentorship meetings, email correspondence. 
-- Participating in workshops, training, and seminar series organised by nf-core (both online and in-person events). This applies to events hosted on web-based platforms such as Zoom, Jitsi, YouTube live etc. +- Participating in collaborative work on GitHub, Google Suite, community calls, mentorship meetings, email correspondence, and on the nf-core gather.town workspace. +- Participating in workshops, training, and seminar series organised by nf-core (both online and in-person events). This applies to events hosted on web-based platforms such as Zoom, gather.town, Jitsi, YouTube live etc. - Representing nf-core on social media. This includes both official and personal accounts. ## nf-core cares 😊 -nf-core's CoC and expectations of respectful behaviours for all participants (including organisers and the nf-core team) include but are not limited to the following (listed in alphabetical order): +nf-core's CoC and expectations of respectful behaviours for all participants (including organisers and the nf-core team) include, but are not limited to, the following (listed in alphabetical order): - Ask for consent before sharing another community member’s personal information (including photographs) on social media. - Be respectful of differing viewpoints and experiences. We are all here to learn from one another and a difference in opinion can present a good learning opportunity. -- Celebrate your accomplishments at events! (Get creative with your use of emojis 🎉 🥳 💯 🙌 !) +- Celebrate your accomplishments! (Get creative with your use of emojis 🎉 🥳 💯 🙌 !) - Demonstrate empathy towards other community members. (We don’t all have the same amount of time to dedicate to nf-core. If tasks are pending, don’t hesitate to gently remind members of your team. If you are leading a task, ask for help if you feel overwhelmed.) - Engage with and enquire after others. 
(This is especially important given the geographically remote nature of the nf-core community, so let’s do this the best we can) - Focus on what is best for the team and the community. (When in doubt, ask) -- Graciously accept constructive criticism, yet be unafraid to question, deliberate, and learn. +- Accept feedback, yet be unafraid to question, deliberate, and learn. - Introduce yourself to members of the community. (We’ve all been outsiders and we know that talking to strangers can be hard for some, but remember we’re interested in getting to know you and your visions for open science!) -- Show appreciation and **provide clear feedback**. (This is especially important because we don’t see each other in person and it can be harder to interpret subtleties. Also remember that not everyone understands a certain language to the same extent as you do, so **be clear in your communications to be kind.**) +- Show appreciation and **provide clear feedback**. (This is especially important because we don’t see each other in person and it can be harder to interpret subtleties. Also remember that not everyone understands a certain language to the same extent as you do, so **be clear in your communication to be kind.**) - Take breaks when you feel like you need them. -- Using welcoming and inclusive language. (Participants are encouraged to display their chosen pronouns on Zoom or in communication on Slack.) +- Use welcoming and inclusive language. (Participants are encouraged to display their chosen pronouns on Zoom or in communication on Slack) ## nf-core frowns on 😕 -The following behaviours from any participants within the nf-core community (including the organisers) will be considered unacceptable under this code of conduct. Engaging or advocating for any of the following could result in expulsion from nf-core workspaces. 
+The following behaviours from any participants within the nf-core community (including the organisers) will be considered unacceptable under this CoC. Engaging or advocating for any of the following could result in expulsion from nf-core workspaces: - Deliberate intimidation, stalking or following and sustained disruption of communication among participants of the community. This includes hijacking shared screens through actions such as using the annotate tool in conferencing software such as Zoom. - “Doxing” i.e. posting (or threatening to post) another person’s personal identifying information online. - Spamming or trolling of individuals on social media. -- Use of sexual or discriminatory imagery, comments, or jokes and unwelcome sexual attention. -- Verbal and text comments that reinforce social structures of domination related to gender, gender identity and expression, sexual orientation, ability, physical appearance, body size, race, age, religion or work experience. +- Use of sexual or discriminatory imagery, comments, jokes, or unwelcome sexual attention. +- Verbal and text comments that reinforce social structures of domination related to gender, gender identity and expression, sexual orientation, ability, physical appearance, body size, race, age, religion, or work experience. ### Online Trolling -The majority of nf-core interactions and events are held online. Unfortunately, holding events online comes with the added issue of online trolling. This is unacceptable, reports of such behaviour will be taken very seriously, and perpetrators will be excluded from activities immediately. +The majority of nf-core interactions and events are held online. Unfortunately, holding events online comes with the risk of online trolling. This is unacceptable — reports of such behaviour will be taken very seriously and perpetrators will be excluded from activities immediately. 
-All community members are required to ask members of the group they are working within for explicit consent prior to taking screenshots of individuals during video calls. +All community members are **required** to ask members of the group they are working with for explicit consent prior to taking screenshots of individuals during video calls. -## Procedures for Reporting CoC violations +## Procedures for reporting CoC violations If someone makes you feel uncomfortable through their behaviours or actions, report it as soon as possible. -You can reach out to members of the [nf-core core team](https://nf-co.re/about) and they will forward your concerns to the safety officer(s). +You can reach out to members of the Safety Team (Saba Nafees, Cris Tuñí, and Michael Heuer) on Slack. Alternatively, contact a member of the [nf-core core team](https://nf-co.re/about), and they will forward your concerns to the Safety Team. + +Issues directly concerning members of the Core Team or the Safety Team will be dealt with by other members of the core team and the safety manager — possible conflicts of interest will be taken into account. nf-core is also in discussions about having an ombudsperson and details will be shared in due course. + +All reports will be handled with the utmost discretion and confidentiality. + +You can also report any CoC violations to safety [at] nf-co [dot] re. In your email report, please do your best to include: + +- Your contact information. +- Identifying information (e.g. names, nicknames, pseudonyms) of the participant who has violated the Code of Conduct. +- The behaviour that was in violation and the circumstances surrounding the incident. +- The approximate time of the behaviour (if different than the time the report was made). +- Other people involved in the incident, if applicable. +- If you believe the incident is ongoing. +- If there is a publicly available record (e.g. mailing list record, a screenshot). 
+- Any additional information. + +After you file a report, one or more members of our Safety Team will contact you to follow up on your report. + +## Who will read and handle reports + +All reports will be read and handled by the members of the Safety Team at nf-core. + +If members of the Safety Team are deemed to have a conflict of interest with a report, they will be required to recuse themselves as per our Code of Conduct and will not have access to any follow-ups. + +To keep this first report confidential from any of the Safety Team members, please submit your first report by direct messaging on Slack/direct email to any of the nf-core members you are comfortable disclosing the information to, and be explicit about which member(s) you do not consent to sharing the information with. + +## Reviewing reports + +After receiving the report, members of the Safety Team will review the incident report to determine whether immediate action is required, for example, whether there is immediate threat to participants’ safety. + +The Safety Team, in consultation with members of the nf-core core team, will assess the information to determine whether the report constitutes a Code of Conduct violation, for them to decide on a course of action. + +In the case of insufficient information, one or more members of the Safety Team may contact the reporter, the reportee, or any other attendees to obtain more information. -Issues directly concerning members of the core team will be dealt with by other members of the core team and the safety manager, and possible conflicts of interest will be taken into account. nf-core is also in discussions about having an ombudsperson, and details will be shared in due course. +Once additional information is gathered, the Safety Team will collectively review and decide on the best course of action to take, if any. The Safety Team reserves the right to not act on a report. -All reports will be handled with utmost discretion and confidentially. 
+## Confidentiality + +All reports, and any additional information included, are only shared with the team of safety officers (and possibly members of the core team, in case the safety officer is in violation of the CoC). We will respect confidentiality requests for the purpose of protecting victims of abuse. + +We will not name harassment victims, beyond discussions between the safety officer and members of the nf-core team, without the explicit consent of the individuals involved. + +## Enforcement + +Actions taken by nf-core’s Safety Team may include, but are not limited to: + +- Asking anyone to stop a behaviour. +- Asking anyone to leave the event and online spaces either temporarily, for the remainder of the event, or permanently. +- Removing access to gather.town and Slack, either temporarily or permanently. +- Communicating to all participants to reinforce our expectations for conduct and remind them what is unacceptable behaviour; this may be public for practical reasons. +- Communicating to all participants that an incident has taken place and how we will act or have acted — this may be for the purpose of letting event participants know we are aware of and dealing with the incident. +- Banning anyone from participating in nf-core-managed spaces, future events, and activities, either temporarily or permanently. +- No action. ## Attribution and Acknowledgements @@ -106,6 +161,22 @@ All reports will be handled with utmost discretion and confidentially. ## Changelog -### v1.0 - March 12th, 2021 +### v1.4 - February 8th, 2022 + +- Included a new member of the Safety Team. Corrected a typographical error in the text. + +### v1.3 - December 10th, 2021 + +- Added a statement that the CoC applies to nf-core gather.town workspaces. Corrected typographical errors in the text. + +### v1.2 - November 12th, 2021 + +- Removed information specific to reporting CoC violations at the Hackathon in October 2021. 
+ +### v1.1 - October 14th, 2021 + +- Updated with names of new Safety Officers and specific information for the hackathon in October 2021. + +### v1.0 - March 15th, 2021 - Complete rewrite from original [Contributor Covenant](http://contributor-covenant.org/) CoC. diff --git a/README.md b/README.md index 8019e4d..db12c52 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,12 @@ -# ![nf-core/readsimulator](docs/images/nf-core-readsimulator_logo_light.png#gh-light-mode-only) ![nf-core/readsimulator](docs/images/nf-core-readsimulator_logo_dark.png#gh-dark-mode-only) +

+ + + nf-core/readsimulator + +

-[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/readsimulator/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX) +[![GitHub Actions CI Status](https://github.com/nf-core/readsimulator/workflows/nf-core%20CI/badge.svg)](https://github.com/nf-core/readsimulator/actions?query=workflow%3A%22nf-core+CI%22) +[![GitHub Actions Linting Status](https://github.com/nf-core/readsimulator/workflows/nf-core%20linting/badge.svg)](https://github.com/nf-core/readsimulator/actions?query=workflow%3A%22nf-core+linting%22)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/readsimulator/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX) [![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A523.04.0-23aa62.svg)](https://www.nextflow.io/) [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) @@ -12,58 +18,72 @@ ## Introduction -**nf-core/readsimulator** is a bioinformatics pipeline that ... +**nf-core/readsimulator** is a pipeline to simulate sequencing reads. The pipeline currently supports simulating amplicon, target capture, metagenome, and wholegenome data. It takes a samplesheet with sample names and seeds for random generation to produce simulated FASTQ files and a samplesheet that contains the paths to the FASTQ files. - +

+ nf-core/readsimulator workflow overview +

- - +### Amplicon simulation steps -1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)) -2. Present QC for raw reads ([`MultiQC`](http://multiqc.info/)) +1. Create reference database for amplicon-based sequencing ([`CRABS`](https://github.com/gjeunen/reference_database_creator)) +2. Simulate amplicon Illumina reads ([`art_illumina`](https://manpages.debian.org/testing/art-nextgen-simulation-tools/art_illumina.1.en.html)) +3. Create samplesheet with sample names and paths to simulated read files (header = sample,fastq_1,fastq_2) +4. Simulated read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)) +5. Present QC for simulated reads ([`MultiQC`](http://multiqc.info/)) -## Usage +### Target capture simulation steps + +1. Align probes to genome ([`Bowtie2`](https://bowtie-bio.sourceforge.net/bowtie2/index.shtml)) +2. Get SAM index ([`SAMtools`](https://www.htslib.org/)) +3. Simulate target capture reads (Illumina (default) or Pacbio) ([`Japsa capsim`](https://japsa.readthedocs.io/en/latest/tools/jsa.sim.capsim.html)) +4. Create samplesheet with sample names and paths to simulated read files (header = sample,fastq_1,fastq_2) +5. Simulated read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)) +6. Present QC for simulated reads ([`MultiQC`](http://multiqc.info/)) + +### Metagenome simulation steps -> **Note** -> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how -> to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) -> with `-profile test` before running the workflow on actual data. +1. Simulate metagenome Illumina reads ([`InsilicoSeq Generate`](https://insilicoseq.readthedocs.io/en/latest/)) +2. Create samplesheet with sample names and paths to simulated read files (header = sample,fastq_1,fastq_2) +3. 
Simulated read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)) +4. Present QC for simulated reads ([`MultiQC`](http://multiqc.info/)) - +Each row represents an output sample. Now, you can run the pipeline using: - - ```bash nextflow run nf-core/readsimulator \ -profile \ --input samplesheet.csv \ + --amplicon \ --outdir ``` -> **Warning:** -> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those -> provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; +> [!WARNING] +> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; > see [docs](https://nf-co.re/usage/configuration#custom-configuration-files). For more details and further functionality, please refer to the [usage documentation](https://nf-co.re/readsimulator/usage) and the [parameter documentation](https://nf-co.re/readsimulator/parameters). @@ -76,11 +96,13 @@ For more details about the output files and reports, please refer to the ## Credits -nf-core/readsimulator was originally written by Adam Bennett. +nf-core/readsimulator was originally written by [Adam Bennett](https://github.com/a4000) for use at the [Minderoo Foundation's OceanOmics project](https://www.minderoo.org/oceanomics). 
-We thank the following people for their extensive assistance in the development of this pipeline: +We thank the following people for their extensive assistance in the development of this pipeline (in alphabetical order): - +- [Carson J Miller](https://github.com/CarsonJM) +- [Lauren Huet](https://github.com/LaurenHuet/) +- [Philipp Bayer](https://github.com/philippbayer) ## Contributions and Support @@ -91,7 +113,7 @@ For further information or help, don't hesitate to get in touch on the [Slack `# ## Citations - + diff --git a/assets/email_template.html b/assets/email_template.html index 1137716..f2bc049 100644 --- a/assets/email_template.html +++ b/assets/email_template.html @@ -12,7 +12,7 @@ -

nf-core/readsimulator v${version}

+

nf-core/readsimulator ${version}

Run Name: $runName

<% if (!success){ diff --git a/assets/email_template.txt b/assets/email_template.txt index 6dc04a6..6ca5507 100644 --- a/assets/email_template.txt +++ b/assets/email_template.txt @@ -4,7 +4,7 @@ |\\ | |__ __ / ` / \\ |__) |__ } { | \\| | \\__, \\__/ | \\ |___ \\`-._,-`-, `._,._,' - nf-core/readsimulator v${version} + nf-core/readsimulator ${version} ---------------------------------------------------- Run Name: $runName diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 28d3fd1..7d3650d 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,7 +1,9 @@ report_comment: > - This report has been generated by the nf-core/readsimulator + + This report has been generated by the nf-core/readsimulator analysis pipeline. For information about how to interpret these results, please see the - documentation. + documentation. + report_section_order: "nf-core-readsimulator-methods-description": order: -1000 diff --git a/assets/nf-core-readsimulator_logo_light.png b/assets/nf-core-readsimulator_logo_light.png index 7c411bb..6dec241 100644 Binary files a/assets/nf-core-readsimulator_logo_light.png and b/assets/nf-core-readsimulator_logo_light.png differ diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv index 5f653ab..a359015 100644 --- a/assets/samplesheet.csv +++ b/assets/samplesheet.csv @@ -1,3 +1,3 @@ -sample,fastq_1,fastq_2 -SAMPLE_PAIRED_END,/path/to/fastq/files/AEG588A1_S1_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A1_S1_L002_R2_001.fastq.gz -SAMPLE_SINGLE_END,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz, +sample,seed +SAMPLE_1,41 +SAMPLE_2,42 diff --git a/assets/schema_input.json b/assets/schema_input.json index 51606d1..c50c66d 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -10,27 +10,17 @@ "sample": { "type": "string", "pattern": "^\\S+$", - "errorMessage": "Sample name must be provided and cannot contain spaces" + "unique": true, + "errorMessage": "Sample name must be provided and 
cannot contain spaces", + "meta": ["id"] }, - "fastq_1": { - "type": "string", - "pattern": "^\\S+\\.f(ast)?q\\.gz$", - "errorMessage": "FastQ file for reads 1 must be provided, cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" - }, - "fastq_2": { - "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'", - "anyOf": [ - { - "type": "string", - "pattern": "^\\S+\\.f(ast)?q\\.gz$" - }, - { - "type": "string", - "maxLength": 0 - } - ] + "seed": { + "type": "integer", + "errorMessage": "Seed must be provided", + "unique": true, + "meta": ["seed"] } }, - "required": ["sample", "fastq_1"] + "required": ["sample", "seed"] } } diff --git a/assets/slackreport.json b/assets/slackreport.json index cfdb0eb..e9d5f8a 100644 --- a/assets/slackreport.json +++ b/assets/slackreport.json @@ -3,7 +3,7 @@ { "fallback": "Plain-text summary of the attachment.", "color": "<% if (success) { %>good<% } else { %>danger<%} %>", - "author_name": "nf-core/readsimulator v${version} - ${runName}", + "author_name": "nf-core/readsimulator ${version} - ${runName}", "author_icon": "https://www.nextflow.io/docs/latest/_static/favicon.ico", "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors<% } %>", "fields": [ diff --git a/conf/base.config b/conf/base.config index 43abe99..661772d 100644 --- a/conf/base.config +++ b/conf/base.config @@ -10,7 +10,6 @@ process { - // TODO nf-core: Check the defaults for all processes cpus = { check_max( 1 * task.attempt, 'cpus' ) } memory = { check_max( 6.GB * task.attempt, 'memory' ) } time = { check_max( 4.h * task.attempt, 'time' ) } @@ -24,7 +23,6 @@ process { // These labels are used and recognised by default in DSL2 files hosted on nf-core/modules. // If possible, it would be nice to keep the same label naming convention when // adding in your local modules too. - // TODO nf-core: Customise requirements for specific processes. 
// See https://www.nextflow.io/docs/latest/config.html#config-process-selectors withLabel:process_single { cpus = { check_max( 1 , 'cpus' ) } diff --git a/conf/modules.config b/conf/modules.config index da58a5d..6d45e43 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -18,11 +18,79 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] - withName: SAMPLESHEET_CHECK { + withName: ART_ILLUMINA { + ext.args = "-amp -p -na -c ${params.amplicon_read_count}" + publishDir = [ + path: { "${params.outdir}/art_illumina" }, + mode: params.publish_dir_mode + ] + } + + withName: BOWTIE2_ALIGN { + ext.args = "--local --very-sensitive-local --mp 32 --rdg 10,8 --rfg 10,8 -k 10000 -f" + ext.args2 = "-b | samtools sort" + publishDir = [ + path: { "${params.outdir}/bowtie2" }, + mode: params.publish_dir_mode + ] + } + + withName: BOWTIE2_BUILD { + publishDir = [ + path: { "${params.outdir}/bowtie2" }, + mode: params.publish_dir_mode + ] + } + + withName: JAPSA_CAPSIM { + ext.args = [ + "--fmedian ${params.target_capture_fmedian}", + "--fshape ${params.target_capture_fshape}", + "--smedian ${params.target_capture_smedian}", + "--sshape ${params.target_capture_sshape}", + params.target_capture_tmedian ? "--tmedian ${params.target_capture_tmedian}" : "", + params.target_capture_tshape ? "--tshape ${params.target_capture_tshape}" : "", + "--num ${params.target_capture_num}", + params.target_capture_mode == "illumina" ? + "--illen ${params.target_capture_illen}" : "--pblen ${params.target_capture_pblen}", + params.target_capture_mode == "illumina" ? 
+ "--ilmode ${params.target_capture_ilmode} --miseq" : "--pacbio" + ].join(' ').trim() + publishDir = [ + path: { "${params.outdir}/capsim" }, + mode: params.publish_dir_mode + ] + } + + withName: CRABS_DBIMPORT { + ext.args = "--seq_header species --delim ' '" + publishDir = [ + path: { "${params.outdir}/crabs_dbimport" }, + mode: params.publish_dir_mode + ] + } + + withName: CRABS_INSILICOPCR { + ext.args = "--error ${params.amplicon_crabs_ispcr_error} -t 12 --fwd ${params.amplicon_fw_primer} --rev ${params.amplicon_rv_primer}" + publishDir = [ + path: { "${params.outdir}/crabs_insilicopcr" }, + mode: params.publish_dir_mode + ] + } + + withName: CREATE_SAMPLESHEET { + publishDir = [ + path: { "${params.outdir}/samplesheet_individual_samples" }, + mode: params.publish_dir_mode, + enabled: false + ] + } + + withName: CUSTOM_DUMPSOFTWAREVERSIONS { publishDir = [ path: { "${params.outdir}/pipeline_info" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + pattern: '*_versions.yml' ] } @@ -30,12 +98,91 @@ process { ext.args = '--quiet' } - withName: CUSTOM_DUMPSOFTWAREVERSIONS { + withName: INSILICOSEQ_GENERATE { + ext.args = [ + "--abundance ${params.metagenome_abundance}", + "--n_reads ${params.metagenome_n_reads}", + "--mode ${params.metagenome_mode}", + params.metagenome_mode == "basic" ? "" : "--model ${params.metagenome_model}", + params.metagenome_coverage ? "--coverage ${params.metagenome_coverage}" : "", + params.metagenome_gc_bias ? 
"--gc_bias ${params.metagenome_gc_bias}" : "" + ].join(' ').trim() publishDir = [ - path: { "${params.outdir}/pipeline_info" }, + path: { "${params.outdir}/insilicoseq" }, + mode: params.publish_dir_mode + ] + } + + withName: MERGE_FASTAS { + publishDir = [ + path: { "${params.outdir}/merged_fastas" }, mode: params.publish_dir_mode, - pattern: '*_versions.yml' + enabled: false + ] + } + + withName: MERGE_SAMPLESHEETS { + publishDir = [ + path: { "${params.outdir}/samplesheet" }, + mode: params.publish_dir_mode + ] + } + + withName: 'MULTIQC' { + ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } + publishDir = [ + path: { "${params.outdir}/multiqc" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: NCBIGENOMEDOWNLOAD { + ext.args = " -N --formats fasta" + publishDir = [ + path: { "${params.outdir}/ncbigenomedownload" }, + mode: params.publish_dir_mode ] } + withName: SAMTOOLS_INDEX { + publishDir = [ + path: { "${params.outdir}/bowtie2" }, + mode: params.publish_dir_mode + ] + } + + withName: UNCOMPRESS_FASTA { + publishDir = [ + path: { "${params.outdir}/uncompress_fasta" }, + mode: params.publish_dir_mode, + enabled: false + ] + } + + withName: UNZIP { + publishDir = [ + path: { "${params.outdir}/probes" }, + mode: params.publish_dir_mode + ] + } + + withName: WGSIM { + ext.args = [ + "-e ${params.wholegenome_error_rate}", + "-d ${params.wholegenome_outer_dist}", + "-s ${params.wholegenome_standard_dev}", + "-N ${params.wholegenome_n_reads}", + "-1 ${params.wholegenome_r1_length}", + "-2 ${params.wholegenome_r2_length}", + "-r ${params.wholegenome_mutation_rate}", + "-R ${params.wholegenome_indel_fraction}", + "-X ${params.wholegenome_indel_extended}" + ].join(' ').trim() + + publishDir = [ + path: { "${params.outdir}/wgsim" }, + mode: params.publish_dir_mode + ] + } } diff --git a/conf/ref_databases.config b/conf/ref_databases.config new file mode 
100644 index 0000000..9d38a64 --- /dev/null +++ b/conf/ref_databases.config @@ -0,0 +1,68 @@ +/* + * ----------------------------------------------------------- + * Nextflow config file for reference databases + * ----------------------------------------------------------- + * Defines sources and files for reference databases + * Please also reflect all changes in 'nextflow_schema.json' + * Each entry requires as a minimum: title, url, citation + */ + +params { + probe_ref_db { + 'Tetrapods-UCE-2.5Kv1' { + title = 'Tetrapods; 2,560 baits for 2,386 UCEs; version 1' + url = 'https://ndownloader.figshare.com/files/10513876' + citation = 'Brant C. Faircloth, John E. McCormack, Nicholas G. Crawford, Michael G. Harvey, Robb T. Brumfield, Travis C. Glenn, Ultraconserved Elements Anchor Thousands of Genetic Markers Spanning Multiple Evolutionary Timescales, Systematic Biology, Volume 61, Issue 5, October 2012, Pages 717–726, https://doi.org/10.1093/sysbio/sys004' + } + 'Tetrapods-UCE-5Kv1' { + title = 'Tetrapods; 5,472 baits for 5,060 UCEs; version 1' + url = 'https://ndownloader.figshare.com/files/10513882' + citation = 'Sun Keping, Meiklejohn Kelly A., Faircloth Brant C., Glenn Travis C., Braun Edward L. and Kimball Rebecca T., 2014 The evolution of peafowl and other taxa with ocelli (eyespots): a phylogenomic approach. Proc. R. Soc. B. 281: 20140823. 20140823. http://doi.org/10.1098/rspb.2014.0823' + } + 'Actinopterygians-0.5Kv1' { + title = 'Actinopterygians; 2,001 baits for 500 UCEs; version 1' + url = 'https://ndownloader.figshare.com/files/10513885' + citation = 'Faircloth BC, Sorenson L, Santini F, Alfaro ME (2013) A Phylogenomic Perspective on the Radiation of Ray-Finned Fishes Based upon Targeted Sequencing of Ultraconserved Elements (UCEs). PLOS ONE 8(6): e65923. 
https://doi.org/10.1371/journal.pone.0065923' + } + 'Acanthomorphs-1Kv1' { + title = 'Acanthomorphs; 2,628 baits for 1,314 UCEs; version 1' + url = 'https://ndownloader.figshare.com/files/11188235' + citation = 'McGee Matthew D., Faircloth Brant C., Borstein Samuel R., Zheng Jimmy, Darrin Hulsey C., Wainwright Peter C. and Alfaro Michael E. 2016 Replicated divergence in cichlid radiations mirrors a major vertebrate innovation. Proc. R. Soc. B. 283: 20151413. 20151413. http://doi.org/10.1098/rspb.2015.1413' + } + 'Arachnida-1.1Kv1' { + title = 'Arachnida; 14,799 baits for 1,120 UCEs; version 1' + url = 'https://ndownloader.figshare.com/files/6042078' + citation = 'Starrett, J., Derkarabetian, S., Hedin, M., Bryson Jr. R. W., McCormack. J. E., & Faircloth. B. C. (2016). High phylogenetic utility of an Ultraconserved element probe set designed for Arachnida. Molecular Ecology Resources. 17(4), 812-823. https://doi.org/10.1111/1755-0998.12621' + } + 'Coleoptera-1.1Kv1' { + title = 'Coleoptera; 13,674 baits for 1,172 UCEs; version 1' + url = 'https://ndownloader.figshare.com/files/6042081' + citation = 'Baca. S. M., Alexander. A., Gustafson. G. T., & Short. A. E. Z. (2017). Ultraconserved elements show utility in phylogenetic inference of Adephaga (Coleoptera) and suggest paraphyly of Hydradephaga. Systematic Entomology. 42(4), 786-795. https://doi.org/10.1111/syen.12244' + } + 'Diptera-2.7Kv1' { + title = 'Diptera; 31,328 baits for 2,711 UCEs; version 1' + url = 'https://ndownloader.figshare.com/files/6042084' + citation = 'Faircloth. B. C. (2017). Identifying conserved genomic elements and designing universal bait sets to enrich them. Methods in Ecology and Evolution. 8(9), 1103-1112. https://doi.org/10.1111/2041-210X.12754' + } + 'Hemiptera-2.7Kv1' { + title = 'Hemiptera; 40,207 baits for 2,731 UCEs; version 1' + url = 'https://ndownloader.figshare.com/files/6042087' + citation = 'Faircloth. B. C. (2017). 
Identifying conserved genomic elements and designing universal bait sets to enrich them. Methods in Ecology and Evolution. 8(9), 1103-1112. https://doi.org/10.1111/2041-210X.12754' + } + 'Hymenoptera-1.5Kv1' { + title = 'Hymenoptera; 2,749 baits for 1,510 UCEs; version 1' + url = 'https://ndownloader.figshare.com/files/10513873' + citation = 'Faircloth. B. C., Branstetter. M. G., White. N. D., & Brady. S. G. (2014). Target enrichment of ultraconserved elements from arthropods provides a genomic perspective on relationships among Hymenoptera. Molecular Ecology Resources. 15(3), 489-501. https://doi.org/10.1111/1755-0998.12328' + } + 'Hymenoptera-2.5Kv2' { + title = 'Hymenoptera; 31,829 baits for 2,590 UCEs; version 2' + url = 'https://ndownloader.figshare.com/files/7539658' + citation = 'Branstetter. M. G., Longino. J. T., Ward. P. S., & Faircloth. B. C. (2017). Enriching the ant tree of life: enhanced UCE bait set for genome-scale phylogenetics of ants and other Hymenoptera. Methods in Ecology and Evolution. 8(6), 768-776. https://doi.org/10.1111/2041-210X.12742' + } + 'Anthozoa-1.7Kv1' { + title = 'Anthozoa; 16,306 baits for 720 UCEs and 1,071 exons; version 1' + url = 'https://ndownloader.figshare.com/files/10513894' + citation = 'Quattrini. A. M., Faircloth. B. C., Duenas. L. F., Bridge. T. C. L., Brugler. M. R., Calixto-Botia. I. F., DeLeo. D. M., Foret. S., Herrera. S., Lee. S. M. Y., Miller. D. J., Prada. C., Radis-Baptista. G., Ramirez-Portilla. C., Sanchez. J. A., Rodriguez. E., & McFadden. C. S. (2017). Universal target-enrichment baits for anthozoan (Cnidaria) phylogenomics: New approaches to long-standing problems. Molecular Ecology Resources. 18(2), 281-295. 
https://doi.org/10.1111/1755-0998.12736' + } + } +} diff --git a/conf/test.config b/conf/test.config index d7b277f..e472172 100644 --- a/conf/test.config +++ b/conf/test.config @@ -18,12 +18,4 @@ params { max_cpus = 2 max_memory = '6.GB' max_time = '6.h' - - // Input data - // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets - // TODO nf-core: Give any required params for the test so that command line flags are not needed - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv' - - // Genome references - genome = 'R64-1-1' } diff --git a/conf/test_data.config b/conf/test_data.config new file mode 100644 index 0000000..81ca3fd --- /dev/null +++ b/conf/test_data.config @@ -0,0 +1,764 @@ +// README: +// https://github.com/nf-core/test-datasets/blob/modules/README.md + +params { + // Base directory for test data + test_data_base = "https://raw.githubusercontent.com/nf-core/test-datasets/modules" + + test_data { + 'sarscov2' { + 'genome' { + genome_fasta = "${params.test_data_base}/data/genomics/sarscov2/genome/genome.fasta" + genome_fasta_gz = "${params.test_data_base}/data/genomics/sarscov2/genome/genome.fasta.gz" + genome_fasta_fai = "${params.test_data_base}/data/genomics/sarscov2/genome/genome.fasta.fai" + genome_fasta_txt_zst = "${params.test_data_base}/data/genomics/sarscov2/genome/genome.fasta.txt.zst" + genome_dict = "${params.test_data_base}/data/genomics/sarscov2/genome/genome.dict" + genome_gff3 = "${params.test_data_base}/data/genomics/sarscov2/genome/genome.gff3" + genome_gff3_gz = "${params.test_data_base}/data/genomics/sarscov2/genome/genome.gff3.gz" + genome_gtf = "${params.test_data_base}/data/genomics/sarscov2/genome/genome.gtf" + genome_paf = "${params.test_data_base}/data/genomics/sarscov2/genome/genome.paf" + genome_sizes = "${params.test_data_base}/data/genomics/sarscov2/genome/genome.sizes" + transcriptome_fasta = 
"${params.test_data_base}/data/genomics/sarscov2/genome/transcriptome.fasta" + transcriptome_paf = "${params.test_data_base}/data/genomics/sarscov2/genome/transcriptome.paf" + proteome_fasta = "${params.test_data_base}/data/genomics/sarscov2/genome/proteome.fasta" + proteome_fasta_gz = "${params.test_data_base}/data/genomics/sarscov2/genome/proteome.fasta.gz" + + test_bed = "${params.test_data_base}/data/genomics/sarscov2/genome/bed/test.bed" + test_bed_gz = "${params.test_data_base}/data/genomics/sarscov2/genome/bed/test.bed.gz" + test2_bed = "${params.test_data_base}/data/genomics/sarscov2/genome/bed/test2.bed" + test_bed12 = "${params.test_data_base}/data/genomics/sarscov2/genome/bed/test.bed12" + baits_bed = "${params.test_data_base}/data/genomics/sarscov2/genome/bed/baits.bed" + bed_autosql = "${params.test_data_base}/data/genomics/sarscov2/genome/bed/bed6alt.as" + + reference_cnn = "${params.test_data_base}/data/genomics/sarscov2/genome/cnn/reference.cnn" + + kraken2 = "${params.test_data_base}/data/genomics/sarscov2/genome/db/kraken2" + kraken2_tar_gz = "${params.test_data_base}/data/genomics/sarscov2/genome/db/kraken2.tar.gz" + + kraken2_bracken = "${params.test_data_base}/data/genomics/sarscov2/genome/db/kraken2_bracken" + kraken2_bracken_tar_gz = "${params.test_data_base}/data/genomics/sarscov2/genome/db/kraken2_bracken.tar.gz" + + kaiju = "${params.test_data_base}/data/genomics/sarscov2/genome/db/kaiju" + kaiju_tar_gz = "${params.test_data_base}/data/genomics/sarscov2/genome/db/kaiju.tar.gz" + + kofamscan_profiles_tar_gz = "${params.test_data_base}/data/genomics/sarscov2/genome/db/kofamscan/profiles.tar.gz" + kofamscan_ko_list_gz = "${params.test_data_base}/data/genomics/sarscov2/genome/db/kofamscan/ko_list.gz" + + ncbi_taxmap_zip = "${params.test_data_base}/data/genomics/sarscov2/genome/db/maltextract/ncbi_taxmap.zip" + taxon_list_txt = "${params.test_data_base}/data/genomics/sarscov2/genome/db/maltextract/taxon_list.txt" + + mmseqs_tar_gz = 
"${params.test_data_base}/data/genomics/sarscov2/genome/db/mmseqs.tar.gz" + + all_sites_fas = "${params.test_data_base}/data/genomics/sarscov2/genome/alignment/all_sites.fas" + informative_sites_fas = "${params.test_data_base}/data/genomics/sarscov2/genome/alignment/informative_sites.fas" + + contigs_genome_maf_gz = "${params.test_data_base}/data/genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz" + contigs_genome_par = "${params.test_data_base}/data/genomics/sarscov2/genome/alignment/last/contigs.genome.par" + lastdb_tar_gz = "${params.test_data_base}/data/genomics/sarscov2/genome/alignment/last/lastdb.tar.gz" + + baits_interval_list = "${params.test_data_base}/data/genomics/sarscov2/genome/picard/baits.interval_list" + targets_interval_list = "${params.test_data_base}/data/genomics/sarscov2/genome/picard/targets.interval_list" + regions_txt = "${params.test_data_base}/data/genomics/sarscov2/genome/graphtyper/regions.txt" + lc_extrap_mr = "${params.test_data_base}/data/delete_me/preseq/SRR1003759_5M_subset.mr" + } + 'illumina' { + test_single_end_bam = "${params.test_data_base}/data/genomics/sarscov2/illumina/bam/test.single_end.bam" + test_single_end_sorted_bam = "${params.test_data_base}/data/genomics/sarscov2/illumina/bam/test.single_end.sorted.bam" + test_single_end_sorted_bam_bai = "${params.test_data_base}/data/genomics/sarscov2/illumina/bam/test.single_end.sorted.bam.bai" + test_paired_end_bam = "${params.test_data_base}/data/genomics/sarscov2/illumina/bam/test.paired_end.bam" + test_paired_end_sorted_bam = "${params.test_data_base}/data/genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam" + test_paired_end_sorted_bam_bai = "${params.test_data_base}/data/genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai" + test_paired_end_methylated_bam = "${params.test_data_base}/data/genomics/sarscov2/illumina/bam/test.paired_end.methylated.bam" + test_paired_end_methylated_sorted_bam = 
"${params.test_data_base}/data/genomics/sarscov2/illumina/bam/test.paired_end.methylated.sorted.bam" + test_paired_end_methylated_sorted_bam_bai = "${params.test_data_base}/data/genomics/sarscov2/illumina/bam/test.paired_end.methylated.sorted.bam.bai" + test_unaligned_bam = "${params.test_data_base}/data/genomics/sarscov2/illumina/bam/test.unaligned.bam" + + test_1_fastq_gz = "${params.test_data_base}/data/genomics/sarscov2/illumina/fastq/test_1.fastq.gz" + test_2_fastq_gz = "${params.test_data_base}/data/genomics/sarscov2/illumina/fastq/test_2.fastq.gz" + test_interleaved_fastq_gz = "${params.test_data_base}/data/genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz" + test_1_fastq_txt_zst = "${params.test_data_base}/data/genomics/sarscov2/illumina/fastq/test_1.fastq.txt.zst" + test2_1_fastq_gz = "${params.test_data_base}/data/genomics/sarscov2/illumina/fastq/test2_1.fastq.gz" + test2_2_fastq_gz = "${params.test_data_base}/data/genomics/sarscov2/illumina/fastq/test2_2.fastq.gz" + test_methylated_1_fastq_gz = "${params.test_data_base}/data/genomics/sarscov2/illumina/fastq/test.methylated_1.fastq.gz" + test_methylated_2_fastq_gz = "${params.test_data_base}/data/genomics/sarscov2/illumina/fastq/test.methylated_2.fastq.gz" + test_1_fastq_gz_fastqc_html = "${params.test_data_base}/data/genomics/sarscov2/illumina/fastqc/test_fastqc.html" + test_1_fastq_gz_fastqc_zip = "${params.test_data_base}/data/genomics/sarscov2/illumina/fastqc/test_fastqc.zip" + + test_bedgraph = "${params.test_data_base}/data/genomics/sarscov2/illumina/bedgraph/test.bedgraph" + + test_bigwig = "${params.test_data_base}/data/genomics/sarscov2/illumina/bigwig/test.bigwig" + + test_wig_gz = "${params.test_data_base}/data/genomics/sarscov2/illumina/wig/test.wig.gz" + + test_baserecalibrator_table = "${params.test_data_base}/data/genomics/sarscov2/illumina/gatk/test.baserecalibrator.table" + + test_computematrix_mat_gz = 
"${params.test_data_base}/data/genomics/sarscov2/illumina/deeptools/test.computeMatrix.mat.gz" + + test_bcf = "${params.test_data_base}/data/genomics/sarscov2/illumina/vcf/test.bcf" + + test_vcf = "${params.test_data_base}/data/genomics/sarscov2/illumina/vcf/test.vcf" + test_vcf_gz = "${params.test_data_base}/data/genomics/sarscov2/illumina/vcf/test.vcf.gz" + test_vcf_gz_tbi = "${params.test_data_base}/data/genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi" + test2_vcf = "${params.test_data_base}/data/genomics/sarscov2/illumina/vcf/test2.vcf" + test2_vcf_gz = "${params.test_data_base}/data/genomics/sarscov2/illumina/vcf/test2.vcf.gz" + test2_vcf_gz_tbi = "${params.test_data_base}/data/genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi" + test2_vcf_targets_tsv_gz = "${params.test_data_base}/data/genomics/sarscov2/illumina/vcf/test2.targets.tsv.gz" + test3_vcf = "${params.test_data_base}/data/genomics/sarscov2/illumina/vcf/test3.vcf" + test3_vcf_gz = "${params.test_data_base}/data/genomics/sarscov2/illumina/vcf/test3.vcf.gz" + test3_vcf_gz_tbi = "${params.test_data_base}/data/genomics/sarscov2/illumina/vcf/test3.vcf.gz.tbi" + + contigs_fasta = "${params.test_data_base}/data/genomics/sarscov2/illumina/fasta/contigs.fasta" + scaffolds_fasta = "${params.test_data_base}/data/genomics/sarscov2/illumina/fasta/scaffolds.fasta" + + assembly_gfa = "${params.test_data_base}/data/genomics/sarscov2/illumina/gfa/assembly.gfa" + assembly_gfa_bgz = "${params.test_data_base}/data/genomics/sarscov2/illumina/gfa/assembly.gfa.bgz" + assembly_gfa_gz = "${params.test_data_base}/data/genomics/sarscov2/illumina/gfa/assembly.gfa.gz" + assembly_gfa_zst = "${params.test_data_base}/data/genomics/sarscov2/illumina/gfa/assembly.gfa.zst" + + test_single_end_bam_readlist_txt = "${params.test_data_base}/data/genomics/sarscov2/illumina/picard/test.single_end.bam.readlist.txt" + + SRR13255544_tar_gz = "${params.test_data_base}/data/genomics/sarscov2/illumina/sra/SRR13255544.tar.gz" + SRR11140744_tar_gz = 
"${params.test_data_base}/data/genomics/sarscov2/illumina/sra/SRR11140744.tar.gz" + } + 'nanopore' { + test_sorted_bam = "${params.test_data_base}/data/genomics/sarscov2/nanopore/bam/test.sorted.bam" + test_sorted_bam_bai = "${params.test_data_base}/data/genomics/sarscov2/nanopore/bam/test.sorted.bam.bai" + + fast5_tar_gz = "${params.test_data_base}/data/genomics/sarscov2/nanopore/fast5/fast5.tar.gz" + + test_fastq_gz = "${params.test_data_base}/data/genomics/sarscov2/nanopore/fastq/test.fastq.gz" + + test_sequencing_summary = "${params.test_data_base}/data/genomics/sarscov2/nanopore/sequencing_summary/test.sequencing_summary.txt" + } + 'metagenome' { + classified_reads_assignment = "${params.test_data_base}/data/genomics/sarscov2/metagenome/test_1.kraken2.reads.txt" + kraken_report = "${params.test_data_base}/data/genomics/sarscov2/metagenome/test_1.kraken2.report.txt" + krona_taxonomy = "${params.test_data_base}/data/genomics/sarscov2/metagenome/krona_taxonomy.tab" + seqid2taxid_map = "${params.test_data_base}/data/genomics/sarscov2/metagenome/seqid2taxid.map" + nodes_dmp = "${params.test_data_base}/data/genomics/sarscov2/metagenome/nodes.dmp" + names_dmp = "${params.test_data_base}/data/genomics/sarscov2/metagenome/names.dmp" + prot_nodes_dmp = "${params.test_data_base}/data/genomics/sarscov2/metagenome/prot_nodes.dmp" + prot_names_dmp = "${params.test_data_base}/data/genomics/sarscov2/metagenome/prot_names.dmp" + prot_accession2taxid_gz = "${params.test_data_base}/data/genomics/sarscov2/metagenome/prot.accession2taxid.gz" + } + } + 'mus_musculus' { + 'genome' { + rnaseq_samplesheet = "${params.test_data_base}/data/genomics/mus_musculus/rnaseq_expression/SRP254919.samplesheet.csv" + rnaseq_genemeta = "${params.test_data_base}/data/genomics/mus_musculus/rnaseq_expression/SRP254919.gene_meta.tsv" + rnaseq_contrasts = "${params.test_data_base}/data/genomics/mus_musculus/rnaseq_expression/SRP254919.contrasts.csv" + rnaseq_matrix = 
"${params.test_data_base}/data/genomics/mus_musculus/rnaseq_expression/SRP254919.salmon.merged.gene_counts.top1000cov.tsv" + rnaseq_lengths = "${params.test_data_base}/data/genomics/mus_musculus/rnaseq_expression/SRP254919.spoofed_lengths.tsv" + deseq_results = "${params.test_data_base}/data/genomics/mus_musculus/rnaseq_expression/SRP254919.salmon.merged.deseq2.results.tsv" + genome_19_fasta = "${params.test_data_base}/data/genomics/mus_musculus/genome/chr19.fa.gz" + genome_19_gtf = "${params.test_data_base}/data/genomics/mus_musculus/genome/chr19.filtered.gtf.gz" + } + 'illumina' { + test_1_fastq_gz = "${params.test_data_base}/data/genomics/mus_musculus/mageck/ERR376998.small.fastq.gz" + test_2_fastq_gz = "${params.test_data_base}/data/genomics/mus_musculus/mageck/ERR376999.small.fastq.gz" + genome_config = "${params.test_data_base}/data/genomics/mus_musculus/illumina/10xgenomics/multiome/cellranger_arc_mkref_test_mm39_chr19_config.json" + multiome_lib_csv = "${params.test_data_base}/data/genomics/mus_musculus/illumina/10xgenomics/multiome/lib.csv" + test_scARC_gex_R1_fastq_gz = "${params.test_data_base}/data/genomics/mus_musculus/illumina/10xgenomics/multiome/SRR18907480_chr19_sub_S1_L001_R1_001.fastq.gz" + test_scARC_gex_R2_fastq_gz = "${params.test_data_base}/data/genomics/mus_musculus/illumina/10xgenomics/multiome/SRR18907480_chr19_sub_S1_L001_R2_001.fastq.gz" + test_scARC_atac_R1_fastq_gz = "${params.test_data_base}/data/genomics/mus_musculus/illumina/10xgenomics/multiome/SRR18907481_chr19_sub_S1_L001_R1_001.fastq.gz" + test_scARC_atac_R2_fastq_gz = "${params.test_data_base}/data/genomics/mus_musculus/illumina/10xgenomics/multiome/SRR18907481_chr19_sub_S1_L001_R2_001.fastq.gz" + test_scARC_atac_I2_fastq_gz = "${params.test_data_base}/data/genomics/mus_musculus/illumina/10xgenomics/multiome/SRR18907481_chr19_sub_S1_L001_I2_001.fastq.gz" + } + 'csv' { + count_table = "${params.test_data_base}/data/genomics/mus_musculus/mageck/count_table.csv" + library = 
"${params.test_data_base}/data/genomics/mus_musculus/mageck/yusa_library.csv" + } + 'txt' { + design_matrix = "${params.test_data_base}/data/genomics/mus_musculus/mageck/design_matrix.txt" + } + } + 'homo_sapiens' { + '10xgenomics' { + cellranger { + test_10x_10k_pbmc_5fb_fastq_1_gz = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger/10k_pbmc/fastqs/5gex/5fb/subsampled_sc5p_v2_hs_PBMC_10k_5fb_S1_L001_R1_001.fastq.gz" + test_10x_10k_pbmc_5fb_fastq_2_gz = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger/10k_pbmc/fastqs/5gex/5fb/subsampled_sc5p_v2_hs_PBMC_10k_5fb_S1_L001_R2_001.fastq.gz" + test_10x_10k_pbmc_5gex_fastq_1_gz = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger/10k_pbmc/fastqs/5gex/5gex/subsampled_sc5p_v2_hs_PBMC_10k_5gex_S1_L001_R1_001.fastq.gz" + test_10x_10k_pbmc_5gex_fastq_2_gz = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger/10k_pbmc/fastqs/5gex/5gex/subsampled_sc5p_v2_hs_PBMC_10k_5gex_S1_L001_R2_001.fastq.gz" + test_10x_10k_pbmc_b_fastq_1_gz = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger/10k_pbmc/fastqs/bcell/subsampled_sc5p_v2_hs_PBMC_10k_b_S1_L001_R1_001.fastq.gz" + test_10x_10k_pbmc_b_fastq_2_gz = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger/10k_pbmc/fastqs/bcell/subsampled_sc5p_v2_hs_PBMC_10k_b_S1_L001_R2_001.fastq.gz" + test_10x_10k_pbmc_t_fastq_1_gz = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger/10k_pbmc/fastqs/tcell/subsampled_sc5p_v2_hs_PBMC_10k_t_S1_L001_R1_001.fastq.gz" + test_10x_10k_pbmc_t_fastq_2_gz = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger/10k_pbmc/fastqs/tcell/subsampled_sc5p_v2_hs_PBMC_10k_t_S1_L001_R2_001.fastq.gz" + test_10x_10k_pbmc_feature_ref_csv = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger/10k_pbmc/sc5p_v2_hs_PBMC_10k_multi_5gex_5fb_b_t_feature_ref.csv" + + 
test_10x_10k_pbmc_cmo_cmo_fastq_1_gz = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger/10k_pbmc_cmo/fastqs/cmo/subsampled_SC3_v3_NextGem_DI_CellPlex_Human_PBMC_10K_1_multiplexing_capture_S1_L001_R1_001.fastq.gz" + test_10x_10k_pbmc_cmo_cmo_fastq_2_gz = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger/10k_pbmc_cmo/fastqs/cmo/subsampled_SC3_v3_NextGem_DI_CellPlex_Human_PBMC_10K_1_multiplexing_capture_S1_L001_R2_001.fastq.gz" + test_10x_10k_pbmc_cmo_gex1_fastq_1_gz = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger/10k_pbmc_cmo/fastqs/gex_1/subsampled_SC3_v3_NextGem_DI_CellPlex_Human_PBMC_10K_1_gex_S2_L001_R1_001.fastq.gz" + test_10x_10k_pbmc_cmo_gex1_fastq_2_gz = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger/10k_pbmc_cmo/fastqs/gex_1/subsampled_SC3_v3_NextGem_DI_CellPlex_Human_PBMC_10K_1_gex_S2_L001_R2_001.fastq.gz" + test_10x_10k_pbmc_cmo_gex2_fastq_1_gz = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger/10k_pbmc_cmo/fastqs/gex_2/subsampled_SC3_v3_NextGem_DI_CellPlex_Human_PBMC_10K_2_gex_S1_L001_R1_001.fastq.gz" + test_10x_10k_pbmc_cmo_gex2_fastq_2_gz = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger/10k_pbmc_cmo/fastqs/gex_2/subsampled_SC3_v3_NextGem_DI_CellPlex_Human_PBMC_10K_2_gex_S1_L001_R2_001.fastq.gz" + test_10x_10k_pbmc_cmo_feature_ref_csv = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger/10k_pbmc_cmo/10k_pbmc_cmo_count_feature_reference.csv" + + test_10x_5k_cmvpos_tcells_ab_fastq_1_gz = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger/5k_cmvpos_tcells/fastqs/ab/subsampled_5k_human_antiCMV_T_TBNK_connect_AB_S2_L004_R1_001.fastq.gz" + test_10x_5k_cmvpos_tcells_ab_fastq_2_gz = 
"${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger/5k_cmvpos_tcells/fastqs/ab/subsampled_5k_human_antiCMV_T_TBNK_connect_AB_S2_L004_R2_001.fastq.gz" + test_10x_5k_cmvpos_tcells_gex1_fastq_1_gz = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger/5k_cmvpos_tcells/fastqs/gex_1/subsampled_5k_human_antiCMV_T_TBNK_connect_GEX_1_S1_L001_R1_001.fastq.gz" + test_10x_5k_cmvpos_tcells_gex1_fastq_2_gz = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger/5k_cmvpos_tcells/fastqs/gex_1/subsampled_5k_human_antiCMV_T_TBNK_connect_GEX_1_S1_L001_R2_001.fastq.gz" + test_10x_5k_cmvpos_tcells_vdj_fastq_1_gz = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger/5k_cmvpos_tcells/fastqs/vdj/subsampled_5k_human_antiCMV_T_TBNK_connect_VDJ_S1_L001_R1_001.fastq.gz" + test_10x_5k_cmvpos_tcells_vdj_fastq_2_gz = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger/5k_cmvpos_tcells/fastqs/vdj/subsampled_5k_human_antiCMV_T_TBNK_connect_VDJ_S1_L001_R2_001.fastq.gz" + test_10x_5k_cmvpos_tcells_feature_ref_csv = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger/5k_cmvpos_tcells/5k_human_antiCMV_T_TBNK_connect_Multiplex_count_feature_reference.csv" + + test_10x_vdj_ref_json = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger/references/vdj/refdata-cellranger-vdj-GRCh38-alts-ensembl-5.0.0/reference.json" + test_10x_vdj_ref_fasta = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger/references/vdj/refdata-cellranger-vdj-GRCh38-alts-ensembl-5.0.0/fasta/regions.fa" + test_10x_vdj_ref_suppfasta = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger/references/vdj/refdata-cellranger-vdj-GRCh38-alts-ensembl-5.0.0/fasta/supp_regions.fa" + + test_scATAC_1_fastq_gz = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger-atac/test_scATAC_S1_L001_R1_001.fastq.gz" + 
test_scATAC_2_fastq_gz = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger-atac/test_scATAC_S1_L001_R2_001.fastq.gz" + test_scATAC_3_fastq_gz = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger-atac/test_scATAC_S1_L001_R3_001.fastq.gz" + test_scATAC_I_fastq_gz = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger-atac/test_scATAC_S1_L001_I1_001.fastq.gz" + } + spaceranger { + test_10x_ffpe_cytassist_fastq_1_gz = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/spaceranger/human-brain-cancer-11-mm-capture-area-ffpe-2-standard_v2_ffpe_cytassist/CytAssist_11mm_FFPE_Human_Glioblastoma_2_S1_L001_R1_001.fastq.gz" + test_10x_ffpe_cytassist_fastq_2_gz = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/spaceranger/human-brain-cancer-11-mm-capture-area-ffpe-2-standard_v2_ffpe_cytassist/CytAssist_11mm_FFPE_Human_Glioblastoma_2_S1_L001_R2_001.fastq.gz" + test_10x_ffpe_cytassist_image = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/spaceranger/human-brain-cancer-11-mm-capture-area-ffpe-2-standard_v2_ffpe_cytassist/CytAssist_11mm_FFPE_Human_Glioblastoma_image.tif" + test_10x_ffpe_cytassist_probeset = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/spaceranger/human-brain-cancer-11-mm-capture-area-ffpe-2-standard_v2_ffpe_cytassist/CytAssist_11mm_FFPE_Human_Glioblastoma_probe_set.csv" + + test_10x_ffpe_v1_fastq_1_gz = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/spaceranger/human-ovarian-cancer-1-standard_v1_ffpe/Visium_FFPE_Human_Ovarian_Cancer_S1_L001_R1_001.fastq.gz" + test_10x_ffpe_v1_fastq_2_gz = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/spaceranger/human-ovarian-cancer-1-standard_v1_ffpe/Visium_FFPE_Human_Ovarian_Cancer_S1_L001_R2_001.fastq.gz" + test_10x_ffpe_v1_image = 
"${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/spaceranger/human-ovarian-cancer-1-standard_v1_ffpe/Visium_FFPE_Human_Ovarian_Cancer_image.jpg" + } + } + 'genome' { + genome_elfasta = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome.elfasta" + genome_fasta = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome.fasta" + genome_fasta_fai = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome.fasta.fai" + genome_fasta_gz = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome.fasta.gz" + genome_fasta_gz_fai = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome.fasta.gz.fai" + genome_fasta_gz_gzi = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome.fasta.gz.gzi" + genome_strtablefile = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome_strtablefile.zip" + genome_dict = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome.dict" + genome_gff3 = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome.gff3" + genome_gtf = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome.gtf" + genome_interval_list = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome.interval_list" + genome_multi_interval_bed = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome.multi_intervals.bed" + genome_blacklist_interval_bed = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome.blacklist_intervals.bed" + genome_sizes = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome.sizes" + genome_bed = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome.bed" + genome_header = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome.header" + genome_bed_gz = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome.bed.gz" + genome_bed_gz_tbi = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome.bed.gz.tbi" + genome_elsites = 
"${params.test_data_base}/data/genomics/homo_sapiens/genome/genome.elsites" + transcriptome_fasta = "${params.test_data_base}/data/genomics/homo_sapiens/genome/transcriptome.fasta" + genome2_fasta = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome2.fasta" + genome_chain_gz = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome.chain.gz" + genome_annotated_interval_tsv = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome.annotated_intervals.tsv" + genome_mt_gb = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome.NC_012920_1.gb" + genome_preprocessed_count_tsv = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome.preprocessed_intervals.counts.tsv" + genome_preprocessed_interval_list = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome.preprocessed_intervals.interval_list" + genome_ploidy_model = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome.ploidy_model.tar.gz" + genome_ploidy_calls = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome.ploidy_calls.tar.gz" + genome_germline_cnv_model = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome.germline_cnv_model.tar.gz" + genome_germline_cnv_calls = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome.germline_cnv_calls.tar.gz" + genome_motifs = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome_motifs.txt" + genome_config = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome_config.json" + + genome_1_fasta = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr1/genome.fasta.gz" + genome_1_gtf = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr1/genome.gtf" + + genome_21_sdf = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/sequence/genome_sdf.tar.gz" + genome_21_fasta = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/sequence/genome.fasta" + genome_21_fasta_fai = 
"${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai" + genome_21_gencode_gtf = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/sequence/chr21_gencode.gtf" + genome_21_dict = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/sequence/genome.dict" + genome_21_sizes = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/sequence/genome.sizes" + genome_21_interval_list = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/sequence/genome.interval_list" + genome_21_annotated_bed = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/sequence/annotated.bed" + genome_21_multi_interval_bed = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed" + genome_21_multi_interval_antitarget_bed = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.antitarget.bed" + genome_21_multi_interval_bed_gz = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed.gz" + genome_21_multi_interval_bed_gz_tbi = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed.gz.tbi" + genome_21_chromosomes_dir = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/sequence/chromosomes.tar.gz" + genome_21_reference_cnn = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/sequence/reference_chr21.cnn" + genome_21_eigenstrat_snp = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/sequence/chr_21.snp" + genome_21_stitch_posfile = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/sequence/dbsnp_138.hg38.first_10_biallelic_sites.tsv" + + dbsnp_146_hg38_elsites = "${params.test_data_base}/data/genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.elsites" + dbsnp_146_hg38_vcf_gz = "${params.test_data_base}/data/genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz" + 
dbsnp_146_hg38_vcf_gz_tbi = "${params.test_data_base}/data/genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz.tbi" + gnomad_r2_1_1_vcf_gz = "${params.test_data_base}/data/genomics/homo_sapiens/genome/vcf/gnomAD.r2.1.1.vcf.gz" + gnomad_r2_1_1_vcf_gz_tbi = "${params.test_data_base}/data/genomics/homo_sapiens/genome/vcf/gnomAD.r2.1.1.vcf.gz.tbi" + mills_and_1000g_indels_vcf_gz = "${params.test_data_base}/data/genomics/homo_sapiens/genome/vcf/mills_and_1000G.indels.vcf.gz" + mills_and_1000g_indels_vcf_gz_tbi = "${params.test_data_base}/data/genomics/homo_sapiens/genome/vcf/mills_and_1000G.indels.vcf.gz.tbi" + syntheticvcf_short_vcf_gz = "${params.test_data_base}/data/genomics/homo_sapiens/genome/vcf/syntheticvcf_short.vcf.gz" + syntheticvcf_short_vcf_gz_tbi = "${params.test_data_base}/data/genomics/homo_sapiens/genome/vcf/syntheticvcf_short.vcf.gz.tbi" + syntheticvcf_short_score = "${params.test_data_base}/data/genomics/homo_sapiens/genome/vcf/syntheticvcf_short.score" + gnomad_r2_1_1_sv_vcf_gz = "${params.test_data_base}/data/genomics/homo_sapiens/genome/vcf/gnomAD.r2.1.1-sv.vcf.gz" + gnomad2_r2_1_1_sv_vcf_gz = "${params.test_data_base}/data/genomics/homo_sapiens/genome/vcf/gnomAD2.r2.1.1-sv.vcf.gz" + + hapmap_3_3_hg38_21_vcf_gz = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/germlineresources/hapmap_3.3.hg38.vcf.gz" + hapmap_3_3_hg38_21_vcf_gz_tbi = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/germlineresources/hapmap_3.3.hg38.vcf.gz.tbi" + res_1000g_omni2_5_hg38_21_vcf_gz = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/germlineresources/1000G_omni2.5.hg38.vcf.gz" + res_1000g_omni2_5_hg38_21_vcf_gz_tbi = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/germlineresources/1000G_omni2.5.hg38.vcf.gz.tbi" + res_1000g_phase1_snps_hg38_21_vcf_gz = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/germlineresources/1000G_phase1.snps.hg38.vcf.gz" + 
res_1000g_phase1_snps_hg38_21_vcf_gz_tbi = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/germlineresources/1000G_phase1.snps.hg38.vcf.gz.tbi" + dbsnp_138_hg38_21_vcf_gz = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/germlineresources/dbsnp_138.hg38.vcf.gz" + dbsnp_138_hg38_21_vcf_gz_tbi = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/germlineresources/dbsnp_138.hg38.vcf.gz.tbi" + gnomad_r2_1_1_21_vcf_gz = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/germlineresources/gnomAD.r2.1.1.vcf.gz" + gnomad_r2_1_1_21_vcf_gz_tbi = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/germlineresources/gnomAD.r2.1.1.vcf.gz.tbi" + mills_and_1000g_indels_21_vcf_gz = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/germlineresources/mills_and_1000G.indels.hg38.vcf.gz" + mills_and_1000g_indels_21_vcf_gz_tbi = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/germlineresources/mills_and_1000G.indels.hg38.vcf.gz.tbi" + haplotype_map = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/germlineresources/haplotype_map.txt" + dbNSFP_4_1a_21_hg38_txt_gz = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/germlineresources/dbNSFP4.1a.21.txt.gz" + dbNSFP_4_1a_21_hg38_txt_tbi = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/germlineresources/dbNSFP4.1a.21.txt.gz.tbi" + ngscheckmate_bed = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/germlineresources/SNP_GRCh38_hg38_wChr.bed" + + index_salmon = "${params.test_data_base}/data/genomics/homo_sapiens/genome/index/salmon" + repeat_expansions = "${params.test_data_base}/data/genomics/homo_sapiens/genome/loci/repeat_expansions.json" + justhusky_ped = "${params.test_data_base}/data/genomics/homo_sapiens/genome/vcf/ped/justhusky.ped" + justhusky_minimal_vcf_gz = "${params.test_data_base}/data/genomics/homo_sapiens/genome/vcf/ped/justhusky_minimal.vcf.gz" 
+ justhusky_minimal_vcf_gz_tbi = "${params.test_data_base}/data/genomics/homo_sapiens/genome/vcf/ped/justhusky_minimal.vcf.gz.tbi" + + vcfanno_tar_gz = "${params.test_data_base}/data/genomics/homo_sapiens/genome/vcf/vcfanno/vcfanno_grch38_module_test.tar.gz" + vcfanno_toml = "${params.test_data_base}/data/genomics/homo_sapiens/genome/vcf/vcfanno/vcfanno.toml" + updsites_bed = "${params.test_data_base}/data/genomics/homo_sapiens/genome/updsites.bed" + + prg_input = "${params.test_data_base}/data/genomics/homo_sapiens/genome/PRG_test.zip" + crispr_functional_counts = "${params.test_data_base}/data/genomics/homo_sapiens/genome/tsv/functional_genomics_counts.tsv" + crispr_functional_library = "${params.test_data_base}/data/genomics/homo_sapiens/genome/tsv/library_functional_genomics.tsv" + + vep_cache = "${params.test_data_base}/data/genomics/homo_sapiens/genome/vep.tar.gz" + affy_array_samplesheet = "${params.test_data_base}/data/genomics/homo_sapiens/array_expression/GSE38751.csv" + affy_array_celfiles_tar = "${params.test_data_base}/data/genomics/homo_sapiens/array_expression/GSE38751_RAW.tar" + + } + 'pangenome' { + pangenome_fa = "${params.test_data_base}/data/pangenomics/homo_sapiens/pangenome.fa" + pangenome_fa_bgzip = "${params.test_data_base}/data/pangenomics/homo_sapiens/pangenome.fa.gz" + pangenome_fa_bgzip_fai = "${params.test_data_base}/data/pangenomics/homo_sapiens/pangenome.fa.gz.fai" + pangenome_fa_bgzip_gzi = "${params.test_data_base}/data/pangenomics/homo_sapiens/pangenome.fa.gz.gzi" + pangenome_paf = "${params.test_data_base}/data/pangenomics/homo_sapiens/pangenome.paf" + pangenome_paf_gz = "${params.test_data_base}/data/pangenomics/homo_sapiens/pangenome.paf.gz" + pangenome_panacus_tsv = "${params.test_data_base}/data/pangenomics/homo_sapiens/pangenome.panacus.tsv" + pangenome_seqwish_gfa = "${params.test_data_base}/data/pangenomics/homo_sapiens/pangenome.seqwish.gfa" + pangenome_smoothxg_gfa = 
"${params.test_data_base}/data/pangenomics/homo_sapiens/pangenome.smoothxg.gfa" + pangenome_gfaffix_gfa = "${params.test_data_base}/data/pangenomics/homo_sapiens/pangenome.gfaffix.gfa" + 'odgi' { + pangenome_og = "${params.test_data_base}/data/pangenomics/homo_sapiens/odgi/pangenome.og" + pangenome_lay = "${params.test_data_base}/data/pangenomics/homo_sapiens/odgi/pangenome.lay" + } + } + 'illumina' { + test_paired_end_sorted_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam" + test_paired_end_sorted_bam_bai = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai" + test_paired_end_name_sorted_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/test.paired_end.name.sorted.bam" + test_paired_end_markduplicates_sorted_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/test.paired_end.markduplicates.sorted.bam" + test_paired_end_markduplicates_sorted_bam_bai = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/test.paired_end.markduplicates.sorted.bam.bai" + test_paired_end_markduplicates_sorted_referencesn_txt = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/test.paired_end.markduplicates.sorted.referencesn.txt" + test_paired_end_recalibrated_sorted_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam" + test_paired_end_recalibrated_sorted_bam_bai = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam.bai" + test_paired_end_umi_consensus_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/umi/test.paired_end.umi_consensus.bam" + test_paired_end_umi_converted_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/umi/test.paired_end.umi_converted.bam" + test_paired_end_umi_grouped_bam = 
"${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/umi/test.paired_end.umi_grouped.bam" + test_paired_end_umi_histogram_txt = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/umi/test.paired_end.umi_histogram.txt" + test_paired_end_umi_unsorted_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/umi/test.paired_end.umi_unsorted.bam" + test_paired_end_umi_unsorted_tagged_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/umi/test.paired_end.unsorted_tagged.bam" + test_paired_end_hla = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/example_hla_pe.bam" + test_paired_end_hla_sorted_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/example_hla_pe.sorted.bam" + test_paired_end_hla_sorted_bam_bai = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/example_hla_pe.sorted.bam.bai" + test_rna_paired_end_sorted_chr6_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/test.rna.paired_end.sorted.chr6.bam" + test_rna_paired_end_sorted_chr6_bam_bai = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/test.rna.paired_end.sorted.chr6.bam.bai" + + test2_paired_end_sorted_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam" + test2_paired_end_sorted_bam_bai = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam.bai" + test2_paired_end_name_sorted_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/test2.paired_end.name.sorted.bam" + test2_paired_end_markduplicates_sorted_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/test2.paired_end.markduplicates.sorted.bam" + test2_paired_end_markduplicates_sorted_bam_bai = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/test2.paired_end.markduplicates.sorted.bam.bai" + test2_paired_end_recalibrated_sorted_bam = 
"${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam" + test2_paired_end_recalibrated_sorted_bam_bai = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam.bai" + test2_paired_end_umi_consensus_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/umi/test2.paired_end.umi_consensus.bam" + test2_paired_end_umi_converted_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/umi/test2.paired_end.umi_converted.bam" + test2_paired_end_umi_grouped_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/umi/test2.paired_end.umi_grouped.bam" + test2_paired_end_umi_histogram_txt = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/umi/test2.paired_end.umi_histogram.txt" + test2_paired_end_umi_unsorted_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/umi/test2.paired_end.umi_unsorted.bam" + test2_paired_end_umi_unsorted_tagged_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/umi/test2.paired_end.unsorted_tagged.bam" + test_paired_end_duplex_umi_unmapped_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/umi/test.paired_end.duplex_umi_unmapped.bam" + test_paired_end_duplex_umi_mapped_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/umi/test.paired_end.duplex_umi_mapped.bam" + test_paired_end_duplex_umi_mapped_tagged_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/umi/test.paired_end.duplex_umi_mapped_tagged.bam" + test_paired_end_duplex_umi_grouped_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/umi/test.paired_end.duplex_umi_grouped.bam" + test_paired_end_duplex_umi_duplex_consensus_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/umi/test.paired_end.duplex_umi_duplex_consensus.bam" + + mitochon_standin_recalibrated_sorted_bam = 
"${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/mitochon_standin.recalibrated.sorted.bam" + mitochon_standin_recalibrated_sorted_bam_bai = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/mitochon_standin.recalibrated.sorted.bam.bai" + test_illumina_mt_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/test_illumina_mt.bam" + test_illumina_mt_bam_bai = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/test_illumina_mt.bam.bai" + + test3_single_end_markduplicates_sorted_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam" + + read_group_settings_txt = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/read_group_settings.txt" + + test_paired_end_sorted_cram = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram" + test_paired_end_sorted_cram_crai = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai" + test_paired_end_markduplicates_sorted_cram = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/cram/test.paired_end.markduplicates.sorted.cram" + test_paired_end_markduplicates_sorted_cram_crai = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/cram/test.paired_end.markduplicates.sorted.cram.crai" + test_paired_end_recalibrated_sorted_cram = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram" + test_paired_end_recalibrated_sorted_cram_crai = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram.crai" + + test2_paired_end_sorted_cram = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/cram/test2.paired_end.sorted.cram" + test2_paired_end_sorted_cram_crai = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/cram/test2.paired_end.sorted.cram.crai" + 
test2_paired_end_markduplicates_sorted_cram = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/cram/test2.paired_end.markduplicates.sorted.cram" + test2_paired_end_markduplicates_sorted_cram_crai = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/cram/test2.paired_end.markduplicates.sorted.cram.crai" + test2_paired_end_recalibrated_sorted_cram = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/cram/test2.paired_end.recalibrated.sorted.cram" + test2_paired_end_recalibrated_sorted_cram_crai = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/cram/test2.paired_end.recalibrated.sorted.cram.crai" + test3_paired_end_recalibrated_sorted_cram = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/cram/test3.paired_end.recalibrated.sorted.cram" + test3_paired_end_recalibrated_sorted_cram_crai = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/cram/test3.paired_end.recalibrated.sorted.cram.crai" + + test_1_fastq_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz" + test_2_fastq_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/fastq/test_2.fastq.gz" + test_umi_1_fastq_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/fastq/test.umi_1.fastq.gz" + test_umi_2_fastq_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/fastq/test.umi_2.fastq.gz" + test_airrseq_1_fastq_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/fastq/test_airrseq_umi_R1.fastq.gz" + test_airrseq_2_fastq_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/fastq/test_airrseq_R2.fastq.gz" + test2_1_fastq_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/fastq/test2_1.fastq.gz" + test2_2_fastq_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/fastq/test2_2.fastq.gz" + test2_umi_1_fastq_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/fastq/test2.umi_1.fastq.gz" + 
test2_umi_2_fastq_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/fastq/test2.umi_2.fastq.gz" + test_rnaseq_1_fastq_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz" + test_rnaseq_2_fastq_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz" + test_paired_end_duplex_umi_1_fastq_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/fastq/test_duplex_umi_1.fastq.gz" + test_paired_end_duplex_umi_2_fastq_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/fastq/test_duplex_umi_2.fastq.gz" + + test_baserecalibrator_table = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/test.baserecalibrator.table" + test2_baserecalibrator_table = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/test2.baserecalibrator.table" + test_pileups_table = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/test.pileups.table" + test2_pileups_table = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/test2.pileups.table" + + test_paired_end_sorted_dragstrmodel = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/test_paired_end_sorted_dragstrmodel.txt" + + test_genomicsdb_tar_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb.tar.gz" + test_pon_genomicsdb_tar_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/test_pon_genomicsdb.tar.gz" + + test2_haplotc_ann_vcf_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test2_haplotc.ann.vcf.gz" + test2_haplotc_ann_vcf_gz_tbi = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test2_haplotc.ann.vcf.gz.tbi" + test_haplotc_cnn_vcf_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test_haplotcaller.cnn.vcf.gz" + test_haplotc_cnn_vcf_gz_tbi = 
"${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test_haplotcaller.cnn.vcf.gz.tbi" + + test2_haplotc_vcf_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test2_haplotc.vcf.gz" + test2_haplotc_vcf_gz_tbi = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test2_haplotc.vcf.gz.tbi" + + test2_recal = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/variantrecalibrator/test2.recal" + test2_recal_idx = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/variantrecalibrator/test2.recal.idx" + test2_tranches = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/variantrecalibrator/test2.tranches" + test2_allele_specific_recal = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/variantrecalibrator/test2_allele_specific.recal" + test2_allele_specific_recal_idx = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/variantrecalibrator/test2_allele_specific.recal.idx" + test2_allele_specific_tranches = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/variantrecalibrator/test2_allele_specific.tranches" + + test_test2_paired_mutect2_calls_vcf_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/paired_mutect2_calls/test_test2_paired_mutect2_calls.vcf.gz" + test_test2_paired_mutect2_calls_vcf_gz_tbi = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/paired_mutect2_calls/test_test2_paired_mutect2_calls.vcf.gz.tbi" + test_test2_paired_mutect2_calls_vcf_gz_stats = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/paired_mutect2_calls/test_test2_paired_mutect2_calls.vcf.gz.stats" + test_test2_paired_mutect2_calls_f1r2_tar_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/paired_mutect2_calls/test_test2_paired_mutect2_calls.f1r2.tar.gz" + test_test2_paired_mutect2_calls_artifact_prior_tar_gz 
= "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/test_test2_paired_mutect2_calls.artifact-prior.tar.gz" + test_test2_paired_segmentation_table = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/test_test2_paired.segmentation.table" + test_test2_paired_contamination_table = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/test_test2_paired.contamination.table" + + test_genome_vcf = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gvcf/test.genome.vcf" + test_genome_vcf_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz" + test_genome_vcf_gz_tbi = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz.tbi" + test_genome_vcf_idx = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.idx" + + test_genome_vcf_ud = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/svd/test.genome.vcf.UD" + test_genome_vcf_mu = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/svd/test.genome.vcf.mu" + test_genome_vcf_bed = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/svd/test.genome.vcf.bed" + + test2_genome_vcf = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gvcf/test2.genome.vcf" + test2_genome_vcf_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gvcf/test2.genome.vcf.gz" + test2_genome_vcf_gz_tbi = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gvcf/test2.genome.vcf.gz.tbi" + test2_genome_vcf_idx = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gvcf/test2.genome.vcf.idx" + + test_genome21_indels_vcf_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/vcf/test.genome_21.somatic_sv.vcf.gz" + test_genome21_indels_vcf_gz_tbi = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/vcf/test.genome_21.somatic_sv.vcf.gz.tbi" + + test_mpileup = 
"${params.test_data_base}/data/genomics/homo_sapiens/illumina/mpileup/test.mpileup.gz" + test2_mpileup = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/mpileup/test2.mpileup.gz" + + test_broadpeak = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/broadpeak/test.broadPeak" + test2_broadpeak = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/broadpeak/test2.broadPeak" + + test_narrowpeak = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/narrowpeak/test.narrowPeak" + test2_narrowpeak = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/narrowpeak/test2.narrowPeak" + + test_yak = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/yak/test.yak" + test2_yak = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/yak/test2.yak" + + cutandrun_bedgraph_test_1 = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bedgraph/cutandtag_h3k27me3_test_1.bedGraph" + cutandrun_bedgraph_test_2 = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bedgraph/cutandtag_igg_test_1.bedGraph" + na24385_chr22_coverage = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bedgraph/NA24385_coverage.bed" + + empty_vcf_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/vcf/empty.vcf.gz" + empty_vcf_gz_tbi = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/vcf/empty.vcf.gz.tbi" + + simulated_sv = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/vcf/chr21/simulated_sv.vcf.gz" + simulated_sv_tbi = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/vcf/chr21/simulated_sv.vcf.gz.tbi" + simulated_sv2 = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/vcf/chr21/simulated_sv2.vcf.gz" + simulated_sv2_tbi = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/vcf/chr21/simulated_sv2.vcf.gz.tbi" + + test_rnaseq_vcf = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/vcf/test.rnaseq.vcf" + test_sv_vcf = 
"${params.test_data_base}/data/genomics/homo_sapiens/illumina/vcf/sv_query.vcf.gz" + test_sv_vcf_tbi = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/vcf/sv_query.vcf.gz.tbi" + na24385_chr22_sv_vcf = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/vcf/NA24385_sv.vcf.gz" + na24385_chr22_sv_vcf_tbi = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/vcf/NA24385_sv.vcf.gz.tbi" + genmod_vcf_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/vcf/genmod.vcf.gz" + genmod_annotate_vcf_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/vcf/test_annotate.vcf.gz" + genmod_models_vcf_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/vcf/test_models.vcf.gz" + genmod_score_vcf_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/vcf/test_score.vcf.gz" + + test_mito_vcf = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/vcf/NA12878_chrM.vcf.gz" + + test_pytor = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/pytor/test.pytor" + rank_model = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/genmod/svrank_model_-v1.8-.ini" + + test_flowcell = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bcl/flowcell.tar.gz" + test_flowcell_samplesheet = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bcl/flowcell_samplesheet.csv" + + varlociraptor_scenario = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/varlociraptor/scenario.yml" + + contig_ploidy_priors_table = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/contig_ploidy_priors_table.tsv" + + purecn_ex1_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/purecn/purecn_ex1.bam" + purecn_ex1_bai = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/purecn/purecn_ex1.bam.bai" + purecn_ex1_interval = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/purecn/purecn_ex1_intervals.txt" + purecn_ex1_normal = 
"${params.test_data_base}/data/genomics/homo_sapiens/illumina/purecn/purecn_ex1_normal.txt.gz" + purecn_ex2_normal = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/purecn/purecn_ex2_normal.txt.gz" + purecn_normalpanel_vcf = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/purecn/purecn_normalpanel.vcf.gz" + purecn_normalpanel_tbi = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/purecn/purecn_normalpanel.vcf.gz.tbi" + } + 'pacbio' { + primers = "${params.test_data_base}/data/genomics/homo_sapiens/pacbio/fasta/primers.fasta" + alz = "${params.test_data_base}/data/genomics/homo_sapiens/pacbio/bam/alz.bam" + alzpbi = "${params.test_data_base}/data/genomics/homo_sapiens/pacbio/bam/alz.bam.pbi" + ccs = "${params.test_data_base}/data/genomics/homo_sapiens/pacbio/bam/alz.ccs.bam" + ccs_fa = "${params.test_data_base}/data/genomics/homo_sapiens/pacbio/fasta/alz.ccs.fasta" + ccs_fa_gz = "${params.test_data_base}/data/genomics/homo_sapiens/pacbio/fasta/alz.ccs.fasta.gz" + ccs_fq = "${params.test_data_base}/data/genomics/homo_sapiens/pacbio/fastq/alz.ccs.fastq" + ccs_fq_gz = "${params.test_data_base}/data/genomics/homo_sapiens/pacbio/fastq/alz.ccs.fastq.gz" + ccs_xml = "${params.test_data_base}/data/genomics/homo_sapiens/pacbio/xml/alz.ccs.consensusreadset.xml" + hifi = "${params.test_data_base}/data/genomics/homo_sapiens/pacbio/fastq/test_hifi.fastq.gz" + lima = "${params.test_data_base}/data/genomics/homo_sapiens/pacbio/bam/alz.ccs.fl.NEB_5p--NEB_Clontech_3p.bam" + refine = "${params.test_data_base}/data/genomics/homo_sapiens/pacbio/bam/alz.ccs.fl.NEB_5p--NEB_Clontech_3p.flnc.bam" + cluster = "${params.test_data_base}/data/genomics/homo_sapiens/pacbio/bam/alz.ccs.fl.NEB_5p--NEB_Clontech_3p.flnc.clustered.bam" + singletons = "${params.test_data_base}/data/genomics/homo_sapiens/pacbio/bam/alz.ccs.fl.NEB_5p--NEB_Clontech_3p.flnc.clustered.singletons.bam" + aligned = 
"${params.test_data_base}/data/genomics/homo_sapiens/pacbio/bam/alz.ccs.fl.NEB_5p--NEB_Clontech_3p.flnc.clustered.singletons.merged.aligned.bam" + alignedbai = "${params.test_data_base}/data/genomics/homo_sapiens/pacbio/bam/alz.ccs.fl.NEB_5p--NEB_Clontech_3p.flnc.clustered.singletons.merged.aligned.bam.bai" + genemodel1 = "${params.test_data_base}/data/genomics/homo_sapiens/pacbio/bed/alz.ccs.fl.NEB_5p--NEB_Clontech_3p.flnc.clustered.singletons.merged.aligned_tc.bed" + genemodel2 = "${params.test_data_base}/data/genomics/homo_sapiens/pacbio/bed/alz.ccs.fl.NEB_5p--NEB_Clontech_3p.flnc.clustered.singletons.merged.aligned_tc.2.bed" + filelist = "${params.test_data_base}/data/genomics/homo_sapiens/pacbio/txt/filelist.txt" + } + 'scramble' { + fasta = "${params.test_data_base}/data/genomics/homo_sapiens/scramble/test.fa" + fasta_fai = "${params.test_data_base}/data/genomics/homo_sapiens/scramble/test.fa.fai" + bam = "${params.test_data_base}/data/genomics/homo_sapiens/scramble/test.bam" + bam_bai = "${params.test_data_base}/data/genomics/homo_sapiens/scramble/test.bam.bai" + cram = "${params.test_data_base}/data/genomics/homo_sapiens/scramble/test.cram" + cram_crai = "${params.test_data_base}/data/genomics/homo_sapiens/scramble/test.cram.crai" + bed = "${params.test_data_base}/data/genomics/homo_sapiens/scramble/test.bed" + } + 'gene_set_analysis' { + gct = "${params.test_data_base}/data/genomics/homo_sapiens/gene_set_analysis/P53_6samples_collapsed_symbols.gct" + cls = "${params.test_data_base}/data/genomics/homo_sapiens/gene_set_analysis/P53_6samples.cls" + gmx = "${params.test_data_base}/data/genomics/homo_sapiens/gene_set_analysis/c1.symbols.reduced.gmx" + } + 'cnvkit' { + amplicon_cnr = "https://raw.githubusercontent.com/etal/cnvkit/v0.9.9/test/formats/amplicon.cnr" + amplicon_cns = "https://raw.githubusercontent.com/etal/cnvkit/v0.9.9/test/formats/amplicon.cns" + } + } + 'bacteroides_fragilis' { + 'genome' { + genome_fna_gz = 
"${params.test_data_base}/data/genomics/prokaryotes/bacteroides_fragilis/genome/genome.fna.gz" + genome_gbff_gz = "${params.test_data_base}/data/genomics/prokaryotes/bacteroides_fragilis/genome/genome.gbff.gz" + genome_paf = "${params.test_data_base}/data/genomics/prokaryotes/bacteroides_fragilis/genome/genome.paf" + genome_gff_gz = "${params.test_data_base}/data/genomics/prokaryotes/bacteroides_fragilis/genome/genome.gff.gz" + + } + 'hamronization' { + genome_abricate_tsv = "${params.test_data_base}/data/genomics/prokaryotes/bacteroides_fragilis/hamronization/genome.abricate.tsv" + genome_mapping_potential_arg = "${params.test_data_base}/data/genomics/prokaryotes/bacteroides_fragilis/hamronization/genome.mapping.potential.ARG" + } + 'illumina' { + test1_contigs_fa_gz = "${params.test_data_base}/data/genomics/prokaryotes/bacteroides_fragilis/illumina/fasta/test1.contigs.fa.gz" + test1_1_fastq_gz = "${params.test_data_base}/data/genomics/prokaryotes/bacteroides_fragilis/illumina/fastq/test1_1.fastq.gz" + test1_2_fastq_gz = "${params.test_data_base}/data/genomics/prokaryotes/bacteroides_fragilis/illumina/fastq/test1_2.fastq.gz" + test2_1_fastq_gz = "${params.test_data_base}/data/genomics/prokaryotes/bacteroides_fragilis/illumina/fastq/test2_1.fastq.gz" + test2_2_fastq_gz = "${params.test_data_base}/data/genomics/prokaryotes/bacteroides_fragilis/illumina/fastq/test2_2.fastq.gz" + test1_paired_end_bam = "${params.test_data_base}/data/genomics/prokaryotes/bacteroides_fragilis/illumina/bam/test1.bam" + test1_paired_end_sorted_bam = "${params.test_data_base}/data/genomics/prokaryotes/bacteroides_fragilis/illumina/bam/test1.sorted.bam" + test1_paired_end_sorted_bam_bai = "${params.test_data_base}/data/genomics/prokaryotes/bacteroides_fragilis/illumina/bam/test1.sorted.bam.bai" + test2_paired_end_bam = "${params.test_data_base}/data/genomics/prokaryotes/bacteroides_fragilis/illumina/bam/test2.bam" + test2_paired_end_sorted_bam = 
"${params.test_data_base}/data/genomics/prokaryotes/bacteroides_fragilis/illumina/bam/test2.sorted.bam" + test2_paired_end_sorted_bam_bai = "${params.test_data_base}/data/genomics/prokaryotes/bacteroides_fragilis/illumina/bam/test2.sorted.bam.bai" + } + 'nanopore' { + test_fastq_gz = "${params.test_data_base}/data/genomics/prokaryotes/bacteroides_fragilis/nanopore/fastq/test.fastq.gz" + overlap_paf = "${params.test_data_base}/data/genomics/prokaryotes/bacteroides_fragilis/nanopore/overlap.paf" + } + } + 'candidatus_portiera_aleyrodidarum' { + 'genome' { + genome_fasta = "${params.test_data_base}/data/genomics/prokaryotes/candidatus_portiera_aleyrodidarum/genome/genome.fasta" + genome_sizes = "${params.test_data_base}/data/genomics/prokaryotes/candidatus_portiera_aleyrodidarum/genome/genome.sizes" + genome_aln_gz = "${params.test_data_base}/data/genomics/prokaryotes/candidatus_portiera_aleyrodidarum/genome/genome.aln.gz" + genome_aln_nwk = "${params.test_data_base}/data/genomics/prokaryotes/candidatus_portiera_aleyrodidarum/genome/genome.aln.nwk" + proteome_fasta = "${params.test_data_base}/data/genomics/prokaryotes/candidatus_portiera_aleyrodidarum/genome/proteome.fasta" + test1_gff = "${params.test_data_base}/data/genomics/prokaryotes/candidatus_portiera_aleyrodidarum/genome/gff/test1.gff" + test2_gff = "${params.test_data_base}/data/genomics/prokaryotes/candidatus_portiera_aleyrodidarum/genome/gff/test2.gff" + test3_gff = "${params.test_data_base}/data/genomics/prokaryotes/candidatus_portiera_aleyrodidarum/genome/gff/test3.gff" + } + 'illumina' { + test_1_fastq_gz = "${params.test_data_base}/data/genomics/prokaryotes/candidatus_portiera_aleyrodidarum/illumina/fastq/test_1.fastq.gz" + test_2_fastq_gz = "${params.test_data_base}/data/genomics/prokaryotes/candidatus_portiera_aleyrodidarum/illumina/fastq/test_2.fastq.gz" + test_se_fastq_gz = "${params.test_data_base}/data/genomics/prokaryotes/candidatus_portiera_aleyrodidarum/illumina/fastq/test_se.fastq.gz" + } + 
'nanopore' { + test_fastq_gz = "${params.test_data_base}/data/genomics/prokaryotes/candidatus_portiera_aleyrodidarum/nanopore/fastq/test.fastq.gz" + } + } + 'haemophilus_influenzae' { + 'genome' { + genome_fna_gz = "${params.test_data_base}/data/genomics/prokaryotes/haemophilus_influenzae/genome/genome.fna.gz" + genome_aln_gz = "${params.test_data_base}/data/genomics/prokaryotes/haemophilus_influenzae/genome/genome.aln.gz" + genome_aln_nwk = "${params.test_data_base}/data/genomics/prokaryotes/haemophilus_influenzae/genome/genome.aln.nwk" + } + } + 'generic' { + 'csv' { + test_csv = "${params.test_data_base}/data/generic/csv/test.csv" + } + 'notebooks' { + rmarkdown = "${params.test_data_base}/data/generic/notebooks/rmarkdown/rmarkdown_notebook.Rmd" + ipython_md = "${params.test_data_base}/data/generic/notebooks/jupyter/ipython_notebook.md" + ipython_ipynb = "${params.test_data_base}/data/generic/notebooks/jupyter/ipython_notebook.ipynb" + } + 'tar' { + tar_gz = "${params.test_data_base}/data/generic/tar/hello.tar.gz" + } + 'tsv' { + test_tsv = "${params.test_data_base}/data/generic/tsv/test.tsv" + } + 'txt' { + hello = "${params.test_data_base}/data/generic/txt/hello.txt" + } + 'cooler'{ + test_pairix_pair_gz = "${params.test_data_base}/data/genomics/homo_sapiens/cooler/cload/hg19/hg19.GM12878-MboI.pairs.subsample.blksrt.txt.gz" + test_pairix_pair_gz_px2 = "${params.test_data_base}/data/genomics/homo_sapiens/cooler/cload/hg19/hg19.GM12878-MboI.pairs.subsample.blksrt.txt.gz.px2" + test_pairs_pair = "${params.test_data_base}/data/genomics/homo_sapiens/cooler/cload/hg19/hg19.sample1.pairs" + test_tabix_pair_gz = "${params.test_data_base}/data/genomics/homo_sapiens/cooler/cload/hg19/hg19.GM12878-MboI.pairs.subsample.sorted.possrt.txt.gz" + test_tabix_pair_gz_tbi = "${params.test_data_base}/data/genomics/homo_sapiens/cooler/cload/hg19/hg19.GM12878-MboI.pairs.subsample.sorted.possrt.txt.gz.tbi" + hg19_chrom_sizes = 
"${params.test_data_base}/data/genomics/homo_sapiens/cooler/cload/hg19/hg19.chrom.sizes" + test_merge_cool = "${params.test_data_base}/data/genomics/homo_sapiens/cooler/merge/toy/toy.symm.upper.2.cool" + test_merge_cool_cp2 = "${params.test_data_base}/data/genomics/homo_sapiens/cooler/merge/toy/toy.symm.upper.2.cp2.cool" + + } + 'pairtools' { + mock_4dedup_pairsam = "${params.test_data_base}/data/genomics/homo_sapiens/pairtools/mock.4dedup.pairsam" + mock_4flip_pairs = "${params.test_data_base}/data/genomics/homo_sapiens/pairtools/mock.4flip.pairs" + mock_chrom_sizes = "${params.test_data_base}/data/genomics/homo_sapiens/pairtools/mock.chrom.sizes" + mock_pairsam = "${params.test_data_base}/data/genomics/homo_sapiens/pairtools/mock.pairsam" + mock_sam = "${params.test_data_base}/data/genomics/homo_sapiens/pairtools/mock.sam" + frag_bed = "${params.test_data_base}/data/genomics/homo_sapiens/pairtools/frag.bed" + } + 'estsfs'{ + config_file = "${params.test_data_base}/data/delete_me/estsfs/config-JC.txt" + data_file = "${params.test_data_base}/data/delete_me/estsfs/TEST-DATA.TXT" + seed_file = "${params.test_data_base}/data/delete_me/estsfs/seedfile.txt" + } + 'config' { + ncbi_user_settings = "${params.test_data_base}/data/generic/config/ncbi_user_settings.mkfg" + } + 'unsorted_data' { + 'unsorted_text' { + genome_file = "${params.test_data_base}/data/generic/unsorted_data/unsorted_text/test.genome" + intervals = "${params.test_data_base}/data/generic/unsorted_data/unsorted_text/test.bed" + numbers_csv = "${params.test_data_base}/data/generic/unsorted_data/unsorted_text/test.csv" + } + } + } + 'proteomics' { + 'msspectra' { + ups_file1 = "${params.test_data_base}/data/proteomics/msspectra/OVEMB150205_12.raw" + ups_file2 = "${params.test_data_base}/data/proteomics/msspectra/OVEMB150205_14.raw" + profile_spectra = "${params.test_data_base}/data/proteomics/msspectra/peakpicker_tutorial_1.mzML" + } + 'database' { + yeast_ups = 
"${params.test_data_base}/data/proteomics/database/yeast_UPS.fasta" + } + 'maxquant' { + mq_contrasts = "${params.test_data_base}/data/proteomics/maxquant/MaxQuant_contrasts.csv" + mq_proteingroups = "${params.test_data_base}/data/proteomics/maxquant/MaxQuant_proteinGroups.txt" + mq_samplesheet = "${params.test_data_base}/data/proteomics/maxquant/MaxQuant_samplesheet.tsv" + mq_proteus_mat = "${params.test_data_base}/data/proteomics/maxquant/proteus.raw_MaxQuant_proteingroups_tab.tsv" + } + 'parameter' { + maxquant = "${params.test_data_base}/data/proteomics/parameter/mqpar.xml" + } + 'openms' { + idxml1 = "${params.test_data_base}/data/proteomics/openms/HepG2_rep1_small.idXML" + idxml2 = "${params.test_data_base}/data/proteomics/openms/HepG2_rep2_small.idXML" + } + 'pdb' { + tim1_pdb = "${params.test_data_base}/data/proteomics/pdb/1tim.pdb" + tim8_pdb = "${params.test_data_base}/data/proteomics/pdb/8tim.pdb" + } + } + 'galaxea_fascicularis' { + hic { + pretext = "${params.test_data_base}/data/genomics/eukaryotes/galaxea_fascicularis/hic/jaGalFasc40_2.pretext" + } + } + 'deilephila_porcellus' { + 'mito' { + ref_fa = "${params.test_data_base}/data/genomics/eukaryotes/deilephila_porcellus/mito/MW539688.1.fasta" + ref_gb = "${params.test_data_base}/data/genomics/eukaryotes/deilephila_porcellus/mito/MW539688.1.gb" + hifi_reads = "${params.test_data_base}/data/genomics/eukaryotes/deilephila_porcellus/mito/ilDeiPorc1.HiFi.reads.fa" + contigs = "${params.test_data_base}/data/genomics/eukaryotes/deilephila_porcellus/mito/ilDeiPorc1.contigs.fa" + } + } + 'imaging' { + 'h5' { + plant_wga = "${params.test_data_base}/data/imaging/h5/plant_wga.h5" + plant_wga_prob = "${params.test_data_base}/data/imaging/h5/plant_wga_probabilities.h5" + } + 'ilp' { + plant_wga_multicut = "${params.test_data_base}/data/imaging/ilp/plant_wga.multicut.ilp" + plant_wga_pixel_class = "${params.test_data_base}/data/imaging/ilp/plant_wga.pixel_prob.ilp" + } + 'tiff' { + mouse_heart_wga = 
"${params.test_data_base}/data/imaging/tiff/mindagap.mouse_heart.wga.tiff" + } + 'ome-tiff' { + cycif_tonsil_channels = "${params.test_data_base}/data/imaging/ome-tiff/cycif-tonsil-channels.csv" + cycif_tonsil_cycle1 = "${params.test_data_base}/data/imaging/ome-tiff/cycif-tonsil-cycle1.ome.tif" + cycif_tonsil_cycle2 = "${params.test_data_base}/data/imaging/ome-tiff/cycif-tonsil-cycle2.ome.tif" + cycif_tonsil_cycle3 = "${params.test_data_base}/data/imaging/ome-tiff/cycif-tonsil-cycle3.ome.tif" + cycif_tonsil_dfp = "${params.test_data_base}/data/imaging/ome-tiff/cycif-tonsil-dfp.ome.tif" + cycif_tonsil_ffp = "${params.test_data_base}/data/imaging/ome-tiff/cycif-tonsil-ffp.ome.tif" + } + 'registration' { + markers = "${params.test_data_base}/data/imaging/registration/markers.csv" + cycle1 = "${params.test_data_base}/data/imaging/ome-tiff/cycif-tonsil-cycle1.ome.tif" + cycle2 = "${params.test_data_base}/data/imaging/ome-tiff/cycif-tonsil-cycle2.ome.tif" + } + 'segmentation' { + markers = "${params.test_data_base}/data/imaging/segmentation/markers.csv" + image = "${params.test_data_base}/data/imaging/segmentation/cycif_tonsil_registered.ome.tif" + } + 'quantification' { + markers = "${params.test_data_base}/data/imaging/quantification/markers.csv" + image = "${params.test_data_base}/data/imaging/quantification/cycif_tonsil_registered.ome.tif" + mask = "${params.test_data_base}/data/imaging/quantification/cell.ome.tif" + } + 'downstream' { + markers = "${params.test_data_base}/data/imaging/downstream/markers.csv" + cell_feature_array = "${params.test_data_base}/data/imaging/downstream/cycif_tonsil_cell.csv" + } + 'background_subtraction' { + markers = "${params.test_data_base}/data/imaging/background_subtraction/markers.csv" + image = "${params.test_data_base}/data/imaging/background_subtraction/cycif_tonsil_registered.ome.tif" + } + 'core_detection' { + image = "${params.test_data_base}/data/imaging/core_detection/single_core_dapi.tif" + } + } + } +} diff --git 
a/conf/test_full.config b/conf/test_full.config index 730c542..80d10e7 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -15,10 +15,17 @@ params { config_profile_description = 'Full test dataset to check pipeline function' // Input data for full size test - // TODO nf-core: Specify the paths to your full test data ( on nf-core/test-datasets or directly in repositories, e.g. SRA) - // TODO nf-core: Give any required params for the test so that command line flags are not needed - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_full_illumina_amplicon.csv' + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/readsimulator/samplesheet/test_samplesheet.csv' + fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/readsimulator/testdata/GCF_024334085.1_ASM2433408v1_genomic.fna.gz' - // Genome references - genome = 'R64-1-1' + amplicon = true + metagenome = true + target_capture = true + wholegenome = true + + amplicon_fw_primer = 'AAAATAAT' + amplicon_rv_primer = 'GATTACTTT' + amplicon_read_count = 1000 + metagenome_n_reads = '100K' + probe_ref_name = 'Diptera-2.7Kv1' } diff --git a/docs/images/nf-core-readsimulator_logo_dark.png b/docs/images/nf-core-readsimulator_logo_dark.png index 8bd1a07..92a0a37 100644 Binary files a/docs/images/nf-core-readsimulator_logo_dark.png and b/docs/images/nf-core-readsimulator_logo_dark.png differ diff --git a/docs/images/nf-core-readsimulator_logo_light.png b/docs/images/nf-core-readsimulator_logo_light.png index 7c411bb..b8a9993 100644 Binary files a/docs/images/nf-core-readsimulator_logo_light.png and b/docs/images/nf-core-readsimulator_logo_light.png differ diff --git a/docs/images/readsimulator_workflow.png b/docs/images/readsimulator_workflow.png new file mode 100755 index 0000000..f6f9aa4 Binary files /dev/null and b/docs/images/readsimulator_workflow.png differ diff --git a/docs/images/readsimulator_workflow.svg 
b/docs/images/readsimulator_workflow.svg new file mode 100755 index 0000000..95488ec --- /dev/null +++ b/docs/images/readsimulator_workflow.svg @@ -0,0 +1,4 @@ + + + +
Fasta
Fasta
Fasta
Fasta
Wgsim
Wgsim
InSilicoSeq
InSilicoSeq
Whole genome
Whole genome
Metagenome
Metagenome
Amplicon
Amplicon
Target capture
Target capture
CRABS
CRABS
ART
ART
Bowtie2
Bowtie2
Samtools
Samtools
CapSim
CapSim
Create Samplesheet
Create Sam...
Fastqc
Fastqc
Multiqc
Multiqc
Legend
Legend
Reference
Reference
Probes
Probes
Simulated Reads
Simulated Reads
Start
Start
Step
Step
nf-core/
readsimulator
nf-core/...
1.0.0
1.0.0
Text is not SVG - cannot display
\ No newline at end of file diff --git a/docs/output.md b/docs/output.md index 31a4766..eef32fa 100644 --- a/docs/output.md +++ b/docs/output.md @@ -6,15 +6,95 @@ This document describes the output produced by the pipeline. Most of the plots a The directories listed below will be created in the results directory after the pipeline has finished. All paths are relative to the top-level results directory. - - ## Pipeline overview The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps: +- [ART](#art) - Simulated amplicon reads +- [bedtools](#bedtools) - Probe fasta file +- [Bowtie2](#bowtie2) - Alignments and index files +- [CapSim](#capsim) - Simulated target capture reads +- [CRABS](#crabs) - Reference database formatted for amplicon read simulation - [FastQC](#fastqc) - Raw read QC +- [InSilicoSeq](#insilicoseq) - Simulated metagenomic reads - [MultiQC](#multiqc) - Aggregate report describing results and QC from the whole pipeline +- [ncbi-genome-download](#ncbi-genome-download) - Reference fasta files - [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution +- [Samplesheet](#samplesheet) - Samplesheets produced during the running of the pipeline +- [Unzip](#unzip) - Unziped probe file +- [Wgsim](#wgsim) - Simulated wholegenome reads + +### ART + +
+Output files + +- `art_illumina/` + - `*1.fq.gz`: Read 1 files simulating Illumina reads. The prefix will be the sample name found in the samplesheet. + - `*2.fq.gz`: Read 2 files simulating Illumina reads. The prefix will be the sample name found in the samplesheet. + +
+ +[ART](https://www.niehs.nih.gov/research/resources/software/biostatistics/art/index.cfm) is a tool for simulating Illumina sequencing reads. For further reading and documentation see the [ART Illumina manual](https://manpages.debian.org/testing/art-nextgen-simulation-tools/art_illumina.1.en.html). + +### bedtools + +
+Output files + +- `bedtools/` + - `*.fa`: The probe fasta file extracted from the reference fasta file if the input probe file was a bed file. + +
+ +[bedtools](https://pubmed.ncbi.nlm.nih.gov/20110278/) is a suite of tools for genomic data analysis. For further reading and documentation see the [bedtools documentation](https://bedtools.readthedocs.io/en/latest/). + +### Bowtie2 + +
+Output files + +- `bowtie2/` + - `bowtie2/` + - `*.bt2`: Bowtie2 index files. + - `*.bam`: BAM file produced from aligning with Bowtie2. + - `*.bowtie2.log`: Log file containing alignment information. + - `*.bai`: Index file produced with SAMtools. + +
+ +[Bowtie2](https://www.nature.com/articles/nmeth.1923) is a popular tool for aligning sequencing reads to reference sequences. For further reading and documentation see the [Bowtie2 manual](https://bowtie-bio.sourceforge.net/bowtie2/manual.shtml). +[SAMtools](https://academic.oup.com/gigascience/article/10/2/giab008/6137722?login=false) is a popular set of tools for working with sequencing data. For further reading and documentation see the [SAMtools documentation](http://www.htslib.org/doc/). + +### CapSim + +
+Output files + +- `capsim_illumina/` + - `*_1.fastq.gz`: Read 1 files simulating Illumina reads. The prefix will be the sample name found in the samplesheet. + - `*_2.fastq.gz`: Read 2 files simulating Illumina reads. The prefix will be the sample name found in the samplesheet. +- `capsim_pacbio/` + - `*_1.fastq.gz`: Read 1 files simulating PacBio reads. The prefix will be the sample name found in the samplesheet. + - `*_2.fastq.gz`: Read 2 files simulating PacBio reads. The prefix will be the sample name found in the samplesheet. + +
+ +[CapSim](https://academic.oup.com/bioinformatics/article/34/5/873/4575140) is a tool to simulate capture sequencing reads. It's part of the [Japsa package](https://japsa.readthedocs.io/en/latest/). For further reading and documentation see the [CapSim documentation](https://japsa.readthedocs.io/en/latest/tools/jsa.sim.capsim.html). + +### CRABS + +
+Output files + +- `crabs_dbimport/` + - `*.fa`: Reference fasta file. +- `crabs_insilicopcr/` + - `*.fa`: Reference fasta file for simulating amplicon data. + +
+ +[CRABS](https://onlinelibrary.wiley.com/doi/10.1111/1755-0998.13741) is a tool for reformatting reference databases for simulating amplicon sequencing data. For further reading and documentation see the [CRABS repo](https://github.com/gjeunen/reference_database_creator). ### FastQC @@ -35,7 +115,34 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d ![MultiQC - FastQC adapter content plot](images/mqc_fastqc_adapter.png) -> **NB:** The FastQC plots displayed in the MultiQC report shows _untrimmed_ reads. They may contain adapter sequence and potentially regions with low quality. +:::note +The FastQC plots displayed in the MultiQC report shows _untrimmed_ reads. They may contain adapter sequence and potentially regions with low quality. +::: + +### ncbi-genome-download + +
+Output files + +- `ncbigenomedownload/` + - `*.fna.gz`: Reference fasta files downloaded from NCBI + +
+ +[ncbi-genome-download](https://github.com/kblin/ncbi-genome-download) downloads reference genome files from NCBI. + +### InSilicoSeq + +
+Output files + +- `insilicoseq/` + - `*R1.fastq.gz`: Read 1 files simulating Illumina metagenomic reads. The prefix will be the sample name found in the samplesheet. + - `*R2.fastq.gz`: Read 2 files simulating Illumina metagenomic reads. The prefix will be the sample name found in the samplesheet. + +
+ +[InSilicoSeq](https://academic.oup.com/bioinformatics/article/35/3/521/5055123) is a tool for simulating Illumina metagenomic sequencing reads. For further reading and documentation see the [InSilicoSeq documentation](https://insilicoseq.readthedocs.io/en/latest/). ### MultiQC @@ -62,7 +169,44 @@ Results generated by MultiQC collate pipeline QC from supported tools e.g. FastQ - Reports generated by Nextflow: `execution_report.html`, `execution_timeline.html`, `execution_trace.txt` and `pipeline_dag.dot`/`pipeline_dag.svg`. - Reports generated by the pipeline: `pipeline_report.html`, `pipeline_report.txt` and `software_versions.yml`. The `pipeline_report*` files will only be present if the `--email` / `--email_on_fail` parameter's are used when running the pipeline. - Reformatted samplesheet files used as input to the pipeline: `samplesheet.valid.csv`. + - Parameters used by the pipeline run: `params.json`. [Nextflow](https://www.nextflow.io/docs/latest/tracing.html) provides excellent functionality for generating various reports relevant to the running and execution of the pipeline. This will allow you to troubleshoot errors with the running of the pipeline, and also provide you with other information such as launch commands, run times and resource usage. + +### Samplesheet + +
+Output files + +- `samplesheet/` + - `*.csv`: Samplesheets with all samples. +- `samplesheet_individual_samples/` + - `*.csv`: Samplesheets for each individual sample. + +
+ +### Unzip + +
+Output files + +- `probes/` + - `unziped/` + - `*.fasta`: Probe file downloaded if custom probe hasn't been provided with `--probe_fasta` parameter. + +
+ +### Wgsim + +
+Output files + +- `wgsim/` + - `*R1.fq.gz`: Read 1 files simulating wholegenome reads. The prefix will be the sample name found in the samplesheet. + - `*R2.fq.gz`: Read 2 files simulating wholegenome reads. The prefix will be the sample name found in the samplesheet. + +
+ +[Wgsim](https://github.com/lh3/wgsim) is a tool for simulating wholegenome sequencing reads. For further reading and documentation see the [Wgsim manual](https://github.com/lh3/wgsim#readme). diff --git a/docs/usage.md b/docs/usage.md index 8596ff1..de660c4 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -6,49 +6,36 @@ ## Introduction - +The pipeline currently supports simulating amplicon, target capture, metagenome, and wholegenome sequencing reads. The different modes can be selected with `--amplicon`, `--target_capture`, `--metagenome`, or `--wholegenome` respectively. ## Samplesheet input -You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row as shown in the examples below. +You will need to create a samplesheet with information about the samples you would like to simulate before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 2 columns, and a header row as shown in the examples below. ```bash --input '[path to samplesheet file]' ``` -### Multiple runs of the same sample - -The `sample` identifiers have to be the same when you have re-sequenced the same sample more than once e.g. to increase sequencing depth. The pipeline will concatenate the raw reads before performing any downstream analysis. Below is an example for the same sample sequenced across 3 lanes: - -```console -sample,fastq_1,fastq_2 -CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz -CONTROL_REP1,AEG588A1_S1_L003_R1_001.fastq.gz,AEG588A1_S1_L003_R2_001.fastq.gz -CONTROL_REP1,AEG588A1_S1_L004_R1_001.fastq.gz,AEG588A1_S1_L004_R2_001.fastq.gz -``` - ### Full samplesheet -The pipeline will auto-detect whether a sample is single- or paired-end using the information provided in the samplesheet. 
The samplesheet can have as many columns as you desire, however, there is a strict requirement for the first 3 columns to match those defined in the table below. +The samplesheet can have as many columns as you desire, however, there is a strict requirement for the first 2 columns to match those defined in the table below. -A final samplesheet file consisting of both single- and paired-end data may look something like the one below. This is for 6 samples, where `TREATMENT_REP3` has been sequenced twice. +A final samplesheet file may look something like the one below. This is for 6 samples. -```console -sample,fastq_1,fastq_2 -CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz -CONTROL_REP2,AEG588A2_S2_L002_R1_001.fastq.gz,AEG588A2_S2_L002_R2_001.fastq.gz -CONTROL_REP3,AEG588A3_S3_L002_R1_001.fastq.gz,AEG588A3_S3_L002_R2_001.fastq.gz -TREATMENT_REP1,AEG588A4_S4_L003_R1_001.fastq.gz, -TREATMENT_REP2,AEG588A5_S5_L003_R1_001.fastq.gz, -TREATMENT_REP3,AEG588A6_S6_L003_R1_001.fastq.gz, -TREATMENT_REP3,AEG588A6_S6_L004_R1_001.fastq.gz, +```csv title="samplesheet.csv" +sample,seed +sample_1,1 +sample_2,2 +sample_3,3 +sample_4,4 +sample_5,5 +sample_6,6 ``` -| Column | Description | -| --------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `sample` | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (`_`). | -| `fastq_1` | Full path to FastQ file for Illumina short reads 1. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | -| `fastq_2` | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". 
| +| Column | Description | +| -------- | --------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `sample` | Custom sample name. This entry will be used as the prefix for many output files. Spaces in sample names are automatically converted to underscores (`_`). | +| `seed` | Seed used with tools that have a seed parameter for Random Number Generation. | An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline. @@ -57,7 +44,7 @@ An [example samplesheet](../assets/samplesheet.csv) has been provided with the p The typical command for running the pipeline is as follows: ```bash -nextflow run nf-core/readsimulator --input ./samplesheet.csv --outdir ./results --genome GRCh37 -profile docker +nextflow run nf-core/readsimulator --input ./samplesheet.csv --outdir ./results --genome GRCh37 -profile docker <--amplicon/--target_capture/--metagenome> ``` This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles. @@ -75,7 +62,9 @@ If you wish to repeatedly use the same parameters for multiple runs, rather than Pipeline settings can be provided in a `yaml` or `json` file via `-params-file `. -> ⚠️ Do not use `-c ` to specify parameters as this will result in errors. Custom config files specified with `-c` must only be used for [tuning process resource specifications](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources), other infrastructural tweaks (such as output directories), or module arguments (args). +:::warning +Do not use `-c ` to specify parameters as this will result in errors. Custom config files specified with `-c` must only be used for [tuning process resource specifications](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources), other infrastructural tweaks (such as output directories), or module arguments (args). 
+::: The above pipeline run specified with a params file in yaml format: @@ -89,6 +78,7 @@ with `params.yaml` containing: input: './samplesheet.csv' outdir: './results/' genome: 'GRCh37' +target_capture: true <...> ``` @@ -112,11 +102,15 @@ This version number will be logged in reports when you run the pipeline, so that To further assist in reproducbility, you can use share and re-use [parameter files](#running-the-pipeline) to repeat pipeline runs with the same settings without having to write out a command with every single parameter. -> 💡 If you wish to share such profile (such as upload as supplementary material for academic publications), make sure to NOT include cluster specific paths to files, nor institutional specific profiles. +:::tip +If you wish to share such profile (such as upload as supplementary material for academic publications), make sure to NOT include cluster specific paths to files, nor institutional specific profiles. +::: ## Core Nextflow arguments -> **NB:** These options are part of Nextflow and use a _single_ hyphen (pipeline parameters use a double-hyphen). +:::note +These options are part of Nextflow and use a _single_ hyphen (pipeline parameters use a double-hyphen). +::: ### `-profile` @@ -124,7 +118,9 @@ Use this parameter to choose a configuration profile. Profiles can give configur Several generic profiles are bundled with the pipeline which instruct the pipeline to use software packaged using different methods (Docker, Singularity, Podman, Shifter, Charliecloud, Apptainer, Conda) - see below. -> We highly recommend the use of Docker or Singularity containers for full pipeline reproducibility, however when this is not possible, Conda is also supported. +:::info +We highly recommend the use of Docker or Singularity containers for full pipeline reproducibility, however when this is not possible, Conda is also supported. 
+::: The pipeline also dynamically loads configurations from [https://github.com/nf-core/configs](https://github.com/nf-core/configs) when it runs, making multiple config profiles for various institutional clusters available at run time. For more information and to see if your system is available in these configs please see the [nf-core/configs documentation](https://github.com/nf-core/configs#documentation). diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy index 408951a..e248e4c 100755 --- a/lib/NfcoreTemplate.groovy +++ b/lib/NfcoreTemplate.groovy @@ -3,6 +3,8 @@ // import org.yaml.snakeyaml.Yaml +import groovy.json.JsonOutput +import nextflow.extension.FilesEx class NfcoreTemplate { @@ -140,12 +142,14 @@ class NfcoreTemplate { try { if (params.plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') } // Try to send HTML e-mail using sendmail + def sendmail_tf = new File(workflow.launchDir.toString(), ".sendmail_tmp.html") + sendmail_tf.withWriter { w -> w << sendmail_html } [ 'sendmail', '-t' ].execute() << sendmail_html log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-" } catch (all) { // Catch failures and try with plaintext def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ] - if ( mqc_report.size() <= max_multiqc_email_size.toBytes() ) { + if ( mqc_report != null && mqc_report.size() <= max_multiqc_email_size.toBytes() ) { mail_cmd += [ '-A', mqc_report ] } mail_cmd.execute() << email_html @@ -154,14 +158,16 @@ class NfcoreTemplate { } // Write summary e-mail HTML to a file - def output_d = new File("${params.outdir}/pipeline_info/") - if (!output_d.exists()) { - output_d.mkdirs() - } - def output_hf = new File(output_d, "pipeline_report.html") + def output_hf = new File(workflow.launchDir.toString(), ".pipeline_report.html") output_hf.withWriter { w -> w << email_html } - def output_tf = new File(output_d, 
"pipeline_report.txt") + FilesEx.copyTo(output_hf.toPath(), "${params.outdir}/pipeline_info/pipeline_report.html"); + output_hf.delete() + + // Write summary e-mail TXT to a file + def output_tf = new File(workflow.launchDir.toString(), ".pipeline_report.txt") output_tf.withWriter { w -> w << email_txt } + FilesEx.copyTo(output_tf.toPath(), "${params.outdir}/pipeline_info/pipeline_report.txt"); + output_tf.delete() } // @@ -222,6 +228,20 @@ class NfcoreTemplate { } } + // + // Dump pipeline parameters in a json file + // + public static void dump_parameters(workflow, params) { + def timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') + def filename = "params_${timestamp}.json" + def temp_pf = new File(workflow.launchDir.toString(), ".${filename}") + def jsonStr = JsonOutput.toJson(params) + temp_pf.text = JsonOutput.prettyPrint(jsonStr) + + FilesEx.copyTo(temp_pf.toPath(), "${params.outdir}/pipeline_info/params_${timestamp}.json") + temp_pf.delete() + } + // // Print pipeline summary on completion // diff --git a/lib/NftestUtils.groovy b/lib/NftestUtils.groovy new file mode 100644 index 0000000..ce6fc8b --- /dev/null +++ b/lib/NftestUtils.groovy @@ -0,0 +1,38 @@ +// Helper functions for pipeline tests + +class NftestUtils { + + // Function to remove Nextflow version from software_versions.yml + public static String removeNextflowVersion(outputDir) { + def softwareVersions = path("$outputDir/pipeline_info/software_versions.yml").yaml + if (softwareVersions.containsKey("Workflow")) { + softwareVersions.Workflow.remove("Nextflow") + } + return softwareVersions + } + + // Function to filter lines from a file and return a new file + public static File filterLines(String inFilePath, int linesToSkip) { + if (linesToSkip >= 0) { + File inputFile = new File(inFilePath) + File outputFile = new File(inFilePath + ".filtered") + def lineCount = 0 + inputFile.eachLine { line -> + lineCount++ + if (lineCount > linesToSkip) { + outputFile.append(line + '\n') + } + } + 
return outputFile + } else { + File inputFile = new File(inFilePath) + File outputFile = new File(inFilePath + ".filtered") + def lines = inputFile.readLines() + def totalLines = lines.size() + lines.take(totalLines + linesToSkip).each { line -> + outputFile.append(line + '\n') + } + return outputFile + } + } +} diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy index 51e9f71..477ddaf 100755 --- a/lib/WorkflowMain.groovy +++ b/lib/WorkflowMain.groovy @@ -24,7 +24,7 @@ class WorkflowMain { // // Validate parameters and print summary to screen // - public static void initialise(workflow, params, log) { + public static void initialise(workflow, params, log, args) { // Print workflow version and exit on --version if (params.version) { @@ -35,6 +35,8 @@ class WorkflowMain { // Check that a -profile or Nextflow config has been provided to run the pipeline NfcoreTemplate.checkConfigProvided(workflow, log) + // Check that the profile doesn't contain spaces and doesn't end with a trailing comma + checkProfile(workflow.profile, args, log) // Check that conda channels are set-up correctly if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { @@ -60,4 +62,16 @@ class WorkflowMain { } return null } + + // + // Exit pipeline if --profile contains spaces + // + private static void checkProfile(profile, args, log) { + if (profile.endsWith(',')) { + Nextflow.error "Profile cannot end with a trailing comma. Please remove the comma from the end of the profile string.\nHint: A common mistake is to provide multiple values to `-profile` separated by spaces. Please use commas to separate profiles instead,e.g., `-profile docker,test`." + } + if (args[0]) { + log.warn "nf-core pipelines do not accept positional arguments. The positional argument `${args[0]}` has been detected.\n Hint: A common mistake is to provide multiple values to `-profile` separated by spaces. Please use commas to separate profiles instead,e.g., `-profile docker,test`." 
+ } + } } diff --git a/lib/WorkflowReadsimulator.groovy b/lib/WorkflowReadsimulator.groovy index c7cc20d..c0b9df0 100755 --- a/lib/WorkflowReadsimulator.groovy +++ b/lib/WorkflowReadsimulator.groovy @@ -14,9 +14,8 @@ class WorkflowReadsimulator { genomeExistsError(params, log) - - if (!params.fasta) { - Nextflow.error "Genome fasta file not specified with e.g. '--fasta genome.fa' or via a detectable config file." + if (!params.fasta && !params.ncbidownload_accessions && !params.ncbidownload_taxids) { + Nextflow.error "Genome fasta file not specified with e.g. '--fasta genome.fa' or via a detectable config file. Alternatively, '--ncbidownload_accessions accessions.txt' or '--ncbidownload_taxids taxids.txt' can be used." } } @@ -52,12 +51,19 @@ class WorkflowReadsimulator { // public static String toolCitationText(params) { - - // TODO Optionally add in-text citation tools to this list. - // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 2023)" : "", - // Uncomment function in methodsDescriptionText to render in MultiQC report def citation_text = [ "Tools used in the workflow included:", + params["amplicon"] ? [ + "CRABS (Jeunen et al. 2022),", + "ART (Huang et al. 2012)," + ].join(' ').trim() : "", + params["target_capture"] ? [ + "Bowtie2 (Langmead et al. 2012),", + "Samtools (Danecek et al. 2021),", + "CapSim (Cao et al. 2018)," + ].join(' ').trim() : "", + params["metagenome"] ? + "InSilicoSeq (Gourlé et al. 2018)," : "", "FastQC (Andrews 2010),", "MultiQC (Ewels et al. 2016)", "." @@ -67,11 +73,18 @@ class WorkflowReadsimulator { } public static String toolBibliographyText(params) { - - // TODO Optionally add bibliographic entries to this list. - // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "
  • Author (2023) Pub name, Journal, DOI
  • " : "", - // Uncomment function in methodsDescriptionText to render in MultiQC report def reference_text = [ + params["amplicon"] ? [ + "
  • Jeunen, G.-J., Dowle, E., Edgecombe, J., von Ammon, U., Gemmell, N. J., & Cross, H. (2022). crabs—A software program to generate curated reference databases for metabarcoding sequencing data. Molecular Ecology Resources, 00, 1–14. https://doi.org/10.1111/1755-0998.13741
  • ", + "
  • Weichun Huang, Leping Li, Jason R. Myers, Gabor T. Marth, ART: a next-generation sequencing read simulator, Bioinformatics, Volume 28, Issue 4, February 2012, Pages 593–594, https://doi.org/10.1093/bioinformatics/btr708
  • " + ].join(' ').trim() : "", + params["target_capture"] ? [ + "
  • Langmead, B., Salzberg, S. Fast gapped-read alignment with Bowtie 2. Nat Methods 9, 357–359 (2012). https://doi.org/10.1038/nmeth.1923
  • ", + "
  • Twelve years of SAMtools and BCFtools. Petr Danecek, James K Bonfield, Jennifer Liddle, John Marshall, Valeriu Ohan, Martin O Pollard, Andrew Whitwham, Thomas Keane, Shane A McCarthy, Robert M Davies, Heng Li. GigaScience, Volume 10, Issue 2, February 2021, giab008, https://doi.org/10.1093/gigascience/giab008
  • ", + "
  • Minh Duc Cao, Devika Ganesamoorthy, Chenxi Zhou, Lachlan J M Coin, Simulating the dynamics of targeted capture sequencing with CapSim, Bioinformatics, Volume 34, Issue 5, March 2018, Pages 873–874, https://doi.org/10.1093/bioinformatics/btx691
  • " + ].join(' ').trim() : "", + params["metagenome"] ? + "
  • Gourlé H, Karlsson-Lindsjö O, Hayer J and Bongcam-Rudloff E, Simulating Illumina data with InSilicoSeq. Bioinformatics (2018) doi:10.1093/bioinformatics/bty630
  • " : "", "
  • Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/.
  • ", "
  • Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics, 32(19), 3047–3048. doi: 10.1093/bioinformatics/btw354
  • " ].join(' ').trim() @@ -90,12 +103,8 @@ class WorkflowReadsimulator { meta["nodoi_text"] = meta.manifest_map.doi ? "": "
  • If available, make sure to update the text to include the Zenodo DOI of the version of the pipeline used.
  • " // Tool references - meta["tool_citations"] = "" - meta["tool_bibliography"] = "" - - // TODO Only uncomment below if logic in toolCitationText/toolBibliographyText has been filled! - //meta["tool_citations"] = toolCitationText(params).replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".") - //meta["tool_bibliography"] = toolBibliographyText(params) + meta["tool_citations"] = toolCitationText(params).replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".") + meta["tool_bibliography"] = toolBibliographyText(params) def methods_text = mqc_methods_yaml.text diff --git a/lib/nfcore_external_java_deps.jar b/lib/nfcore_external_java_deps.jar deleted file mode 100644 index 805c8bb..0000000 Binary files a/lib/nfcore_external_java_deps.jar and /dev/null differ diff --git a/main.nf b/main.nf index f050787..7e94dc3 100644 --- a/main.nf +++ b/main.nf @@ -17,7 +17,9 @@ nextflow.enable.dsl = 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -params.fasta = WorkflowMain.getGenomeAttribute(params, 'fasta') +if (!params.fasta) { + params.fasta = WorkflowMain.getGenomeAttribute(params, 'fasta') +} /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -25,7 +27,7 @@ params.fasta = WorkflowMain.getGenomeAttribute(params, 'fasta') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { validateParameters; paramsHelp } from 'plugin/nf-validation' +include { validateParameters; paramsHelp; fromSamplesheet } from 'plugin/nf-validation' // Print help message if needed if (params.help) { @@ -41,7 +43,7 @@ if (params.validate_params) { validateParameters() } -WorkflowMain.initialise(workflow, params, log) +WorkflowMain.initialise(workflow, params, log, args) /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -49,13 +51,16 @@ WorkflowMain.initialise(workflow, params, log) 
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { READSIMULATOR } from './workflows/readsimulator' +include { READSIMULATOR } from './workflows/readsimulator/main' // // WORKFLOW: Run main nf-core/readsimulator analysis pipeline // workflow NFCORE_READSIMULATOR { - READSIMULATOR () + + ch_input = Channel.fromSamplesheet("input") + + READSIMULATOR ( ch_input ) } /* diff --git a/modules.json b/modules.json index 039aea8..d781338 100644 --- a/modules.json +++ b/modules.json @@ -5,19 +5,50 @@ "https://github.com/nf-core/modules.git": { "modules": { "nf-core": { + "art/illumina": { + "branch": "master", + "git_sha": "240937a2a9c30298110753292be041188891f2cb", + "installed_by": ["modules"], + "patch": "modules/nf-core/art/illumina/art-illumina.diff" + }, + "bedtools/getfasta": { + "branch": "master", + "git_sha": "3b248b84694d1939ac4bb33df84bf6233a34d668", + "installed_by": ["modules"] + }, + "bowtie2/align": { + "branch": "master", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "installed_by": ["modules"] + }, + "bowtie2/build": { + "branch": "master", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "installed_by": ["modules"] + }, "custom/dumpsoftwareversions": { "branch": "master", - "git_sha": "76cc4938c1f6ea5c7d83fed1eeffc146787f9543", + "git_sha": "8ec825f465b9c17f9d83000022995b4f7de6fe93", "installed_by": ["modules"] }, "fastqc": { "branch": "master", - "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "git_sha": "f4ae1d942bd50c5c0b9bd2de1393ce38315ba57c", "installed_by": ["modules"] }, "multiqc": { "branch": "master", - "git_sha": "f2d63bd5b68925f98f572eed70993d205cc694b7", + "git_sha": "9e71d8519dfbfc328c078bba14d4bd4c99e39a94", + "installed_by": ["modules"] + }, + "ncbigenomedownload": { + "branch": "master", + "git_sha": "4be1afba55bca38b29b44767b699ca7996f26d4d", + "installed_by": ["modules"] + }, + "samtools/index": { + "branch": "master", + "git_sha": 
"a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", "installed_by": ["modules"] } } diff --git a/modules/local/crabs/dbimport/environment.yml b/modules/local/crabs/dbimport/environment.yml new file mode 100644 index 0000000..b4f8928 --- /dev/null +++ b/modules/local/crabs/dbimport/environment.yml @@ -0,0 +1,7 @@ +name: crabs_dbimport +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::crabs=0.1.1-0 diff --git a/modules/local/crabs/dbimport/main.nf b/modules/local/crabs/dbimport/main.nf new file mode 100644 index 0000000..e67a13b --- /dev/null +++ b/modules/local/crabs/dbimport/main.nf @@ -0,0 +1,44 @@ +process CRABS_DBIMPORT { + tag "$meta.id" + label 'process_medium' + + // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/crabs:0.1.1--pyhb7b1952_0': + 'biocontainers/crabs:0.1.1--pyhb7b1952_0' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path("*.fa"), emit: fasta + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '0.1.1' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. 
+ def is_compressed = fasta.name.endsWith(".gz") + def fasta_name = fasta.name.replace(".gz", "") + """ + if [ "${is_compressed}" == "true" ]; then + gzip -c -d ${fasta} > ${fasta_name} + fi + + crabs db_import \\ + --input ${fasta_name} \\ + --output ${prefix}.crabsdb.fa \\ + $args + + rm ${fasta_name} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + crabs: $VERSION + END_VERSIONS + """ +} diff --git a/modules/local/crabs/insilicopcr/environment.yml b/modules/local/crabs/insilicopcr/environment.yml new file mode 100644 index 0000000..d90cc41 --- /dev/null +++ b/modules/local/crabs/insilicopcr/environment.yml @@ -0,0 +1,7 @@ +name: crabs_insilicopcr +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::crabs=0.1.1-0 diff --git a/modules/local/crabs/insilicopcr/main.nf b/modules/local/crabs/insilicopcr/main.nf new file mode 100644 index 0000000..de4c452 --- /dev/null +++ b/modules/local/crabs/insilicopcr/main.nf @@ -0,0 +1,36 @@ +process CRABS_INSILICOPCR { + tag "$meta.id" + label 'process_medium' + + // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/crabs:0.1.1--pyhb7b1952_0': + 'biocontainers/crabs:0.1.1--pyhb7b1952_0' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path("*.fa"), emit: fasta + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '0.1.1' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. 
+ """ + crabs insilico_pcr \\ + --input $fasta \\ + --output ${prefix}.crabs.fa \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + crabs: $VERSION + END_VERSIONS + """ +} diff --git a/modules/local/custom/create_samplesheet/main.nf b/modules/local/custom/create_samplesheet/main.nf new file mode 100644 index 0000000..efbb002 --- /dev/null +++ b/modules/local/custom/create_samplesheet/main.nf @@ -0,0 +1,32 @@ +process CREATE_SAMPLESHEET { + tag "$meta.id" + + input: + tuple val(meta), path(fastq) + + output: + tuple val(meta), path("*.csv"), emit: samplesheet + + exec: + def fastq_1 = "${params.outdir}/${meta.outdir}/${fastq}" + def fastq_2 = '' + if (fastq instanceof List && fastq.size() == 2) { + fastq_1 = "${params.outdir}/${meta.outdir}/${fastq[0]}" + fastq_2 = "${params.outdir}/${meta.outdir}/${fastq[1]}" + } + + // Add relevant fields to the beginning of the map + pipeline_map = [ + sample : "${meta.id}", + fastq_1 : fastq_1, + fastq_2 : fastq_2 + ] + + // Create a samplesheet + samplesheet = pipeline_map.keySet().collect{ '"' + it + '"'}.join(",") + '\n' + samplesheet += pipeline_map.values().collect{ '"' + it + '"'}.join(",") + + // Write samplesheet to file + def samplesheet_file = task.workDir.resolve("${meta.datatype}_${meta.id}.samplesheet.csv") + samplesheet_file.text = samplesheet +} diff --git a/modules/local/custom/merge_fastas/environment.yml b/modules/local/custom/merge_fastas/environment.yml new file mode 100644 index 0000000..801016a --- /dev/null +++ b/modules/local/custom/merge_fastas/environment.yml @@ -0,0 +1,7 @@ +name: merge_fastas +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - conda-forge::sed=4.7 diff --git a/modules/local/custom/merge_fastas/main.nf b/modules/local/custom/merge_fastas/main.nf new file mode 100644 index 0000000..dd77416 --- /dev/null +++ b/modules/local/custom/merge_fastas/main.nf @@ -0,0 +1,31 @@ +process MERGE_FASTAS { + tag "$meta.id" + label 'process_medium' + + conda 
"${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'nf-core/ubuntu:20.04' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path("*.fa.gz"), emit: fasta + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + cat ${args} *.gz > ${prefix}.fa.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cat: \$(cat --version | head -n 1 | sed 's/cat (GNU coreutils) //g') + END_VERSIONS + """ +} diff --git a/modules/local/custom/merge_samplesheets/environment.yml b/modules/local/custom/merge_samplesheets/environment.yml new file mode 100644 index 0000000..c7eedba --- /dev/null +++ b/modules/local/custom/merge_samplesheets/environment.yml @@ -0,0 +1,7 @@ +name: merge_samplesheet +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - conda-forge::sed=4.7 diff --git a/modules/local/custom/merge_samplesheets/main.nf b/modules/local/custom/merge_samplesheets/main.nf new file mode 100644 index 0000000..d7ae1d9 --- /dev/null +++ b/modules/local/custom/merge_samplesheets/main.nf @@ -0,0 +1,29 @@ +process MERGE_SAMPLESHEETS { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'nf-core/ubuntu:20.04' }" + + input: + tuple val(meta), path(samplesheet) + + output: + tuple val(meta), path("*.csv"), emit: samplesheet + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + echo \\"sample\\",\\"fastq_1\\",\\"fastq_2\\" > "${prefix}_samplesheet.csv" + for curr_sheet in $samplesheet; do + tail -n +2 "\$curr_sheet" >> "${prefix}_samplesheet.csv" + echo >> "${prefix}_samplesheet.csv" + done + """ +} diff --git a/modules/local/insilicoseq/generate/environment.yml b/modules/local/insilicoseq/generate/environment.yml new file mode 100644 index 0000000..e706c95 --- /dev/null +++ b/modules/local/insilicoseq/generate/environment.yml @@ -0,0 +1,7 @@ +name: insilicoseq_generate +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::insilicoseq=1.6.0 diff --git a/modules/local/insilicoseq/generate/main.nf b/modules/local/insilicoseq/generate/main.nf new file mode 100644 index 0000000..dbb1b41 --- /dev/null +++ b/modules/local/insilicoseq/generate/main.nf @@ -0,0 +1,70 @@ +process INSILICOSEQ_GENERATE { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/insilicoseq:1.6.0--pyh7cba7a3_0': + 'biocontainers/insilicoseq:1.6.0--pyh7cba7a3_0' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path("*.fastq.gz*"), emit: fastq + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def seed = task.ext.seed ?: "${meta.seed}" + if (fasta) { + def is_compressed = fasta.name.endsWith(".gz") + def fasta_name = fasta.name.replace(".gz", "") + """ + seed=\$(echo $seed | sed 's/\\[//g' | sed 's/\\]//g') + prefix=\$(echo $prefix | sed 's/\\[//g' | sed 's/\\]//g') + + if [ "${is_compressed}" == "true" ]; then + gzip -c -d ${fasta} > ${fasta_name} + fi + + iss generate \\ + --genomes ${fasta_name} \\ + --seed \$seed \\ + --output \$prefix \\ + --compress \\ + --cpus $task.cpus \\ + $args + + rm ${fasta_name} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + insilicoseq: \$(iss --version | sed 's/iss version //g') + END_VERSIONS + """ + } else { + """ + seed=\$(echo $seed | sed 's/\\[//g' | sed 's/\\]//g') + prefix=\$(echo $prefix | sed 's/\\[//g' | sed 's/\\]//g') + + iss generate \\ + --ncbi $args2 \\ + --seed \$seed \\ + --output \$prefix \\ + --compress \\ + --cpus $task.cpus \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + insilicoseq: \$(iss --version | sed 's/iss version //g') + END_VERSIONS + """ + } +} diff --git a/modules/local/japsa/capsim/main.nf b/modules/local/japsa/capsim/main.nf new file mode 100644 index 0000000..4d1deea --- /dev/null +++ b/modules/local/japsa/capsim/main.nf @@ -0,0 +1,38 @@ +process JAPSA_CAPSIM { + tag "$meta.id" + label 'process_single' + + container "nf-core/japsa:0" + + input: + tuple val(meta), path(fasta), path(probes), path(index) + + output: + tuple val(meta), path("*.fastq.gz*"), emit: fastq + path "versions.yml" , emit: 
versions + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "CAPSIM does not support Conda. Please use Docker / Singularity / Podman instead." + } + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def seed = task.ext.seed ?: "${meta.seed}" + """ + jsa.sim.capsim \\ + --reference ${fasta} \\ + --probe ${probes} \\ + --ID ${prefix} \\ + --seed ${seed} \\ + ${args} ${prefix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + capsim: \$(jsa | tail -n +2 | head -n 1 | awk -F, '{print \$1}' | sed 's/Version //g') + END_VERSIONS + """ +} diff --git a/modules/local/uncompress_fasta/environment.yml b/modules/local/uncompress_fasta/environment.yml new file mode 100644 index 0000000..334d0a3 --- /dev/null +++ b/modules/local/uncompress_fasta/environment.yml @@ -0,0 +1,7 @@ +name: gzip +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - conda-forge::gzip=1.11 diff --git a/modules/local/uncompress_fasta/main.nf b/modules/local/uncompress_fasta/main.nf new file mode 100644 index 0000000..56edf1a --- /dev/null +++ b/modules/local/uncompress_fasta/main.nf @@ -0,0 +1,33 @@ +process UNCOMPRESS_FASTA { + tag "$fasta" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/gzip:1.11': + 'biocontainers/gzip:1.11' }" + + input: + path(fasta) + + output: + path "${fasta.name.replace('.gz', '')}", emit: fasta + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def is_compressed = fasta.name.endsWith(".gz") + def fasta_name = fasta.name.replace(".gz", "") + """ + if [ "${is_compressed}" == "true" ]; then + gzip -c -d ${fasta} > ${fasta_name} + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gzip: \$(gzip -V | head -n 1 | sed 's/gzip //g') + END_VERSIONS + """ +} diff --git a/modules/local/unzip/environment.yml b/modules/local/unzip/environment.yml new file mode 100644 index 0000000..0c6171c --- /dev/null +++ b/modules/local/unzip/environment.yml @@ -0,0 +1,7 @@ +name: unzip +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - conda-forge::unzip=6.0 diff --git a/modules/local/unzip/main.nf b/modules/local/unzip/main.nf new file mode 100644 index 0000000..0523e5e --- /dev/null +++ b/modules/local/unzip/main.nf @@ -0,0 +1,30 @@ +process UNZIP { + tag "$file" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/unzip:6.0': + 'biocontainers/unzip:6.0' }" + + input: + path(file) + + output: + path "unziped/*" , emit: file + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + """ + mkdir unziped + unzip ${file} -d unziped + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + unzip: \$(unzip -v | head -n 1 | sed 's/UnZip //g' | cut -d ' ' -f1) + END_VERSIONS + """ +} diff --git a/modules/local/wgsim/environment.yml b/modules/local/wgsim/environment.yml new file mode 100644 index 0000000..0f45674 --- /dev/null +++ b/modules/local/wgsim/environment.yml @@ -0,0 +1,7 @@ +name: wgsim +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::wgsim=1.0 diff --git a/modules/local/wgsim/main.nf b/modules/local/wgsim/main.nf new file mode 100644 index 0000000..6671598 --- /dev/null +++ b/modules/local/wgsim/main.nf @@ -0,0 +1,41 @@ +process WGSIM { + tag "$meta.id" + label 'process_single' + + // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/wgsim:1.0--he4a0461_7': + 'biocontainers/wgsim:1.0--he4a0461_7' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path("*.fq.gz*"), emit: fastq + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def seed = task.ext.seed ?: "${meta.seed}" + def VERSION = '1.0' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. 
+ """ + wgsim \\ + $args \\ + -S $seed \\ + $fasta \\ + ${prefix}_R1.fq \\ + ${prefix}_R2.fq + + gzip ${prefix}_R1.fq ${prefix}_R2.fq + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + wgsim: $VERSION + END_VERSIONS + """ +} diff --git a/modules/nf-core/art/illumina/art-illumina.diff b/modules/nf-core/art/illumina/art-illumina.diff new file mode 100644 index 0000000..0f8e263 --- /dev/null +++ b/modules/nf-core/art/illumina/art-illumina.diff @@ -0,0 +1,37 @@ +Changes in module 'nf-core/art/illumina' +--- modules/nf-core/art/illumina/main.nf ++++ modules/nf-core/art/illumina/main.nf +@@ -12,7 +12,6 @@ + input: + tuple val(meta), path(fasta) + val(sequencing_system) +- val(fold_coverage) + val(read_length) + + output: +@@ -28,13 +27,14 @@ + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" ++ def seed = task.ext.seed ?: "${meta.seed}" // Seed passed to art_illumina via -rs; ext.seed overrides meta.seed + def VERSION = '2016.06.05' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + """ + art_illumina \\ + -ss $sequencing_system \\ + -i $fasta \\ + -l $read_length \\ +- -f $fold_coverage \\ ++ -rs $seed \\ + -o $prefix \\ + $args + + +--- modules/nf-core/art/illumina/meta.yml ++++ modules/nf-core/art/illumina/meta.yml +@@ -56,4 +56,4 @@ + pattern: "*.sam" + + authors: +- - "@MarieLataretu" ++ - "@MarieLataretu" +************************************************************ diff --git a/modules/nf-core/art/illumina/main.nf b/modules/nf-core/art/illumina/main.nf new file mode 100644 index 0000000..0d7978b --- /dev/null +++ b/modules/nf-core/art/illumina/main.nf @@ -0,0 +1,71 @@ +process ART_ILLUMINA { + + tag "$meta.id" + label 'process_single' + + // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. 
+ conda "bioconda::art=2016.06.05" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/art:2016.06.05--h589041f_9': + 'biocontainers/art:2016.06.05--h589041f_9' }" + + input: + tuple val(meta), path(fasta) + val(sequencing_system) + val(read_length) + + output: + tuple val(meta), path("*.fq.gz"), emit: fastq + tuple val(meta), path("*.aln"), optional:true , emit: aln + tuple val(meta), path("*.sam"), optional:true , emit: sam + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def seed = task.ext.seed ?: "${meta.seed}" // Seed passed to art_illumina via -rs; ext.seed overrides meta.seed + def VERSION = '2016.06.05' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + """ + art_illumina \\ + -ss $sequencing_system \\ + -i $fasta \\ + -l $read_length \\ + -rs $seed \\ + -o $prefix \\ + $args + + gzip \\ + --no-name \\ + $args2 \\ + $prefix*.fq + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + art: $VERSION + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '2016.06.05' + """ + touch ${prefix}.fq.gz + touch ${prefix}1.fq.gz + touch ${prefix}2.fq.gz + touch ${prefix}.aln + touch ${prefix}1.aln + touch ${prefix}2.aln + touch ${prefix}.sam + touch ${prefix}1.sam + touch ${prefix}2.sam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + art: $VERSION + END_VERSIONS + """ +} diff --git a/modules/nf-core/art/illumina/meta.yml b/modules/nf-core/art/illumina/meta.yml new file mode 100644 index 0000000..ed9960b --- /dev/null +++ b/modules/nf-core/art/illumina/meta.yml @@ -0,0 +1,56 @@ +name: "art_illumina" +description: Simulation tool to generate synthetic Illumina next-generation sequencing reads +keywords: + - fastq + - fasta + - illumina + - 
simulate +tools: + - "art": + description: "ART is a set of simulation tools to generate synthetic next-generation sequencing reads. ART simulates sequencing reads by mimicking real sequencing process with empirical error models or quality profiles summarized from large recalibrated sequencing data. ART can also simulate reads using user own read error model or quality profiles. " + homepage: "https://www.niehs.nih.gov/research/resources/software/biostatistics/art/index.cfm" + doi: "10.1093/bioinformatics/btr708" + licence: "GPL version 3 license" +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - fasta: + type: file + description: FASTA file of input DNA/RNA reference + pattern: "*.{fasta,fa}" + - sequencing_system: + type: string + description: The name of Illumina sequencing system of the built-in profile used for simulation + - fold_coverage: + type: integer + description: The fold of read coverage to be simulated or number of reads/read pairs generated for each amplicon + - read_length: + type: integer + description: The length of reads to be simulated +output: + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fastq: + type: file + description: Simulated reads + pattern: "*.fq.gz" + - aln: + type: file + description: OPTIONAL Alignment file of the simulated reads. Enabled by default, to disable, use -na/--noALN. + pattern: "*.aln" + - sam: + type: file + description: OPTIONAL Alignment file in SAM format of the simulated reads. Enabled with -sam/--samout. 
+ pattern: "*.sam" +authors: + - "@MarieLataretu" diff --git a/modules/nf-core/bedtools/getfasta/environment.yml b/modules/nf-core/bedtools/getfasta/environment.yml new file mode 100644 index 0000000..a89401f --- /dev/null +++ b/modules/nf-core/bedtools/getfasta/environment.yml @@ -0,0 +1,7 @@ +name: bedtools_getfasta +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::bedtools=2.31.1 diff --git a/modules/nf-core/bedtools/getfasta/main.nf b/modules/nf-core/bedtools/getfasta/main.nf new file mode 100644 index 0000000..50549c7 --- /dev/null +++ b/modules/nf-core/bedtools/getfasta/main.nf @@ -0,0 +1,37 @@ +process BEDTOOLS_GETFASTA { + tag "$bed" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bedtools:2.31.1--hf5e1c6e_0' : + 'biocontainers/bedtools:2.31.1--hf5e1c6e_0' }" + + input: + path bed + path fasta + + output: + path "*.fa" , emit: fasta + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${bed.baseName}" + """ + bedtools \\ + getfasta \\ + $args \\ + -fi $fasta \\ + -bed $bed \\ + -fo ${prefix}.fa + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/bedtools/getfasta/meta.yml b/modules/nf-core/bedtools/getfasta/meta.yml new file mode 100644 index 0000000..25fb19a --- /dev/null +++ b/modules/nf-core/bedtools/getfasta/meta.yml @@ -0,0 +1,36 @@ +name: bedtools_getfasta +description: extract sequences in a FASTA file based on intervals defined in a feature file. 
+keywords: + - bed + - fasta + - getfasta +tools: + - bedtools: + description: | + A set of tools for genomic analysis tasks, specifically enabling genome arithmetic (merge, count, complement) on various file types. + documentation: https://bedtools.readthedocs.io/en/latest/content/tools/intersect.html + licence: ["MIT"] +input: + - bed: + type: file + description: Bed feature file + pattern: "*.{bed}" + - fasta: + type: file + description: Input fasta file + pattern: "*.{fa,fasta}" +output: + - fasta: + type: file + description: Output fasta file with extracted sequences + pattern: "*.{fa}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" +maintainers: + - "@joseespinosa" + - "@drpatelh" diff --git a/modules/nf-core/bowtie2/align/environment.yml b/modules/nf-core/bowtie2/align/environment.yml new file mode 100644 index 0000000..afc3ea8 --- /dev/null +++ b/modules/nf-core/bowtie2/align/environment.yml @@ -0,0 +1,9 @@ +name: bowtie2_align +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::bowtie2=2.4.4 + - bioconda::samtools=1.16.1 + - conda-forge::pigz=2.6 diff --git a/modules/nf-core/bowtie2/align/main.nf b/modules/nf-core/bowtie2/align/main.nf new file mode 100644 index 0000000..e67f6a0 --- /dev/null +++ b/modules/nf-core/bowtie2/align/main.nf @@ -0,0 +1,94 @@ +process BOWTIE2_ALIGN { + tag "$meta.id" + label "process_high" + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:a0ffedb52808e102887f6ce600d092675bf3528a-0' : + 'biocontainers/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:a0ffedb52808e102887f6ce600d092675bf3528a-0' }" + + input: + tuple val(meta) , path(reads) + tuple val(meta2), path(index) + val save_unaligned + val sort_bam + + output: + tuple val(meta), path("*.{bam,sam}"), emit: aligned + tuple val(meta), path("*.log") , emit: log + tuple val(meta), path("*fastq.gz") , emit: fastq, optional:true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: "" + def args2 = task.ext.args2 ?: "" + def prefix = task.ext.prefix ?: "${meta.id}" + + def unaligned = "" + def reads_args = "" + if (meta.single_end) { + unaligned = save_unaligned ? "--un-gz ${prefix}.unmapped.fastq.gz" : "" + reads_args = "-U ${reads}" + } else { + unaligned = save_unaligned ? "--un-conc-gz ${prefix}.unmapped.fastq.gz" : "" + reads_args = "-1 ${reads[0]} -2 ${reads[1]}" + } + + def samtools_command = sort_bam ? 'sort' : 'view' + def extension_pattern = /(--output-fmt|-O)+\s+(\S+)/ + def extension = (args2 ==~ extension_pattern) ? 
(args2 =~ extension_pattern)[0][2].toLowerCase() : "bam" + + """ + INDEX=`find -L ./ -name "*.rev.1.bt2" | sed "s/\\.rev.1.bt2\$//"` + [ -z "\$INDEX" ] && INDEX=`find -L ./ -name "*.rev.1.bt2l" | sed "s/\\.rev.1.bt2l\$//"` + [ -z "\$INDEX" ] && echo "Bowtie2 index files not found" 1>&2 && exit 1 + + bowtie2 \\ + -x \$INDEX \\ + $reads_args \\ + --threads $task.cpus \\ + $unaligned \\ + $args \\ + 2> ${prefix}.bowtie2.log \\ + | samtools $samtools_command $args2 --threads $task.cpus -o ${prefix}.${extension} - + + if [ -f ${prefix}.unmapped.fastq.1.gz ]; then + mv ${prefix}.unmapped.fastq.1.gz ${prefix}.unmapped_1.fastq.gz + fi + + if [ -f ${prefix}.unmapped.fastq.2.gz ]; then + mv ${prefix}.unmapped.fastq.2.gz ${prefix}.unmapped_2.fastq.gz + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bowtie2: \$(echo \$(bowtie2 --version 2>&1) | sed 's/^.*bowtie2-align-s version //; s/ .*\$//') + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ + + stub: + def args2 = task.ext.args2 ?: "" + def prefix = task.ext.prefix ?: "${meta.id}" + def extension_pattern = /(--output-fmt|-O)+\s+(\S+)/ + def extension = (args2 ==~ extension_pattern) ? 
(args2 =~ extension_pattern)[0][2].toLowerCase() : "bam" + + """ + touch ${prefix}.${extension} + touch ${prefix}.bowtie2.log + touch ${prefix}.unmapped_1.fastq.gz + touch ${prefix}.unmapped_2.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bowtie2: \$(echo \$(bowtie2 --version 2>&1) | sed 's/^.*bowtie2-align-s version //; s/ .*\$//') + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ + +} diff --git a/modules/nf-core/bowtie2/align/meta.yml b/modules/nf-core/bowtie2/align/meta.yml new file mode 100644 index 0000000..e66811d --- /dev/null +++ b/modules/nf-core/bowtie2/align/meta.yml @@ -0,0 +1,70 @@ +name: bowtie2_align +description: Align reads to a reference genome using bowtie2 +keywords: + - align + - map + - fasta + - fastq + - genome + - reference +tools: + - bowtie2: + description: | + Bowtie 2 is an ultrafast and memory-efficient tool for aligning + sequencing reads to long reference sequences. + homepage: http://bowtie-bio.sourceforge.net/bowtie2/index.shtml + documentation: http://bowtie-bio.sourceforge.net/bowtie2/manual.shtml + doi: 10.1038/nmeth.1923 + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'test', single_end:false ] + - index: + type: file + description: Bowtie2 genome index files + pattern: "*.ebwt" + - save_unaligned: + type: boolean + description: | + Save reads that do not map to the reference (true) or discard them (false) + (default: false) + - sort_bam: + type: boolean + description: use samtools sort (true) or samtools view (false) + pattern: "true or false" +output: + - aligned: + type: file + description: Output BAM/SAM file containing read alignments + pattern: "*.{bam,sam}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - fastq: + type: file + description: Unaligned FastQ files + pattern: "*.fastq.gz" + - log: + type: file + description: Aligment log + pattern: "*.log" +authors: + - "@joseespinosa" + - "@drpatelh" +maintainers: + - "@joseespinosa" + - "@drpatelh" diff --git a/modules/nf-core/bowtie2/build/environment.yml b/modules/nf-core/bowtie2/build/environment.yml new file mode 100644 index 0000000..17af3e5 --- /dev/null +++ b/modules/nf-core/bowtie2/build/environment.yml @@ -0,0 +1,7 @@ +name: bowtie2_build +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::bowtie2=2.4.5 diff --git a/modules/nf-core/bowtie2/build/main.nf b/modules/nf-core/bowtie2/build/main.nf new file mode 100644 index 0000000..3a90971 --- /dev/null +++ b/modules/nf-core/bowtie2/build/main.nf @@ -0,0 +1,42 @@ +process BOWTIE2_BUILD { + tag "$fasta" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/bowtie2:2.4.5--py39hbb4e92a_0' : + 'biocontainers/bowtie2:2.4.5--py39hbb4e92a_0' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path('bowtie2') , emit: index + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + mkdir bowtie2 + bowtie2-build $args --threads $task.cpus $fasta bowtie2/${fasta.baseName} + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bowtie2: \$(echo \$(bowtie2 --version 2>&1) | sed 's/^.*bowtie2-align-s version //; s/ .*\$//') + END_VERSIONS + """ + + stub: + """ + mkdir bowtie2 + touch bowtie2/${fasta.baseName}.{1..4}.bt2 + touch bowtie2/${fasta.baseName}.rev.{1,2}.bt2 + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bowtie2: \$(echo \$(bowtie2 --version 2>&1) | sed 's/^.*bowtie2-align-s version //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/bowtie2/build/meta.yml b/modules/nf-core/bowtie2/build/meta.yml new file mode 100644 index 0000000..2d68799 --- /dev/null +++ b/modules/nf-core/bowtie2/build/meta.yml @@ -0,0 +1,46 @@ +name: bowtie2_build +description: Builds bowtie index for reference genome +keywords: + - build + - index + - fasta + - genome + - reference +tools: + - bowtie2: + description: | + Bowtie 2 is an ultrafast and memory-efficient tool for aligning + sequencing reads to long reference sequences. + homepage: http://bowtie-bio.sourceforge.net/bowtie2/index.shtml + documentation: http://bowtie-bio.sourceforge.net/bowtie2/manual.shtml + doi: 10.1038/nmeth.1923 + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: Input genome fasta file +output: + - meta: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'test', single_end:false ] + - index: + type: file + description: Bowtie2 genome index files + pattern: "*.bt2" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" +maintainers: + - "@joseespinosa" + - "@drpatelh" diff --git a/modules/nf-core/custom/dumpsoftwareversions/environment.yml b/modules/nf-core/custom/dumpsoftwareversions/environment.yml new file mode 100644 index 0000000..9b3272b --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/environment.yml @@ -0,0 +1,7 @@ +name: custom_dumpsoftwareversions +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::multiqc=1.19 diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf index 800a609..f218761 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/main.nf +++ b/modules/nf-core/custom/dumpsoftwareversions/main.nf @@ -2,10 +2,10 @@ process CUSTOM_DUMPSOFTWAREVERSIONS { label 'process_single' // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container - conda "bioconda::multiqc=1.14" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/multiqc:1.14--pyhdfd78af_0' : - 'quay.io/biocontainers/multiqc:1.14--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.19--pyhdfd78af_0' : + 'biocontainers/multiqc:1.19--pyhdfd78af_0' }" input: path versions diff --git a/modules/nf-core/custom/dumpsoftwareversions/meta.yml b/modules/nf-core/custom/dumpsoftwareversions/meta.yml index c32657d..5f15a5f 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/meta.yml +++ b/modules/nf-core/custom/dumpsoftwareversions/meta.yml @@ -1,4 +1,4 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json name: custom_dumpsoftwareversions description: Custom module used to dump software versions within the nf-core pipeline template keywords: @@ -16,7 +16,6 @@ input: type: file description: YML file containing software versions pattern: "*.yml" - output: - yml: type: file @@ -30,7 +29,9 @@ output: type: file description: File containing software versions pattern: "versions.yml" - authors: - "@drpatelh" - "@grst" +maintainers: + - "@drpatelh" + - "@grst" diff --git a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py index e55b8d4..da03340 100755 --- a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py +++ b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py @@ -4,11 +4,10 @@ """Provide functions to merge multiple versions.yml files.""" +import yaml import platform from textwrap import dedent -import yaml - def _make_versions_html(versions): """Generate a tabular HTML output of all versions for MultiQC.""" diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test 
b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test new file mode 100644 index 0000000..b1e1630 --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test @@ -0,0 +1,43 @@ +nextflow_process { + + name "Test Process CUSTOM_DUMPSOFTWAREVERSIONS" + script "../main.nf" + process "CUSTOM_DUMPSOFTWAREVERSIONS" + tag "modules" + tag "modules_nfcore" + tag "custom" + tag "dumpsoftwareversions" + tag "custom/dumpsoftwareversions" + + test("Should run without failures") { + when { + process { + """ + def tool1_version = ''' + TOOL1: + tool1: 0.11.9 + '''.stripIndent() + + def tool2_version = ''' + TOOL2: + tool2: 1.9 + '''.stripIndent() + + input[0] = Channel.of(tool1_version, tool2_version).collectFile() + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.versions, + file(process.out.mqc_yml[0]).readLines()[0..10], + file(process.out.yml[0]).readLines()[0..7] + ).match() + } + ) + } + } +} diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap new file mode 100644 index 0000000..5f59a93 --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap @@ -0,0 +1,33 @@ +{ + "Should run without failures": { + "content": [ + [ + "versions.yml:md5,76d454d92244589d32455833f7c1ba6d" + ], + [ + "data: \"\\n\\n \\n \\n \\n \\n \\n \\n \\n\\", + " \\n\\n\\n \\n \\n\\", + " \\ \\n\\n\\n\\n \\n \\", + " \\ \\n \\n\\n\\n\\n\\", + " \\n\\n \\n \\n\\", + " \\ \\n\\n\\n\\n\\n\\n \\n\\", + " \\ \\n \\n\\n\\n\\n\\", + " \\n\\n \\n \\n\\" + ], + [ + "CUSTOM_DUMPSOFTWAREVERSIONS:", + " python: 3.11.7", + " yaml: 5.4.1", + "TOOL1:", + " tool1: 0.11.9", + "TOOL2:", + " tool2: '1.9'", + "Workflow:" + ] + ], + "timestamp": "2024-01-09T23:01:18.710682" + } +} \ No newline at end of file diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml 
b/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml new file mode 100644 index 0000000..405aa24 --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml @@ -0,0 +1,2 @@ +custom/dumpsoftwareversions: + - modules/nf-core/custom/dumpsoftwareversions/** diff --git a/modules/nf-core/fastqc/environment.yml b/modules/nf-core/fastqc/environment.yml new file mode 100644 index 0000000..1787b38 --- /dev/null +++ b/modules/nf-core/fastqc/environment.yml @@ -0,0 +1,7 @@ +name: fastqc +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::fastqc=0.12.1 diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf index 9ae5838..9e19a74 100644 --- a/modules/nf-core/fastqc/main.nf +++ b/modules/nf-core/fastqc/main.nf @@ -2,10 +2,10 @@ process FASTQC { tag "$meta.id" label 'process_medium' - conda "bioconda::fastqc=0.11.9" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/fastqc:0.11.9--0' : - 'quay.io/biocontainers/fastqc:0.11.9--0' }" + 'https://depot.galaxyproject.org/singularity/fastqc:0.12.1--hdfd78af_0' : + 'biocontainers/fastqc:0.12.1--hdfd78af_0' }" input: tuple val(meta), path(reads) @@ -29,11 +29,15 @@ process FASTQC { printf "%s %s\\n" $rename_to | while read old_name new_name; do [ -f "\${new_name}" ] || ln -s \$old_name \$new_name done - fastqc $args --threads $task.cpus $renamed_files + + fastqc \\ + $args \\ + --threads $task.cpus \\ + $renamed_files cat <<-END_VERSIONS > versions.yml "${task.process}": - fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) + fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' ) END_VERSIONS """ @@ -45,7 +49,7 @@ process FASTQC { cat <<-END_VERSIONS > versions.yml "${task.process}": - fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) + fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' ) END_VERSIONS """ } diff --git a/modules/nf-core/fastqc/meta.yml b/modules/nf-core/fastqc/meta.yml index 4da5bb5..ee5507e 100644 --- a/modules/nf-core/fastqc/meta.yml +++ b/modules/nf-core/fastqc/meta.yml @@ -50,3 +50,8 @@ authors: - "@grst" - "@ewels" - "@FelixKrueger" +maintainers: + - "@drpatelh" + - "@grst" + - "@ewels" + - "@FelixKrueger" diff --git a/modules/nf-core/fastqc/tests/main.nf.test b/modules/nf-core/fastqc/tests/main.nf.test new file mode 100644 index 0000000..70edae4 --- /dev/null +++ b/modules/nf-core/fastqc/tests/main.nf.test @@ -0,0 +1,212 @@ +nextflow_process { + + name "Test Process FASTQC" + script "../main.nf" + process "FASTQC" + + tag "modules" + tag "modules_nfcore" + tag "fastqc" + + test("sarscov2 single-end [fastq]") { + + when { + process { + """ + input[0] = Channel.of([ + [ id: 'test', single_end:true ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + + 
// NOTE The report contains the date inside it, which means that the md5sum is stable per day, but not longer than that. So you can't md5sum it. + // looks like this:
    Mon 2 Oct 2023
    test.gz
    + // https://github.com/nf-core/modules/pull/3903#issuecomment-1743620039 + + { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("") }, + + { assert snapshot(process.out.versions).match("fastqc_versions_single") } + ) + } + } + + test("sarscov2 paired-end [fastq]") { + + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" }, + { assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" }, + { assert process.out.zip[0][1][0] ==~ ".*/test_1_fastqc.zip" }, + { assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" }, + { assert path(process.out.html[0][1][0]).text.contains("") }, + { assert path(process.out.html[0][1][1]).text.contains("") }, + + { assert snapshot(process.out.versions).match("fastqc_versions_paired") } + ) + } + } + + test("sarscov2 interleaved [fastq]") { + + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("") }, + + { assert snapshot(process.out.versions).match("fastqc_versions_interleaved") } + ) + } + } + + test("sarscov2 paired-end [bam]") { + + when { + process { + """ + 
input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("") }, + + { assert snapshot(process.out.versions).match("fastqc_versions_bam") } + ) + } + } + + test("sarscov2 multiple [fastq]") { + + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_2.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" }, + { assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" }, + { assert process.out.html[0][1][2] ==~ ".*/test_3_fastqc.html" }, + { assert process.out.html[0][1][3] ==~ ".*/test_4_fastqc.html" }, + { assert process.out.zip[0][1][0] ==~ ".*/test_1_fastqc.zip" }, + { assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" }, + { assert process.out.zip[0][1][2] ==~ ".*/test_3_fastqc.zip" }, + { assert process.out.zip[0][1][3] ==~ ".*/test_4_fastqc.zip" }, + { assert path(process.out.html[0][1][0]).text.contains("") }, + { assert path(process.out.html[0][1][1]).text.contains("") }, + { assert path(process.out.html[0][1][2]).text.contains("") }, + { assert 
path(process.out.html[0][1][3]).text.contains("") }, + + { assert snapshot(process.out.versions).match("fastqc_versions_multiple") } + ) + } + } + + test("sarscov2 custom_prefix") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'mysample', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1] ==~ ".*/mysample_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/mysample_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("") }, + + { assert snapshot(process.out.versions).match("fastqc_versions_custom_prefix") } + ) + } + } + + test("sarscov2 single-end [fastq] - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id: 'test', single_end:true ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out.html.collect { file(it[1]).getName() } + + process.out.zip.collect { file(it[1]).getName() } + + process.out.versions ).match("fastqc_stub") } + ) + } + } + +} diff --git a/modules/nf-core/fastqc/tests/main.nf.test.snap b/modules/nf-core/fastqc/tests/main.nf.test.snap new file mode 100644 index 0000000..86f7c31 --- /dev/null +++ b/modules/nf-core/fastqc/tests/main.nf.test.snap @@ -0,0 +1,88 @@ +{ + "fastqc_versions_interleaved": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:40:07.293713" + }, + "fastqc_stub": { + "content": [ + [ + "test.html", + "test.zip", + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": 
"2024-01-31T17:31:01.425198" + }, + "fastqc_versions_multiple": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:40:55.797907" + }, + "fastqc_versions_bam": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:40:26.795862" + }, + "fastqc_versions_single": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:39:27.043675" + }, + "fastqc_versions_paired": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:39:47.584191" + }, + "fastqc_versions_custom_prefix": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:41:14.576531" + } +} \ No newline at end of file diff --git a/modules/nf-core/fastqc/tests/tags.yml b/modules/nf-core/fastqc/tests/tags.yml new file mode 100644 index 0000000..7834294 --- /dev/null +++ b/modules/nf-core/fastqc/tests/tags.yml @@ -0,0 +1,2 @@ +fastqc: + - modules/nf-core/fastqc/** diff --git a/modules/nf-core/multiqc/environment.yml b/modules/nf-core/multiqc/environment.yml new file mode 100644 index 0000000..7625b75 --- /dev/null +++ b/modules/nf-core/multiqc/environment.yml @@ -0,0 +1,7 @@ +name: multiqc +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::multiqc=1.19 diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index 4b60474..1b9f7c4 100644 --- a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -1,10 +1,10 @@ process MULTIQC { label 
'process_single' - conda "bioconda::multiqc=1.14" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.14--pyhdfd78af_0' : - 'quay.io/biocontainers/multiqc:1.14--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.19--pyhdfd78af_0' : + 'biocontainers/multiqc:1.19--pyhdfd78af_0' }" input: path multiqc_files, stageAs: "?/*" @@ -25,12 +25,14 @@ process MULTIQC { def args = task.ext.args ?: '' def config = multiqc_config ? "--config $multiqc_config" : '' def extra_config = extra_multiqc_config ? "--config $extra_multiqc_config" : '' + def logo = multiqc_logo ? /--cl-config 'custom_logo: "${multiqc_logo}"'/ : '' """ multiqc \\ --force \\ $args \\ $config \\ $extra_config \\ + $logo \\ . cat <<-END_VERSIONS > versions.yml @@ -41,7 +43,7 @@ process MULTIQC { stub: """ - touch multiqc_data + mkdir multiqc_data touch multiqc_plots touch multiqc_report.html diff --git a/modules/nf-core/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml index f93b5ee..45a9bc3 100644 --- a/modules/nf-core/multiqc/meta.yml +++ b/modules/nf-core/multiqc/meta.yml @@ -1,5 +1,4 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json -name: MultiQC +name: multiqc description: Aggregate results from bioinformatics analyses across many samples into a single report keywords: - QC @@ -13,7 +12,6 @@ tools: homepage: https://multiqc.info/ documentation: https://multiqc.info/docs/ licence: ["GPL-3.0-or-later"] - input: - multiqc_files: type: file @@ -31,7 +29,6 @@ input: type: file description: Optional logo file for MultiQC pattern: "*.{png}" - output: - report: type: file @@ -54,3 +51,8 @@ authors: - "@bunop" - "@drpatelh" - "@jfy133" +maintainers: + - "@abhi18av" + - "@bunop" + - "@drpatelh" + - "@jfy133" diff --git a/modules/nf-core/multiqc/tests/main.nf.test 
b/modules/nf-core/multiqc/tests/main.nf.test new file mode 100644 index 0000000..f1c4242 --- /dev/null +++ b/modules/nf-core/multiqc/tests/main.nf.test @@ -0,0 +1,84 @@ +nextflow_process { + + name "Test Process MULTIQC" + script "../main.nf" + process "MULTIQC" + + tag "modules" + tag "modules_nfcore" + tag "multiqc" + + test("sarscov2 single-end [fastqc]") { + + when { + process { + """ + input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.report[0] ==~ ".*/multiqc_report.html" }, + { assert process.out.data[0] ==~ ".*/multiqc_data" }, + { assert snapshot(process.out.versions).match("multiqc_versions_single") } + ) + } + + } + + test("sarscov2 single-end [fastqc] [config]") { + + when { + process { + """ + input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) + input[1] = Channel.of(file("https://github.com/nf-core/tools/raw/dev/nf_core/pipeline-template/assets/multiqc_config.yml", checkIfExists: true)) + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.report[0] ==~ ".*/multiqc_report.html" }, + { assert process.out.data[0] ==~ ".*/multiqc_data" }, + { assert snapshot(process.out.versions).match("multiqc_versions_config") } + ) + } + } + + test("sarscov2 single-end [fastqc] - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.report.collect { file(it).getName() } + + process.out.data.collect { 
file(it).getName() } + + process.out.plots.collect { file(it).getName() } + + process.out.versions ).match("multiqc_stub") } + ) + } + + } +} diff --git a/modules/nf-core/multiqc/tests/main.nf.test.snap b/modules/nf-core/multiqc/tests/main.nf.test.snap new file mode 100644 index 0000000..549ba79 --- /dev/null +++ b/modules/nf-core/multiqc/tests/main.nf.test.snap @@ -0,0 +1,41 @@ +{ + "multiqc_versions_single": { + "content": [ + [ + "versions.yml:md5,14e9a2661241abd828f4f06a7b5c222d" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:43:40.529579" + }, + "multiqc_stub": { + "content": [ + [ + "multiqc_report.html", + "multiqc_data", + "multiqc_plots", + "versions.yml:md5,14e9a2661241abd828f4f06a7b5c222d" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:45:09.605359" + }, + "multiqc_versions_config": { + "content": [ + [ + "versions.yml:md5,14e9a2661241abd828f4f06a7b5c222d" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:44:53.535994" + } +} \ No newline at end of file diff --git a/modules/nf-core/multiqc/tests/tags.yml b/modules/nf-core/multiqc/tests/tags.yml new file mode 100644 index 0000000..bea6c0d --- /dev/null +++ b/modules/nf-core/multiqc/tests/tags.yml @@ -0,0 +1,2 @@ +multiqc: + - modules/nf-core/multiqc/** diff --git a/modules/nf-core/ncbigenomedownload/environment.yml b/modules/nf-core/ncbigenomedownload/environment.yml new file mode 100644 index 0000000..746c608 --- /dev/null +++ b/modules/nf-core/ncbigenomedownload/environment.yml @@ -0,0 +1,7 @@ +name: ncbigenomedownload +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::ncbi-genome-download=0.3.3 diff --git a/modules/nf-core/ncbigenomedownload/main.nf b/modules/nf-core/ncbigenomedownload/main.nf new file mode 100644 index 0000000..baa0b51 --- /dev/null +++ b/modules/nf-core/ncbigenomedownload/main.nf @@ -0,0 +1,55 
@@ +process NCBIGENOMEDOWNLOAD { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ncbi-genome-download:0.3.3--pyh7cba7a3_0' : + 'biocontainers/ncbi-genome-download:0.3.3--pyh7cba7a3_0' }" + + input: + val meta + path accessions + path taxids + val groups + + output: + tuple val(meta), path("*_genomic.gbff.gz") , emit: gbk , optional: true + tuple val(meta), path("*_genomic.fna.gz") , emit: fna , optional: true + tuple val(meta), path("*_rm.out.gz") , emit: rm , optional: true + tuple val(meta), path("*_feature_table.txt.gz") , emit: features, optional: true + tuple val(meta), path("*_genomic.gff.gz") , emit: gff , optional: true + tuple val(meta), path("*_protein.faa.gz") , emit: faa , optional: true + tuple val(meta), path("*_protein.gpff.gz") , emit: gpff , optional: true + tuple val(meta), path("*_wgsmaster.gbff.gz") , emit: wgs_gbk , optional: true + tuple val(meta), path("*_cds_from_genomic.fna.gz"), emit: cds , optional: true + tuple val(meta), path("*_rna.fna.gz") , emit: rna , optional: true + tuple val(meta), path("*_rna_from_genomic.fna.gz"), emit: rna_fna , optional: true + tuple val(meta), path("*_assembly_report.txt") , emit: report , optional: true + tuple val(meta), path("*_assembly_stats.txt") , emit: stats , optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def accessions_opt = accessions ? "-A ${accessions}" : "" + def taxids_opt = taxids ? 
"-t ${taxids}" : "" + """ + ncbi-genome-download \\ + $args \\ + $accessions_opt \\ + $taxids_opt \\ + --output-folder ./ \\ + --flat-output \\ + --parallel $task.cpus \\ + $groups + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ncbigenomedownload: \$( ncbi-genome-download --version ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/ncbigenomedownload/meta.yml b/modules/nf-core/ncbigenomedownload/meta.yml new file mode 100644 index 0000000..e7d3720 --- /dev/null +++ b/modules/nf-core/ncbigenomedownload/meta.yml @@ -0,0 +1,96 @@ +name: ncbigenomedownload +description: A tool to quickly download assemblies from NCBI's Assembly database +keywords: + - fasta + - download + - assembly +tools: + - ncbigenomedownload: + description: Download genome files from the NCBI FTP server. + homepage: https://github.com/kblin/ncbi-genome-download + documentation: https://github.com/kblin/ncbi-genome-download + tool_dev_url: https://github.com/kblin/ncbi-genome-download + licence: ["Apache Software License"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - accessions: + type: file + description: List of accessions (one per line) to download + pattern: "*.txt" + - taxids: + type: file + description: List of taxids (one per line) to download + pattern: "*.txt" + - groups: + type: string + description: NCBI taxonomic groups to download. Can be a comma-separated list. Options are ['all', 'archaea', 'bacteria', 'fungi', 'invertebrate', 'metagenomes', 'plant', 'protozoa', 'vertebrate_mammalian', 'vertebrate_other', 'viral'] +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - gbk: + type: file + description: GenBank format of the genomic sequence(s) in the assembly + pattern: "*_genomic.gbff.gz" + - fna: + type: file + description: FASTA format of the genomic sequence(s) in the assembly. + pattern: "*_genomic.fna.gz" + - rm: + type: file + description: RepeatMasker output for eukaryotes. + pattern: "*_rm.out.gz" + - features: + type: file + description: Tab-delimited text file reporting locations and attributes for a subset of annotated features + pattern: "*_feature_table.txt.gz" + - gff: + type: file + description: Annotation of the genomic sequence(s) in GFF3 format + pattern: "*_genomic.gff.gz" + - faa: + type: file + description: FASTA format of the accessioned protein products annotated on the genome assembly. + pattern: "*_protein.faa.gz" + - gpff: + type: file + description: GenPept format of the accessioned protein products annotated on the genome assembly. 
+ pattern: "*_protein.gpff.gz" + - wgs_gbk: + type: file + description: GenBank flat file format of the WGS master for the assembly + pattern: "*_wgsmaster.gbff.gz" + - cds: + type: file + description: FASTA format of the nucleotide sequences corresponding to all CDS features annotated on the assembly + pattern: "*_cds_from_genomic.fna.gz" + - rna: + type: file + description: FASTA format of accessioned RNA products annotated on the genome assembly + pattern: "*_rna.fna.gz" + - rna_fna: + type: file + description: FASTA format of the nucleotide sequences corresponding to all RNA features annotated on the assembly + pattern: "*_rna_from_genomic.fna.gz" + - report: + type: file + description: Tab-delimited text file reporting the name, role and sequence accession.version for objects in the assembly + pattern: "*_assembly_report.txt" + - stats: + type: file + description: Tab-delimited text file reporting statistics for the assembly + pattern: "*_assembly_stats.txt" +authors: + - "@rpetit3" +maintainers: + - "@rpetit3" diff --git a/modules/nf-core/samtools/index/environment.yml b/modules/nf-core/samtools/index/environment.yml new file mode 100644 index 0000000..296ed99 --- /dev/null +++ b/modules/nf-core/samtools/index/environment.yml @@ -0,0 +1,7 @@ +name: samtools_index +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samtools=1.18 diff --git a/modules/nf-core/samtools/index/main.nf b/modules/nf-core/samtools/index/main.nf new file mode 100644 index 0000000..8ad18fd --- /dev/null +++ b/modules/nf-core/samtools/index/main.nf @@ -0,0 +1,48 @@ +process SAMTOOLS_INDEX { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/samtools:1.18--h50ea8bc_1' : + 'biocontainers/samtools:1.18--h50ea8bc_1' }" + + input: + tuple val(meta), path(input) + + output: + tuple val(meta), path("*.bai") , optional:true, emit: bai + tuple val(meta), path("*.csi") , optional:true, emit: csi + tuple val(meta), path("*.crai"), optional:true, emit: crai + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + samtools \\ + index \\ + -@ ${task.cpus-1} \\ + $args \\ + $input + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + """ + touch ${input}.bai + touch ${input}.crai + touch ${input}.csi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/index/meta.yml b/modules/nf-core/samtools/index/meta.yml new file mode 100644 index 0000000..01a4ee0 --- /dev/null +++ b/modules/nf-core/samtools/index/meta.yml @@ -0,0 +1,57 @@ +name: samtools_index +description: Index SAM/BAM/CRAM file +keywords: + - index + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bai: + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + - crai: + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + - csi: + type: file + description: CSI index file + pattern: "*.{csi}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@ewels" + - "@maxulysse" +maintainers: + - "@drpatelh" + - "@ewels" + - "@maxulysse" diff --git a/modules/nf-core/samtools/index/tests/csi.nextflow.config b/modules/nf-core/samtools/index/tests/csi.nextflow.config new file mode 100644 index 0000000..0ed260e --- /dev/null +++ b/modules/nf-core/samtools/index/tests/csi.nextflow.config @@ -0,0 +1,7 @@ +process { + + withName: SAMTOOLS_INDEX { + ext.args = '-c' + } + +} diff --git a/modules/nf-core/samtools/index/tests/main.nf.test b/modules/nf-core/samtools/index/tests/main.nf.test new file mode 100644 index 0000000..c76a916 --- /dev/null +++ b/modules/nf-core/samtools/index/tests/main.nf.test @@ -0,0 +1,87 @@ +nextflow_process { + + name "Test Process SAMTOOLS_INDEX" + script "../main.nf" + process "SAMTOOLS_INDEX" + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/index" + + test("sarscov2 [BAI]") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out.bai).match("bai") }, + { assert path(process.out.versions.get(0)).getText().contains("samtools") } + ) + } + } + + test("homo_sapiens [CRAI]") { + + when { + params 
{ + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_cram'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out.crai).match("crai") }, + { assert path(process.out.versions.get(0)).getText().contains("samtools") } + ) + } + } + + test("homo_sapiens [CSI]") { + + config "./csi.nextflow.config" + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert path(process.out.csi.get(0).get(1)).exists() }, + { assert path(process.out.versions.get(0)).getText().contains("samtools") } + ) + } + } +} diff --git a/modules/nf-core/samtools/index/tests/main.nf.test.snap b/modules/nf-core/samtools/index/tests/main.nf.test.snap new file mode 100644 index 0000000..b3baee7 --- /dev/null +++ b/modules/nf-core/samtools/index/tests/main.nf.test.snap @@ -0,0 +1,28 @@ +{ + "crai": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.paired_end.recalibrated.sorted.cram.crai:md5,14bc3bd5c89cacc8f4541f9062429029" + ] + ] + ], + "timestamp": "2023-11-15T15:17:37.30801" + }, + "bai": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.paired_end.sorted.bam.bai:md5,704c10dd1326482448ca3073fdebc2f4" + ] + ] + ], + "timestamp": "2023-11-15T15:17:30.869234" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/index/tests/tags.yml b/modules/nf-core/samtools/index/tests/tags.yml new file mode 100644 index 0000000..e0f58a7 --- /dev/null +++ b/modules/nf-core/samtools/index/tests/tags.yml @@ -0,0 +1,2 @@ +samtools/index: + - modules/nf-core/samtools/index/** diff --git a/nextflow.config b/nextflow.config index 8f89683..30b2d03 100644 --- 
a/nextflow.config +++ b/nextflow.config @@ -9,14 +9,67 @@ // Global default params, used in configs params { - // TODO nf-core: Specify your pipeline's command line flags // Input options input = null // References genome = null - igenomes_base = 's3://ngi-igenomes/igenomes' + igenomes_base = 's3://ngi-igenomes/igenomes/' igenomes_ignore = false - + fasta = null + ncbidownload_accessions = null + ncbidownload_taxids = null + ncbidownload_group = "all" + ncbidownload_section = "refseq" + + // Simulation options + amplicon = false + target_capture = false + metagenome = false + wholegenome = false + + // Amplicon options + amplicon_fw_primer = 'GTCGGTAAAACTCGTGCCAGC' + amplicon_rv_primer = 'CATAGTGGGGTATCTAATCCCAGTTTG' + amplicon_read_count = 500 + amplicon_read_length = 130 + amplicon_seq_system = 'HS25' + amplicon_crabs_ispcr_error = 4.5 + + // Target capture options + probe_file = null + probe_ref_name = 'Tetrapods-UCE-5Kv1' + target_capture_mode = 'illumina' + target_capture_fmedian = 500 + target_capture_fshape = 6.0 + target_capture_smedian = 1300 + target_capture_sshape = 6.0 + target_capture_tmedian = null + target_capture_tshape = null + target_capture_num = 500000 + target_capture_illen = 150 + target_capture_pblen = 30000 + target_capture_ilmode = 'pe' + + // Metagenome options + metagenome_abundance = 'lognormal' + metagenome_abundance_file = null + metagenome_coverage = null + metagenome_coverage_file = null + metagenome_n_reads = '1M' + metagenome_mode = 'kde' + metagenome_model = 'MiSeq' + metagenome_gc_bias = null + + // Whole Genome options + wholegenome_error_rate = 0.02 + wholegenome_outer_dist = 500 + wholegenome_standard_dev = 50 + wholegenome_n_reads = 1000000 + wholegenome_r1_length = 70 + wholegenome_r2_length = 70 + wholegenome_mutation_rate = 0.001 + wholegenome_indel_fraction = 0.15 + wholegenome_indel_extended = 0.3 // MultiQC options multiqc_config = null @@ -43,7 +96,7 @@ params { custom_config_base = 
"https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" config_profile_contact = null config_profile_url = null - + // Max resource options // Defaults only, expecting to be overwritten @@ -54,15 +107,17 @@ params { // Schema validation default options validationFailUnrecognisedParams = false validationLenientMode = false - validationSchemaIgnoreParams = 'genomes' + validationSchemaIgnoreParams = 'genomes,igenomes_base,probe_ref_db' validationShowHiddenParams = false validate_params = true - } // Load base.config by default for all pipelines includeConfig 'conf/base.config' +// Load ref_databases.config for reference taxonomy +includeConfig 'conf/ref_databases.config' + // Load nf-core custom profiles from different Institutions try { includeConfig "${params.custom_config_base}/nfcore_custom.config" @@ -71,7 +126,7 @@ try { } // Load nf-core/readsimulator custom profiles from different institutions. -// Warning: Uncomment only if a pipeline-specific instititutional config already exists on nf-core/configs! +// Warning: Uncomment only if a pipeline-specific institutional config already exists on nf-core/configs! 
// try { // includeConfig "${params.custom_config_base}/pipeline/readsimulator.config" // } catch (Exception e) { @@ -82,6 +137,7 @@ profiles { dumpHashes = true process.beforeScript = 'echo $HOSTNAME' cleanup = false + nextflow.enable.configProcessNamesValidation = true } conda { conda.enabled = true @@ -90,6 +146,7 @@ profiles { podman.enabled = false shifter.enabled = false charliecloud.enabled = false + channels = ['conda-forge', 'bioconda', 'defaults'] apptainer.enabled = false } mamba { @@ -104,16 +161,16 @@ profiles { } docker { docker.enabled = true - docker.userEmulation = true conda.enabled = false singularity.enabled = false podman.enabled = false shifter.enabled = false charliecloud.enabled = false apptainer.enabled = false + docker.runOptions = '-u $(id -u):$(id -g)' } arm { - docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' + docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' } singularity { singularity.enabled = true @@ -154,6 +211,7 @@ profiles { } apptainer { apptainer.enabled = true + apptainer.autoMounts = true conda.enabled = false docker.enabled = false singularity.enabled = false @@ -163,8 +221,8 @@ profiles { } gitpod { executor.name = 'local' - executor.cpus = 16 - executor.memory = 60.GB + executor.cpus = 4 + executor.memory = 8.GB } test { includeConfig 'conf/test.config' } test_full { includeConfig 'conf/test_full.config' } @@ -180,7 +238,7 @@ singularity.registry = 'quay.io' // Nextflow plugins plugins { - id 'nf-validation' // Validation of pipeline parameters and creation of an input channel from a sample sheet + id 'nf-validation@1.1.3' // Validation of pipeline parameters and creation of an input channel from a sample sheet } // Load igenomes.config if required @@ -188,6 +246,7 @@ if (!params.igenomes_ignore) { includeConfig 'conf/igenomes.config' } else { params.genomes = [:] + params.igenomes_base = null } // Export these variables to prevent local Python/R libraries from conflicting with those in 
the container // The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container. @@ -203,6 +262,9 @@ env { // Capture exit codes from upstream processes when piping process.shell = ['/bin/bash', '-euo', 'pipefail'] +// Disable process selector warnings by default. Use debug profile to enable warnings. +nextflow.enable.configProcessNamesValidation = false + def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') timeline { enabled = true @@ -228,7 +290,7 @@ manifest { description = """A workflow to simulate reads""" mainScript = 'main.nf' nextflowVersion = '!>=23.04.0' - version = '1.0dev' + version = '1.0.0' doi = '' } diff --git a/nextflow_schema.json b/nextflow_schema.json index e2112bb..90ffab0 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -20,7 +20,8 @@ "pattern": "^\\S+\\.csv$", "description": "Path to comma-separated file containing information about the samples in the experiment.", "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. 
See [usage docs](https://nf-co.re/readsimulator/usage#samplesheet-input).", - "fa_icon": "fas fa-file-csv" + "fa_icon": "fas fa-file-csv", + "schema": "assets/schema_input.json" }, "outdir": { "type": "string", @@ -42,6 +43,281 @@ } } }, + "simulation_options": { + "title": "Simulation options", + "type": "object", + "fa_icon": "fas fa-terminal", + "description": "Choose the data types that should be simulated by the pipeline.", + "properties": { + "amplicon": { + "type": "boolean", + "description": "Option to simulate amplicon sequencing reads.", + "fa_icon": "fas fa-question-circle" + }, + "target_capture": { + "type": "boolean", + "description": "Option to simulate target capture sequencing reads.", + "fa_icon": "fas fa-question-circle" + }, + "metagenome": { + "type": "boolean", + "description": "Option to simulate metagenomic sequencing reads.", + "fa_icon": "fas fa-question-circle" + }, + "wholegenome": { + "type": "boolean", + "description": "Option to simulate whole-genome sequencing reads.", + "fa_icon": "fas fa-question-circle" + } + } + }, + "amplicon_options": { + "title": "Amplicon options", + "type": "object", + "fa_icon": "fas fa-dna", + "description": "Options for simulating amplicon sequencing reads.", + "properties": { + "amplicon_fw_primer": { + "type": "string", + "description": "Forward primer to use with crabs_insilicopcr.", + "default": "GTCGGTAAAACTCGTGCCAGC" + }, + "amplicon_rv_primer": { + "type": "string", + "description": "Reverse primer to use with crabs_insilicopcr.", + "default": "CATAGTGGGGTATCTAATCCCAGTTTG" + }, + "amplicon_read_count": { + "type": "integer", + "description": "Number of reads to be simulated per amplicon.", + "default": 500 + }, + "amplicon_read_length": { + "type": "integer", + "description": "Length of reads to be simulated.", + "default": 130 + }, + "amplicon_seq_system": { + "type": "string", + "description": "Sequencing system of reads to be simulated.", + "default": "HS25", + "help_text": "Can be 'GA1' for 
Genome Analyser I, 'GA2' for Genome Analyser II, 'HS10' for HiSeq 1000, 'HS20' for HiSeq 2000, 'HS25' for HiSeq 2500, 'HSXn' for HiSeqX PCR free, 'HSXt' for HiSeqX TruSeq, 'MinS' for MiniSeq TruSeq, 'MSv1' for MiSeq v1, 'MSv3' for MiSeq v3, or 'NS50' for NextSeq500 v2.", + "enum": ["GA1", "GA2", "HS10", "HS20", "HS25", "HSXn", "HSXt", "MinS", "MSv1", "MSv3", "NS50"] + }, + "amplicon_crabs_ispcr_error": { + "type": "number", + "default": 4.5, + "description": "Maximum number of errors allowed in CRABS insilicoPCR primer sequences" + } + } + }, + "target_capture_options": { + "title": "Target capture options", + "type": "object", + "fa_icon": "fas fa-dna", + "description": "Options for simulating target capture sequencing reads.", + "properties": { + "probe_file": { + "type": "string", + "format": "file-path", + "exists": true, + "mimetype": "text/plain", + "description": "Path to bait/probe file. Can be a fasta file or a bed file.", + "help_text": "This parameter is *mandatory* if `--probe_ref_name` is not specified but `--target_capture` is specified.", + "fa_icon": "far fa-file-code" + }, + "probe_ref_name": { + "type": "string", + "default": "Tetrapods-UCE-5Kv1", + "description": "Name of supported probe. 
Mandatory if not using `--probe_file` parameter.", + "help_text": "Supported probes are 'Tetrapods-UCE-2.5Kv1', 'Tetrapods-UCE-5Kv1', 'Actinopterygians-0.5Kv1', 'Acanthomorphs-1Kv1', 'Arachnida-1.1Kv1', 'Coleoptera-1.1Kv1', 'Diptera-2.7Kv1', 'Hemiptera-2.7Kv1', 'Hymenoptera-1.5Kv1', 'Hymenoptera-2.5Kv2', and 'Anthozoa-1.7Kv1'", + "enum": [ + "Tetrapods-UCE-2.5Kv1", + "Tetrapods-UCE-5Kv1", + "Actinopterygians-0.5Kv1", + "Acanthomorphs-1Kv1", + "Arachnida-1.1Kv1", + "Coleoptera-1.1Kv1", + "Diptera-2.7Kv1", + "Hemiptera-2.7Kv1", + "Hymenoptera-1.5Kv1", + "Hymenoptera-2.5Kv2", + "Anthozoa-1.7Kv1" + ] + }, + "target_capture_mode": { + "type": "string", + "default": "illumina", + "description": "Simulate 'illumina' or 'pacbio' reads.", + "enum": ["illumina", "pacbio"] + }, + "target_capture_fmedian": { + "type": "integer", + "default": 500, + "description": "Median of fragment size at shearing." + }, + "target_capture_fshape": { + "type": "number", + "default": 6.0, + "description": "Shape parameter of the fragment size distribution." + }, + "target_capture_smedian": { + "type": "integer", + "default": 1300, + "description": "Median of fragment size distribution." + }, + "target_capture_sshape": { + "type": "number", + "default": 6.0, + "description": "Shape parameter of the fragment size distribution." + }, + "target_capture_tmedian": { + "type": "integer", + "description": "Median of target fragment size (the fragment size of the data). If specified, will override '--target_capture_fmedian' and '--target_capture_smedian'. Otherwise, it will be estimated." + }, + "target_capture_tshape": { + "type": "number", + "description": "Shape parameter of the effective fragment size distribution." + }, + "target_capture_num": { + "type": "integer", + "default": 500000, + "description": "Number of fragments." + }, + "target_capture_illen": { + "type": "integer", + "default": 150, + "description": "Illumina: read length." 
+ }, + "target_capture_pblen": { + "type": "integer", + "default": 30000, + "description": "PacBio: Average (polymerase) read length." + }, + "target_capture_ilmode": { + "type": "string", + "default": "pe", + "description": "Illumina: Sequencing mode.", + "help_text": "'pe' = paired-end, 'mp' = mate-paired and 'se' = single-end", + "enum": ["pe", "mp", "se"] + } + } + }, + "metagenome_options": { + "title": "Metagenome options", + "type": "object", + "fa_icon": "fas fa-dna", + "description": "Options for simulating metagenomic sequencing reads.", + "properties": { + "metagenome_abundance": { + "type": "string", + "default": "lognormal", + "description": "Abundance distribution.", + "help_text": "Can be 'uniform', 'halfnormal', 'exponential', 'lognormal', or 'zero_inflated_lognormal'", + "enum": ["uniform", "halfnormal", "exponential", "lognormal", "zero_inflated_lognormal"] + }, + "metagenome_abundance_file": { + "type": "string", + "format": "file-path", + "exists": true, + "pattern": "^\\S+\\.tsv$", + "mimetype": "text/plain", + "description": "Path to tab-separated file containing abundance distribution.", + "help_text": "The first column should contain the genome and the second column should contain the abundance proportion. It's recommended that the total abundance in your file equals 1." + }, + "metagenome_coverage": { + "type": "string", + "description": "Coverage distribution.", + "help_text": "Can be 'uniform', 'halfnormal', 'exponential', 'lognormal', or 'zero_inflated_lognormal'", + "enum": ["uniform", "halfnormal", "exponential", "lognormal", "zero_inflated_lognormal"] + }, + "metagenome_coverage_file": { + "type": "string", + "format": "file-path", + "exists": true, + "pattern": "^\\S+\\.tsv$", + "mimetype": "text/plain", + "description": "Path to tab-separated file containing coverage information.", + "help_text": "The first column should contain the genome and the second column should contain the coverage (e.g., use the value 20 for a coverage of 20X)." 
+ }, + "metagenome_n_reads": { + "type": "string", + "default": "1M", + "description": "Number of reads to generate.", + "help_text": "Supported suffixes are 'k', 'K', 'm', 'M', 'g', and 'G'." + }, + "metagenome_mode": { + "type": "string", + "default": "kde", + "description": "Can be 'kde', or 'basic'.", + "help_text": "Set this to basic if you don't want to use a model with `--metagenome_model`.", + "enum": ["kde", "basic"] + }, + "metagenome_model": { + "type": "string", + "default": "MiSeq", + "description": "Can be 'HiSeq', 'NovaSeq', or 'MiSeq'.", + "enum": ["HiSeq", "NovaSeq", "MiSeq"] + }, + "metagenome_gc_bias": { + "type": "boolean", + "description": "Use this option to prevent simulating reads that have abnormal GC content." + } + } + }, + "wholegenome_options": { + "title": "Wholegenome options", + "type": "object", + "fa_icon": "fas fa-dna", + "description": "Options for simulating wholegenome sequencing reads.", + "properties": { + "wholegenome_error_rate": { + "type": "number", + "default": 0.02, + "description": "The base error rate." + }, + "wholegenome_outer_dist": { + "type": "integer", + "default": 500, + "description": "The outer distance between the two ends." + }, + "wholegenome_standard_dev": { + "type": "integer", + "default": 50, + "description": "The standard deviation." + }, + "wholegenome_n_reads": { + "type": "integer", + "default": 1000000, + "description": "The number of read pairs." + }, + "wholegenome_r1_length": { + "type": "integer", + "default": 70, + "description": "The length of the first reads." + }, + "wholegenome_r2_length": { + "type": "integer", + "default": 70, + "description": "The length of the second reads." + }, + "wholegenome_mutation_rate": { + "type": "number", + "default": 0.001, + "description": "The rate of mutations." + }, + "wholegenome_indel_fraction": { + "type": "number", + "default": 0.15, + "description": "The fraction of indels." 
+ }, + "wholegenome_indel_extended": { + "type": "number", + "default": 0.3, + "description": "The probability that an indel is extended." + } + } + }, "reference_genome_options": { "title": "Reference genome options", "type": "object", @@ -60,24 +336,54 @@ "exists": true, "mimetype": "text/plain", "pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$", - "description": "Path to FASTA genome file.", - "help_text": "This parameter is *mandatory* if `--genome` is not specified. If you don't have a BWA index available this will be generated for you automatically. Combine with `--save_reference` to save BWA index for future runs.", + "description": "Path to reference FASTA file.", + "help_text": "If this parameter is not used, the pipeline will download a fasta file, either using the `--genome` parameter or by using ncbi-genome-download (relevant parameters for ncbi-genome-download all start with `--ncbidownload_`).", "fa_icon": "far fa-file-code" }, - "igenomes_base": { - "type": "string", - "format": "directory-path", - "description": "Directory / URL base for iGenomes references.", - "default": "s3://ngi-igenomes/igenomes", - "fa_icon": "fas fa-cloud-download-alt", - "hidden": true - }, "igenomes_ignore": { "type": "boolean", "description": "Do not load the iGenomes reference config.", "fa_icon": "fas fa-ban", "hidden": true, "help_text": "Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`." + }, + "ncbidownload_accessions": { + "type": "string", + "format": "file-path", + "exists": true, + "mimetype": "text/plain", + "description": "Path to text file containing accession ids (one accession per row)." + }, + "ncbidownload_taxids": { + "type": "string", + "format": "file-path", + "exists": true, + "mimetype": "text/plain", + "description": "Path to text file containing taxids (one taxid per row)." 
+ }, + "ncbidownload_group": { + "type": "string", + "default": "all", + "description": "The NCBI taxonomic groups to download. Options include 'all', 'archaea', 'bacteria', 'fungi', 'invertebrate', 'metagenomes', 'plant', 'protozoa', 'vertebrate_mammalian', 'vertebrate_other', and 'viral'. A comma-separated list is also valid (e.g., 'bacteria,viral').", + "enum": [ + "all", + "archaea", + "bacteria", + "fungi", + "invertebrate", + "metagenomes", + "plant", + "protozoa", + "vertebrate_mammalian", + "vertebrate_other", + "viral" + ] + }, + "ncbidownload_section": { + "type": "string", + "default": "refseq", + "description": "The NCBI section to download. 'refseq' or 'genbank'.", + "enum": ["refseq", "genbank"] } } }, @@ -175,14 +481,12 @@ "type": "boolean", "description": "Display help text.", "fa_icon": "fas fa-question-circle", - "default": false, "hidden": true }, "version": { "type": "boolean", "description": "Display version and exit.", "fa_icon": "fas fa-question-circle", - "default": false, "hidden": true }, "publish_dir_mode": { @@ -206,7 +510,6 @@ "type": "boolean", "description": "Send plain-text email instead of HTML.", "fa_icon": "fas fa-remove-format", - "default": false, "hidden": true }, "max_multiqc_email_size": { @@ -221,7 +524,6 @@ "type": "boolean", "description": "Do not use coloured log outputs.", "fa_icon": "fas fa-palette", - "default": false, "hidden": true }, "hook_url": { @@ -260,7 +562,6 @@ "type": "boolean", "fa_icon": "far fa-eye-slash", "description": "Show all params when using `--help`", - "default": false, "hidden": true, "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters." 
}, @@ -268,7 +569,6 @@ "type": "boolean", "fa_icon": "far fa-check-circle", "description": "Validation of parameters fails when an unrecognised parameter is found.", - "default": false, "hidden": true, "help_text": "By default, when an unrecognised parameter is found, it returns a warinig." }, @@ -276,7 +576,6 @@ "type": "boolean", "fa_icon": "far fa-check-circle", "description": "Validation of parameters in lenient more.", - "default": false, "hidden": true, "help_text": "Allows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode)." } @@ -287,6 +586,21 @@ { "$ref": "#/definitions/input_output_options" }, + { + "$ref": "#/definitions/simulation_options" + }, + { + "$ref": "#/definitions/amplicon_options" + }, + { + "$ref": "#/definitions/target_capture_options" + }, + { + "$ref": "#/definitions/metagenome_options" + }, + { + "$ref": "#/definitions/wholegenome_options" + }, { "$ref": "#/definitions/reference_genome_options" }, diff --git a/nf-test.config b/nf-test.config new file mode 100644 index 0000000..6f5e2c4 --- /dev/null +++ b/nf-test.config @@ -0,0 +1,10 @@ +config { + // Location of nf-tests + testsDir "." + + // nf-test directory used to create temporary files for each test + workDir System.getenv("NFT_WORKDIR") ?: ".nf-test" + + // Location of an optional nextflow.config file specific for executing pipeline tests + configFile "tests/nextflow.config" +} diff --git a/pyproject.toml b/pyproject.toml index 0d62beb..7d08e1c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,10 +1,13 @@ -# Config file for Python. Mostly used to configure linting of bin/check_samplesheet.py with Black. +# Config file for Python. Mostly used to configure linting of bin/*.py with Ruff. # Should be kept the same as nf-core/tools to avoid fighting with template synchronisation. 
-[tool.black] +[tool.ruff] line-length = 120 -target_version = ["py37", "py38", "py39", "py310"] +target-version = "py38" +select = ["I", "E1", "E4", "E7", "E9", "F", "UP", "N"] +cache-dir = "~/.cache/ruff" -[tool.isort] -profile = "black" -known_first_party = ["nf_core"] -multi_line_output = 3 +[tool.ruff.isort] +known-first-party = ["nf_core"] + +[tool.ruff.per-file-ignores] +"__init__.py" = ["E402", "F401"] diff --git a/subworkflows/local/amplicon_workflow.nf b/subworkflows/local/amplicon_workflow.nf new file mode 100644 index 0000000..1073768 --- /dev/null +++ b/subworkflows/local/amplicon_workflow.nf @@ -0,0 +1,71 @@ +// +// Simulate amplicon reads +// + +include { CRABS_DBIMPORT } from '../../modules/local/crabs/dbimport/main' +include { CRABS_INSILICOPCR } from '../../modules/local/crabs/insilicopcr/main' +include { ART_ILLUMINA } from '../../modules/nf-core/art/illumina/main' + +workflow AMPLICON_WORKFLOW { + + take: + ch_fasta // file: /path/to/reference.fasta + ch_input // channel: [ meta ] + + main: + ch_ref_fasta = Channel.empty() + ch_versions = Channel.empty() + + // + // MODULE: Run Crabs db_import + // + ch_meta_fasta = ch_fasta + .map { + fasta -> + return [ [id:"amplicon"], fasta ] + } + + CRABS_DBIMPORT ( + ch_meta_fasta + ) + ch_versions = ch_versions.mix(CRABS_DBIMPORT.out.versions) + ch_ref_fasta = CRABS_DBIMPORT.out.fasta + + // + // MODULE: Run Crabs insilico_pcr + // + CRABS_INSILICOPCR ( + ch_ref_fasta + ) + ch_versions = ch_versions.mix(CRABS_INSILICOPCR.out.versions) + + // Now that we have processed our fasta file, + // we need to map it to our sample data + ch_art_input = CRABS_INSILICOPCR.out.fasta + .combine ( ch_input ) + .map { + it = [ it[2], it[1] ] + } + + // + // MODULE: Simulate Illumina reads + // + ART_ILLUMINA ( + ch_art_input, + params.amplicon_seq_system, + params.amplicon_read_length + ) + ch_versions = ch_versions.mix(ART_ILLUMINA.out.versions.first()) + + ch_illumina_reads = ART_ILLUMINA.out.fastq + .map { + meta, 
fastqs -> + meta.outdir = "art_illumina" + meta.datatype = "amplicon_illumina" + return [ meta, fastqs ] + } + + emit: + reads = ch_illumina_reads // channel: [ meta, fastq ] + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/target_capture_workflow.nf b/subworkflows/local/target_capture_workflow.nf new file mode 100644 index 0000000..011d9de --- /dev/null +++ b/subworkflows/local/target_capture_workflow.nf @@ -0,0 +1,120 @@ +// +// Simulate UCE target capture reads +// + +include { BEDTOOLS_GETFASTA } from '../../modules/nf-core/bedtools/getfasta/main' +include { BOWTIE2_BUILD } from '../../modules/nf-core/bowtie2/build/main' +include { BOWTIE2_ALIGN } from '../../modules/nf-core/bowtie2/align/main' +include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main' +include { JAPSA_CAPSIM } from '../../modules/local/japsa/capsim/main' +include { UNZIP } from '../../modules/local/unzip/main' +include { UNCOMPRESS_FASTA } from '../../modules/local/uncompress_fasta/main' + +workflow TARGET_CAPTURE_WORKFLOW { + take: + ch_fasta // file: /path/to/reference.fasta + ch_input // channel: [ meta ] + ch_probes // file: /path/to/probes.fasta + + main: + ch_versions = Channel.empty() + + // + // MODULE: Unzip probes file if user is downloading a reference probe file + // + if ( !params.probe_file ) { + ch_zip_file = Channel.fromPath(params.probe_ref_db[params.probe_ref_name]["url"]) + ch_probes = UNZIP ( + ch_zip_file + ).file + } else { + // + // MODULE: Run bedtools_getfasta if the probe file is a bed file + // + if ( params.probe_file.endsWith('.bed') ) { + // Bedtools_getfasta requires an uncompressed fasta file + ch_uncompressed_fasta = UNCOMPRESS_FASTA ( + ch_fasta + ).fasta + + BEDTOOLS_GETFASTA ( + ch_probes, + ch_uncompressed_fasta + ) + + ch_probes = BEDTOOLS_GETFASTA.out.fasta + } + } + + ch_probes = ch_probes + .map { + fasta -> + def meta = [:] + meta.id = "probes" + meta.single_end = true + [ meta, fasta ] + } + + 
ch_meta_fasta = ch_fasta + .map { + fasta -> + return [ [id:"target_capture"], fasta ] + } + + // + // MODULE: Create Bowtie index + // + BOWTIE2_BUILD ( + ch_meta_fasta + ) + ch_versions = ch_versions.mix(BOWTIE2_BUILD.out.versions.first()) + + // + // MODULE: Align probes to genome + // + BOWTIE2_ALIGN ( + ch_probes, + BOWTIE2_BUILD.out.index, + false, + false + ) + ch_versions = ch_versions.mix(BOWTIE2_ALIGN.out.versions.first()) + + // + // MODULES: Get SAM index + // + SAMTOOLS_INDEX ( + BOWTIE2_ALIGN.out.aligned + ) + ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) + + // Now that we have our fasta file + BAM file + index, + // we need to map them to our sample data + ch_capsim_input = ch_meta_fasta.map { it = it[1] } + .combine ( BOWTIE2_ALIGN.out.aligned.map { it = it[1] } ) + .combine ( SAMTOOLS_INDEX.out.bai.map { it = it[1] } ) + .combine ( ch_input ) + .map { + fasta, bam, index, meta -> [ meta, fasta, bam, index ] + } + + // + // MODULE: Simulate target capture reads + // + ch_reads = Channel.empty() + JAPSA_CAPSIM ( + ch_capsim_input + ) + ch_versions = ch_versions.mix(JAPSA_CAPSIM.out.versions.first()) + ch_reads = JAPSA_CAPSIM.out.fastq + .map { + meta, fastqs -> + meta.outdir = "capsim" + meta.datatype = "target_capture" + return [ meta, fastqs ] + } + + emit: + reads = ch_reads // channel: [ meta, fastq ] + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/tests/nextflow.config b/tests/nextflow.config new file mode 100644 index 0000000..60bd0ef --- /dev/null +++ b/tests/nextflow.config @@ -0,0 +1,27 @@ +params { + // Base directory for nf-core/modules test data + modules_testdata_base_path = 's3://ngi-igenomes/testdata/nf-core/modules/' +} + +// Load test_data.config for for using test-datasets +includeConfig '../conf/test_data.config' + +// Impose sensible resource limits for testing +process { + withName: '.*' { + cpus = 2 + memory = 3.GB + time = 2.h + } +} + +// Impose same minimum Nextflow version as the 
pipeline for testing +manifest { + nextflowVersion = '!>=23.04.0' +} + +// Disable all Nextflow reporting options +timeline { enabled = false } +report { enabled = false } +trace { enabled = false } +dag { enabled = false } diff --git a/workflows/readsimulator.nf b/workflows/readsimulator.nf deleted file mode 100644 index 8a2b8b3..0000000 --- a/workflows/readsimulator.nf +++ /dev/null @@ -1,133 +0,0 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - PRINT PARAMS SUMMARY -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -include { paramsSummaryLog; paramsSummaryMap } from 'plugin/nf-validation' - -def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs) -def citation = '\n' + WorkflowMain.citation(workflow) + '\n' -def summary_params = paramsSummaryMap(workflow) - -// Print parameter summary log to screen -log.info logo + paramsSummaryLog(workflow) + citation - -WorkflowReadsimulator.initialise(params, log) - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - CONFIG FILES -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) -ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config, checkIfExists: true ) : Channel.empty() -ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo, checkIfExists: true ) : Channel.empty() -ch_multiqc_custom_methods_description = params.multiqc_methods_description ? 
file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - IMPORT LOCAL MODULES/SUBWORKFLOWS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -// -// SUBWORKFLOW: Consisting of a mix of local and nf-core/modules -// -include { INPUT_CHECK } from '../subworkflows/local/input_check' - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - IMPORT NF-CORE MODULES/SUBWORKFLOWS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -// -// MODULE: Installed directly from nf-core/modules -// -include { FASTQC } from '../modules/nf-core/fastqc/main' -include { MULTIQC } from '../modules/nf-core/multiqc/main' -include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - RUN MAIN WORKFLOW -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -// Info required for completion email and summary -def multiqc_report = [] - -workflow READSIMULATOR { - - ch_versions = Channel.empty() - - // - // SUBWORKFLOW: Read in samplesheet, validate and stage input files - // - INPUT_CHECK ( - file(params.input) - ) - ch_versions = ch_versions.mix(INPUT_CHECK.out.versions) - // TODO: OPTIONAL, you can use nf-validation plugin to create an input channel from the samplesheet with Channel.fromSamplesheet("input") - // See the documentation https://nextflow-io.github.io/nf-validation/samplesheets/fromSamplesheet/ - // ! 
There is currently no tooling to help you write a sample sheet schema - - // - // MODULE: Run FastQC - // - FASTQC ( - INPUT_CHECK.out.reads - ) - ch_versions = ch_versions.mix(FASTQC.out.versions.first()) - - CUSTOM_DUMPSOFTWAREVERSIONS ( - ch_versions.unique().collectFile(name: 'collated_versions.yml') - ) - - // - // MODULE: MultiQC - // - workflow_summary = WorkflowReadsimulator.paramsSummaryMultiqc(workflow, summary_params) - ch_workflow_summary = Channel.value(workflow_summary) - - methods_description = WorkflowReadsimulator.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description, params) - ch_methods_description = Channel.value(methods_description) - - ch_multiqc_files = Channel.empty() - ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) - ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml')) - ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect()) - ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([])) - - MULTIQC ( - ch_multiqc_files.collect(), - ch_multiqc_config.toList(), - ch_multiqc_custom_config.toList(), - ch_multiqc_logo.toList() - ) - multiqc_report = MULTIQC.out.report.toList() -} - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - COMPLETION EMAIL AND SUMMARY -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -workflow.onComplete { - if (params.email || params.email_on_fail) { - NfcoreTemplate.email(workflow, params, summary_params, projectDir, log, multiqc_report) - } - NfcoreTemplate.summary(workflow, params, log) - if (params.hook_url) { - NfcoreTemplate.IM_notification(workflow, params, summary_params, projectDir, log) - } -} - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - THE END 
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ diff --git a/workflows/readsimulator/main.nf b/workflows/readsimulator/main.nf new file mode 100644 index 0000000..db38605 --- /dev/null +++ b/workflows/readsimulator/main.nf @@ -0,0 +1,275 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + PRINT PARAMS SUMMARY +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +include { paramsSummaryLog; paramsSummaryMap } from 'plugin/nf-validation' + +def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs) +def citation = '\n' + WorkflowMain.citation(workflow) + '\n' +def summary_params = paramsSummaryMap(workflow) + +// Print parameter summary log to screen +log.info logo + paramsSummaryLog(workflow) + citation + +WorkflowReadsimulator.initialise(params, log) + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + CONFIG FILES +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) +ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config, checkIfExists: true ) : Channel.empty() +ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo, checkIfExists: true ) : Channel.empty() +ch_multiqc_custom_methods_description = params.multiqc_methods_description ? 
file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT LOCAL MODULES/SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// +// MODULE: Local modules +// +include { INSILICOSEQ_GENERATE } from '../../modules/local/insilicoseq/generate/main' // TODO: Add module to nf-core/modules +include { CREATE_SAMPLESHEET } from '../../modules/local/custom/create_samplesheet/main' +include { MERGE_SAMPLESHEETS } from '../../modules/local/custom/merge_samplesheets/main' +include { WGSIM } from '../../modules/local/wgsim/main' // TODO: Add module to nf-core/modules + +// +// SUBWORKFLOW: Consisting of a mix of local and nf-core/modules +// +include { AMPLICON_WORKFLOW } from '../../subworkflows/local/amplicon_workflow' +include { TARGET_CAPTURE_WORKFLOW } from '../../subworkflows/local/target_capture_workflow' + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT NF-CORE MODULES/SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// +// MODULE: Installed directly from nf-core/modules +// +include { FASTQC } from '../../modules/nf-core/fastqc/main' +include { NCBIGENOMEDOWNLOAD } from '../../modules/nf-core/ncbigenomedownload/main' +include { MULTIQC } from '../../modules/nf-core/multiqc/main' +include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../../modules/nf-core/custom/dumpsoftwareversions/main' + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + RUN MAIN WORKFLOW +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// Info required for completion email and summary +def multiqc_report = [] + +workflow READSIMULATOR { + + take: + ch_input + 
+ main: + ch_versions = Channel.empty() + ch_simulated_reads = Channel.empty() + ch_taxids = Channel.empty() + ch_accessions = Channel.empty() + + if ( params.fasta ) { + ch_fasta = Channel.fromPath(params.fasta) + } else { + if ( params.ncbidownload_accessions ) { + ch_accessions = Channel.fromPath(params.ncbidownload_accessions) + } else if ( params.ncbidownload_taxids ) { + ch_taxids = Channel.fromPath(params.ncbidownload_taxids) + } + + // + // MODULE: Download reference fasta files + // + NCBIGENOMEDOWNLOAD ( + [ id:"ncbigenomedownload" ], + ch_accessions.ifEmpty([]), + ch_taxids.ifEmpty([]), + params.ncbidownload_group + ) + + // + // MODULE: Combine FASTA files + // + MERGE_FASTAS ( + NCBIGENOMEDOWNLOAD.out.fna + ) + + ch_fasta = MERGE_FASTAS.out.fasta + .map { + meta, fasta -> + return fasta + } + } + + if ( params.probe_file ) { + ch_probes = Channel.fromPath(params.probe_file) + } else { + ch_probes = Channel.empty() + } + + // + // SUBWORKFLOW: Simulate amplicon reads + // + if ( params.amplicon ) { + AMPLICON_WORKFLOW ( + ch_fasta.ifEmpty([]), + ch_input + ) + ch_versions = ch_versions.mix(AMPLICON_WORKFLOW.out.versions.first()) + ch_simulated_reads = ch_simulated_reads.mix(AMPLICON_WORKFLOW.out.reads) + } + + // + // SUBWORKFLOW: Simulate UCE target capture reads + // + if ( params.target_capture ) { + TARGET_CAPTURE_WORKFLOW ( + ch_fasta, + ch_input, + ch_probes.ifEmpty([]) + ) + ch_versions = ch_versions.mix(TARGET_CAPTURE_WORKFLOW.out.versions.first()) + ch_simulated_reads = ch_simulated_reads.mix(TARGET_CAPTURE_WORKFLOW.out.reads) + } + + // + // MODULE: Simulate metagenomic reads + // + if ( params.metagenome ) { + INSILICOSEQ_GENERATE ( + ch_input.combine(ch_fasta.ifEmpty([[]])) + ) + ch_versions = ch_versions.mix(INSILICOSEQ_GENERATE.out.versions.first()) + ch_metagenome_reads = INSILICOSEQ_GENERATE.out.fastq + .map { + meta, fastqs -> + meta.outdir = "insilicoseq" + meta.datatype = "metagenomic_illumina" + return [ meta, fastqs ] + } + 
ch_simulated_reads = ch_simulated_reads.mix(ch_metagenome_reads) + } + + // + // MODULE: Simulate wholegenomic reads + // + if ( params.wholegenome ) { + WGSIM ( + ch_input.combine(ch_fasta) + ) + ch_versions = ch_versions.mix(WGSIM.out.versions.first()) + ch_wholegenome_reads = WGSIM.out.fastq + .map { + meta, fastqs -> + meta.outdir = "wgsim" + meta.datatype = "wholegenome" + return [ meta, fastqs ] + } + ch_simulated_reads = ch_simulated_reads.mix(ch_wholegenome_reads) + } + + // MODULE: Create sample sheet (just the header and one row) + CREATE_SAMPLESHEET ( + ch_simulated_reads + ) + + // Group the samplesheets by datatype so that we can merge them + ch_samplesheets = CREATE_SAMPLESHEET.out.samplesheet + .map { + meta, samplesheet -> + tuple( meta.datatype, meta, samplesheet ) + } + .groupTuple(sort: 'deep') + .map { + datatype, old_meta, samplesheet -> + def meta = [:] + meta.id = datatype + return [ meta, samplesheet ] + } + + // MODULE: Merge the samplesheets by data type + ch_final_samplesheet = MERGE_SAMPLESHEETS ( + ch_samplesheets + ) + + // + // MODULE: Run FastQC + // + FASTQC ( + ch_simulated_reads + ) + ch_versions = ch_versions.mix(FASTQC.out.versions.first()) + + CUSTOM_DUMPSOFTWAREVERSIONS ( + ch_versions.unique().collectFile(name: 'collated_versions.yml') + ) + + // + // MODULE: MultiQC + // + workflow_summary = WorkflowReadsimulator.paramsSummaryMultiqc(workflow, summary_params) + ch_workflow_summary = Channel.value(workflow_summary) + + methods_description = WorkflowReadsimulator.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description, params) + ch_methods_description = Channel.value(methods_description) + + ch_multiqc_files = Channel.empty() + ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) + ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml')) + ch_multiqc_files = 
ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect()) + ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([])) + + MULTIQC ( + ch_multiqc_files.collect(), + ch_multiqc_config.toList(), + ch_multiqc_custom_config.toList(), + ch_multiqc_logo.toList() + ) + multiqc_report = MULTIQC.out.report.toList() + + emit: + simulated_reads = ch_simulated_reads + samplesheet = ch_final_samplesheet +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + COMPLETION EMAIL AND SUMMARY +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +workflow.onComplete { + if (params.email || params.email_on_fail) { + NfcoreTemplate.email(workflow, params, summary_params, projectDir, log, multiqc_report) + } + NfcoreTemplate.dump_parameters(workflow, params) + NfcoreTemplate.summary(workflow, params, log) + if (params.hook_url) { + NfcoreTemplate.IM_notification(workflow, params, summary_params, projectDir, log) + } +} + +workflow.onError { + if (workflow.errorReport.contains("Process requirement exceeds available memory")) { + println("🛑 Default resources exceed availability 🛑 ") + println("💡 See here on how to configure pipeline: https://nf-co.re/docs/usage/configuration#tuning-workflow-resources 💡") + } +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + THE END +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ diff --git a/workflows/readsimulator/tests/tags.yml b/workflows/readsimulator/tests/tags.yml new file mode 100644 index 0000000..393123b --- /dev/null +++ b/workflows/readsimulator/tests/tags.yml @@ -0,0 +1,8 @@ +readsimulator_test_amplicon: + - ./** +readsimulator_test_wholegenome: + - ./** +readsimulator_test_metagenome: + - ./** +readsimulator_test_target_capture: + - ./** diff --git a/workflows/readsimulator/tests/test_amplicon.nf.test 
b/workflows/readsimulator/tests/test_amplicon.nf.test new file mode 100644 index 0000000..3eccfd0 --- /dev/null +++ b/workflows/readsimulator/tests/test_amplicon.nf.test @@ -0,0 +1,41 @@ +nextflow_workflow { + + name "Test workflow: READSIMULATOR" + script "../main.nf" + workflow "READSIMULATOR" + tag "workflows" + tag "readsimulator" + tag "readsimulator_test_amplicon" + + test("amplicon = true") { + + when { + workflow { + """ + input[0] = Channel.of( + [ [ id:'first', seed:40] ], + [ [ id:'second', seed:41] ], + [ [ id:'third', seed:42] ], + ) + """ + } + params { + amplicon = true + fw_primer = 'AAAATAAT' + rv_primer = 'GATTACTTT' + amplicon_read_count = 1000 + amplicon_crabs_ispcr_error = 0 + fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/readsimulator/testdata/GCF_024334085.1_ASM2433408v1_genomic.fna.gz' + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out + ).match() } + ) + } + } +} diff --git a/workflows/readsimulator/tests/test_amplicon.nf.test.snap b/workflows/readsimulator/tests/test_amplicon.nf.test.snap new file mode 100644 index 0000000..09f0d95 --- /dev/null +++ b/workflows/readsimulator/tests/test_amplicon.nf.test.snap @@ -0,0 +1,101 @@ +{ + "amplicon = true": { + "content": [ + { + "0": [ + [ + { + "id": "first", + "seed": 40, + "outdir": "art_illumina", + "datatype": "amplicon_illumina" + }, + [ + "first1.fq.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "first2.fq.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + [ + { + "id": "second", + "seed": 41, + "outdir": "art_illumina", + "datatype": "amplicon_illumina" + }, + [ + "second1.fq.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "second2.fq.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + [ + { + "id": "third", + "seed": 42, + "outdir": "art_illumina", + "datatype": "amplicon_illumina" + }, + [ + "third1.fq.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "third2.fq.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + [ + { + 
"id": "amplicon_illumina" + }, + "amplicon_illumina_samplesheet.csv:md5,2f0440b621dd2de3c89cda85c81e5be7" + ] + ], + "samplesheet": [ + [ + { + "id": "amplicon_illumina" + }, + "amplicon_illumina_samplesheet.csv:md5,2f0440b621dd2de3c89cda85c81e5be7" + ] + ], + "simulated_reads": [ + [ + { + "id": "first", + "seed": 40, + "outdir": "art_illumina", + "datatype": "amplicon_illumina" + }, + [ + "first1.fq.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "first2.fq.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + [ + { + "id": "second", + "seed": 41, + "outdir": "art_illumina", + "datatype": "amplicon_illumina" + }, + [ + "second1.fq.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "second2.fq.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + [ + { + "id": "third", + "seed": 42, + "outdir": "art_illumina", + "datatype": "amplicon_illumina" + }, + [ + "third1.fq.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "third2.fq.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ] + } + ], + "timestamp": "2024-01-19T19:15:08.881730553" + } +} \ No newline at end of file diff --git a/workflows/readsimulator/tests/test_metagenome.nf.test b/workflows/readsimulator/tests/test_metagenome.nf.test new file mode 100644 index 0000000..ee53e2d --- /dev/null +++ b/workflows/readsimulator/tests/test_metagenome.nf.test @@ -0,0 +1,38 @@ +nextflow_workflow { + + name "Test workflow: READSIMULATOR" + script "../main.nf" + workflow "READSIMULATOR" + tag "workflows" + tag "readsimulator" + tag "readsimulator_test_metagenome" + + test("metagenome = true") { + + when { + workflow { + """ + input[0] = Channel.of( + [ [ id:'first', seed:40] ], + [ [ id:'second', seed:41] ], + [ [ id:'third', seed:42] ], + ) + """ + } + params { + metagenome = true + metagenome_n_reads = '100K' + fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/readsimulator/testdata/GCF_024334085.1_ASM2433408v1_genomic.fna.gz' + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out + 
).match() } + ) + } + } +} diff --git a/workflows/readsimulator/tests/test_metagenome.nf.test.snap b/workflows/readsimulator/tests/test_metagenome.nf.test.snap new file mode 100644 index 0000000..a28510d --- /dev/null +++ b/workflows/readsimulator/tests/test_metagenome.nf.test.snap @@ -0,0 +1,101 @@ +{ + "metagenome = true": { + "content": [ + { + "0": [ + [ + { + "id": "first", + "seed": 40, + "outdir": "insilicoseq", + "datatype": "metagenomic_illumina" + }, + [ + "first_R1.fastq.gz:md5,2a324062e0afb4d3eab8134479f317d4", + "first_R2.fastq.gz:md5,9e0c86c0ff15091734fe7d52039f7727" + ] + ], + [ + { + "id": "second", + "seed": 41, + "outdir": "insilicoseq", + "datatype": "metagenomic_illumina" + }, + [ + "second_R1.fastq.gz:md5,3536190205e1a6ba3609fe6a1567e0c7", + "second_R2.fastq.gz:md5,a91ff7cf5ea2069d541fc7d081c82bfb" + ] + ], + [ + { + "id": "third", + "seed": 42, + "outdir": "insilicoseq", + "datatype": "metagenomic_illumina" + }, + [ + "third_R1.fastq.gz:md5,137d1f3ced4783a0a45f6d7aed6da743", + "third_R2.fastq.gz:md5,3a78f893f1c12af70d13fe3df59eaa84" + ] + ] + ], + "1": [ + [ + { + "id": "metagenomic_illumina" + }, + "metagenomic_illumina_samplesheet.csv:md5,8064936eff731e7c4c879916eb8b2edb" + ] + ], + "samplesheet": [ + [ + { + "id": "metagenomic_illumina" + }, + "metagenomic_illumina_samplesheet.csv:md5,8064936eff731e7c4c879916eb8b2edb" + ] + ], + "simulated_reads": [ + [ + { + "id": "first", + "seed": 40, + "outdir": "insilicoseq", + "datatype": "metagenomic_illumina" + }, + [ + "first_R1.fastq.gz:md5,2a324062e0afb4d3eab8134479f317d4", + "first_R2.fastq.gz:md5,9e0c86c0ff15091734fe7d52039f7727" + ] + ], + [ + { + "id": "second", + "seed": 41, + "outdir": "insilicoseq", + "datatype": "metagenomic_illumina" + }, + [ + "second_R1.fastq.gz:md5,3536190205e1a6ba3609fe6a1567e0c7", + "second_R2.fastq.gz:md5,a91ff7cf5ea2069d541fc7d081c82bfb" + ] + ], + [ + { + "id": "third", + "seed": 42, + "outdir": "insilicoseq", + "datatype": "metagenomic_illumina" + }, + [ + 
"third_R1.fastq.gz:md5,137d1f3ced4783a0a45f6d7aed6da743", + "third_R2.fastq.gz:md5,3a78f893f1c12af70d13fe3df59eaa84" + ] + ] + ] + } + ], + "timestamp": "2024-01-19T00:09:35.115953915" + } +} \ No newline at end of file diff --git a/workflows/readsimulator/tests/test_target_capture.nf.test b/workflows/readsimulator/tests/test_target_capture.nf.test new file mode 100644 index 0000000..ef73254 --- /dev/null +++ b/workflows/readsimulator/tests/test_target_capture.nf.test @@ -0,0 +1,38 @@ +nextflow_workflow { + + name "Test workflow: READSIMULATOR" + script "../main.nf" + workflow "READSIMULATOR" + tag "workflows" + tag "readsimulator" + tag "readsimulator_test_target_capture" + + test("target_capture = true") { + + when { + workflow { + """ + input[0] = Channel.of( + [ [ id:'first', seed:40] ], + [ [ id:'second', seed:41] ], + [ [ id:'third', seed:42] ], + ) + """ + } + params { + target_capture = true + probe_ref_name = 'Diptera-2.7Kv1' + fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/readsimulator/testdata/GCF_024334085.1_ASM2433408v1_genomic.fna.gz' + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out + ).match() } + ) + } + } +} diff --git a/workflows/readsimulator/tests/test_target_capture.nf.test.snap b/workflows/readsimulator/tests/test_target_capture.nf.test.snap new file mode 100644 index 0000000..f34ebaf --- /dev/null +++ b/workflows/readsimulator/tests/test_target_capture.nf.test.snap @@ -0,0 +1,101 @@ +{ + "target_capture = true": { + "content": [ + { + "0": [ + [ + { + "id": "first", + "seed": 40, + "outdir": "capsim", + "datatype": "target_capture" + }, + [ + "first_1.fastq.gz:md5,a0d28da7897cdce5380fd803ca43fbd8", + "first_2.fastq.gz:md5,4b72a945f433797a05635996d7e41bdf" + ] + ], + [ + { + "id": "second", + "seed": 41, + "outdir": "capsim", + "datatype": "target_capture" + }, + [ + "second_1.fastq.gz:md5,3cb9456d6373288c877eb59ef01cb4b1", + 
"second_2.fastq.gz:md5,c5d09485a7b3dffba13fd5f8c827c498" + ] + ], + [ + { + "id": "third", + "seed": 42, + "outdir": "capsim", + "datatype": "target_capture" + }, + [ + "third_1.fastq.gz:md5,b7d11fb1d6fe768d17ee9940ed90d03c", + "third_2.fastq.gz:md5,995ea1c4eb2bcbb55b252dac290def09" + ] + ] + ], + "1": [ + [ + { + "id": "target_capture" + }, + "target_capture_samplesheet.csv:md5,5279e81ea6bcdee0e3d9e54f7cef1dab" + ] + ], + "samplesheet": [ + [ + { + "id": "target_capture" + }, + "target_capture_samplesheet.csv:md5,5279e81ea6bcdee0e3d9e54f7cef1dab" + ] + ], + "simulated_reads": [ + [ + { + "id": "first", + "seed": 40, + "outdir": "capsim", + "datatype": "target_capture" + }, + [ + "first_1.fastq.gz:md5,a0d28da7897cdce5380fd803ca43fbd8", + "first_2.fastq.gz:md5,4b72a945f433797a05635996d7e41bdf" + ] + ], + [ + { + "id": "second", + "seed": 41, + "outdir": "capsim", + "datatype": "target_capture" + }, + [ + "second_1.fastq.gz:md5,3cb9456d6373288c877eb59ef01cb4b1", + "second_2.fastq.gz:md5,c5d09485a7b3dffba13fd5f8c827c498" + ] + ], + [ + { + "id": "third", + "seed": 42, + "outdir": "capsim", + "datatype": "target_capture" + }, + [ + "third_1.fastq.gz:md5,b7d11fb1d6fe768d17ee9940ed90d03c", + "third_2.fastq.gz:md5,995ea1c4eb2bcbb55b252dac290def09" + ] + ] + ] + } + ], + "timestamp": "2024-01-19T00:15:17.918173096" + } +} \ No newline at end of file diff --git a/workflows/readsimulator/tests/test_wholegenome.nf.test b/workflows/readsimulator/tests/test_wholegenome.nf.test new file mode 100644 index 0000000..1ad3498 --- /dev/null +++ b/workflows/readsimulator/tests/test_wholegenome.nf.test @@ -0,0 +1,37 @@ +nextflow_workflow { + + name "Test workflow: READSIMULATOR" + script "../main.nf" + workflow "READSIMULATOR" + tag "workflows" + tag "readsimulator" + tag "readsimulator_test_wholegenome" + + test("wholegenome = true") { + + when { + workflow { + """ + input[0] = Channel.of( + [ [ id:'first', seed:40] ], + [ [ id:'second', seed:41] ], + [ [ id:'third', seed:42] ], + ) + 
""" + } + params { + wholegenome = true + fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/readsimulator/testdata/GCF_024334085.1_ASM2433408v1_genomic.fna.gz' + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out + ).match() } + ) + } + } +} diff --git a/workflows/readsimulator/tests/test_wholegenome.nf.test.snap b/workflows/readsimulator/tests/test_wholegenome.nf.test.snap new file mode 100644 index 0000000..0d4e785 --- /dev/null +++ b/workflows/readsimulator/tests/test_wholegenome.nf.test.snap @@ -0,0 +1,101 @@ +{ + "wholegenome = true": { + "content": [ + { + "0": [ + [ + { + "id": "first", + "seed": 40, + "outdir": "wgsim", + "datatype": "wholegenome" + }, + [ + "first_R1.fq.gz:md5,90ebdcc4f7783290afe3e6ee02d6d803", + "first_R2.fq.gz:md5,bfb972c268a36edcef29317fd1ced544" + ] + ], + [ + { + "id": "second", + "seed": 41, + "outdir": "wgsim", + "datatype": "wholegenome" + }, + [ + "second_R1.fq.gz:md5,7902aa5c9041fc9e2585ebdd0c83597e", + "second_R2.fq.gz:md5,fa9f52716714e16d354e575c331d4ffc" + ] + ], + [ + { + "id": "third", + "seed": 42, + "outdir": "wgsim", + "datatype": "wholegenome" + }, + [ + "third_R1.fq.gz:md5,a76e6d7ab7a4f4bae18747ae6abb607e", + "third_R2.fq.gz:md5,e42714cc210d161fbb3df55c75e96d61" + ] + ] + ], + "1": [ + [ + { + "id": "wholegenome" + }, + "wholegenome_samplesheet.csv:md5,310b2ee2f03a2b44beb88da1816bc3d0" + ] + ], + "samplesheet": [ + [ + { + "id": "wholegenome" + }, + "wholegenome_samplesheet.csv:md5,310b2ee2f03a2b44beb88da1816bc3d0" + ] + ], + "simulated_reads": [ + [ + { + "id": "first", + "seed": 40, + "outdir": "wgsim", + "datatype": "wholegenome" + }, + [ + "first_R1.fq.gz:md5,90ebdcc4f7783290afe3e6ee02d6d803", + "first_R2.fq.gz:md5,bfb972c268a36edcef29317fd1ced544" + ] + ], + [ + { + "id": "second", + "seed": 41, + "outdir": "wgsim", + "datatype": "wholegenome" + }, + [ + "second_R1.fq.gz:md5,7902aa5c9041fc9e2585ebdd0c83597e", + 
"second_R2.fq.gz:md5,fa9f52716714e16d354e575c331d4ffc" + ] + ], + [ + { + "id": "third", + "seed": 42, + "outdir": "wgsim", + "datatype": "wholegenome" + }, + [ + "third_R1.fq.gz:md5,a76e6d7ab7a4f4bae18747ae6abb607e", + "third_R2.fq.gz:md5,e42714cc210d161fbb3df55c75e96d61" + ] + ] + ] + } + ], + "timestamp": "2024-01-19T00:04:12.073328968" + } +}
    Process Name \\", + " \\ Software Version
    CUSTOM_DUMPSOFTWAREVERSIONSpython3.11.7
    yaml5.4.1
    TOOL1tool10.11.9
    TOOL2tool21.9
    WorkflowNextflow
    File typeConventional base calls
    File typeConventional base calls
    File typeConventional base calls
    File typeConventional base calls
    File typeConventional base calls
    File typeConventional base calls
    File typeConventional base calls
    File typeConventional base calls
    File typeConventional base calls
    File typeConventional base calls