diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
index 4ecfbfe3..b290e090 100644
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -10,15 +10,7 @@
     "vscode": {
       // Set *default* container specific settings.json values on container create.
       "settings": {
-        "python.defaultInterpreterPath": "/opt/conda/bin/python",
-        "python.linting.enabled": true,
-        "python.linting.pylintEnabled": true,
-        "python.formatting.autopep8Path": "/opt/conda/bin/autopep8",
-        "python.formatting.yapfPath": "/opt/conda/bin/yapf",
-        "python.linting.flake8Path": "/opt/conda/bin/flake8",
-        "python.linting.pycodestylePath": "/opt/conda/bin/pycodestyle",
-        "python.linting.pydocstylePath": "/opt/conda/bin/pydocstyle",
-        "python.linting.pylintPath": "/opt/conda/bin/pylint"
+        "python.defaultInterpreterPath": "/opt/conda/bin/python"
       },

       // Add the IDs of extensions you want installed when the container is created.
diff --git a/.editorconfig b/.editorconfig
index b6b31907..72dda289 100644
--- a/.editorconfig
+++ b/.editorconfig
@@ -18,7 +18,16 @@ end_of_line = unset
insert_final_newline = unset
trim_trailing_whitespace = unset
indent_style = unset
-indent_size = unset
+[/subworkflows/nf-core/**]
+charset = unset
+end_of_line = unset
+insert_final_newline = unset
+trim_trailing_whitespace = unset
+indent_style = unset

[/assets/email*]
indent_size = unset
+
+# ignore python and markdown
+[*.{py,md}]
+indent_style = unset
diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md
index a5d66311..fbf55836 100644
--- a/.github/CONTRIBUTING.md
+++ b/.github/CONTRIBUTING.md
@@ -9,9 +9,8 @@ Please use the pre-filled template to save time.
However, don't be put off by this template - other more general issues and suggestions are welcome!
Contributions to the code are even more welcome ;)

-:::info
-If you need help using or modifying nf-core/funcscan then the best place to ask is on the nf-core Slack [#funcscan](https://nfcore.slack.com/channels/funcscan) channel ([join our Slack here](https://nf-co.re/join/slack)).
-:::
+> [!NOTE]
+> If you need help using or modifying nf-core/funcscan then the best place to ask is on the nf-core Slack [#funcscan](https://nfcore.slack.com/channels/funcscan) channel ([join our Slack here](https://nf-co.re/join/slack)).

## Contribution workflow

@@ -27,6 +26,12 @@ If you're not used to this workflow with git, you can start with some [docs from

## Tests

+You have the option to test your changes locally by running the pipeline. To receive warnings about process selectors and other `debug` information, it is recommended to use the `debug` profile. Execute all the tests with the following command:
+
+```bash
+nf-test test --profile debug,test,docker --verbose
+```
+
When you create a pull request with changes, [GitHub Actions](https://github.com/features/actions) will run automatic tests.
Typically, pull-requests are only fully reviewed when these tests are passing, though of course we can help out before then.

@@ -87,7 +92,7 @@ Once there, use `nf-core schema build` to add to `nextflow_schema.json`.

Sensible defaults for process resource requirements (CPUs / memory / time) for a process should be defined in `conf/base.config`. These should generally be specified generic with `withLabel:` selectors so they can be shared across multiple processes/steps of the pipeline.
A nf-core standard set of labels that should be followed where possible can be seen in the [nf-core pipeline template](https://github.com/nf-core/tools/blob/master/nf_core/pipeline-template/conf/base.config), which has the default process as a single core-process, and then different levels of multi-core configurations for increasingly large memory requirements defined with standardised labels. -The process resources can be passed on to the tool dynamically within the process with the `${task.cpu}` and `${task.memory}` variables in the `script:` block. +The process resources can be passed on to the tool dynamically within the process with the `${task.cpus}` and `${task.memory}` variables in the `script:` block. ### Naming schemes diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 2ebc49ce..fa844526 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -19,6 +19,7 @@ Learn more about contributing: [CONTRIBUTING.md](https://github.com/nf-core/func - [ ] If necessary, also make a PR on the nf-core/funcscan _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. - [ ] Make sure your code lints (`nf-core lint`). - [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir `). +- [ ] Check for unexpected warnings in debug mode (`nextflow run . -profile debug,test,docker --outdir `). - [ ] Usage Documentation in `docs/usage.md` is updated. - [ ] Output Documentation in `docs/output.md` is updated. - [ ] `CHANGELOG.md` is updated. diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml index b4ab0269..6652683e 100644 --- a/.github/workflows/awsfulltest.yml +++ b/.github/workflows/awsfulltest.yml @@ -8,12 +8,12 @@ on: types: [published] workflow_dispatch: jobs: - run-tower: + run-platform: name: Run AWS full tests if: github.repository == 'nf-core/funcscan' runs-on: ubuntu-latest steps: - - name: Launch workflow via tower + - name: Launch workflow via Seqera Platform uses: seqeralabs/action-tower-launch@v2 with: workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} @@ -28,9 +28,9 @@ jobs: } profiles: test_full - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: - name: Tower debug log file + name: Seqera Platform debug log file path: | - tower_action_*.log - tower_action_*.json + seqera_platform_action_*.log + seqera_platform_action_*.json diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml index cb05b166..b3c01745 100644 --- a/.github/workflows/awstest.yml +++ b/.github/workflows/awstest.yml @@ -5,13 +5,13 @@ name: nf-core AWS test on: workflow_dispatch: jobs: - run-tower: + run-platform: name: Run AWS tests if: github.repository == 'nf-core/funcscan' runs-on: ubuntu-latest steps: - # Launch workflow using Tower CLI tool action - - name: Launch workflow via tower + # Launch workflow using Seqera Platform CLI tool action + - name: Launch workflow via Seqera Platform uses: seqeralabs/action-tower-launch@v2 with: workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} @@ -25,9 +25,9 @@ jobs: } profiles: test - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: - name: Tower debug log file + name: Seqera Platform debug log file path: | - tower_action_*.log - tower_action_*.json + seqera_platform_action_*.log + seqera_platform_action_*.json diff --git a/.github/workflows/branch.yml b/.github/workflows/branch.yml index 76d358ff..6e4495ad 100644 --- a/.github/workflows/branch.yml +++ 
b/.github/workflows/branch.yml @@ -19,7 +19,7 @@ jobs: # NOTE - this doesn't currently work if the PR is coming from a fork, due to limitations in GitHub actions secrets - name: Post PR comment if: failure() - uses: mshick/add-pr-comment@v1 + uses: mshick/add-pr-comment@b8f338c590a895d50bcbfa6c5859251edc8952fc # v2 with: message: | ## This PR is against the `master` branch :x: diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2138f415..dd8aa75b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,102 +1,101 @@ -name: nf-core CI # This workflow runs the pipeline with the minimal test dataset to check that it completes without any syntax errors +name: nf-core CI on: push: branches: - - dev + - "dev" pull_request: + branches: + - "dev" + - "master" release: - types: [published] + types: + - "published" env: NXF_ANSI_LOG: false + NFTEST_VER: "0.8.4" concurrency: - group: "${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}" + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} cancel-in-progress: true jobs: - test: - name: Run pipeline with test data (AMP and ARG workflows) - # Only run on push if this is the nf-core dev branch (merged PRs) - if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/funcscan') }}" + define_nxf_versions: + name: Choose nextflow versions to test against depending on target branch runs-on: ubuntu-latest - strategy: - matrix: - NXF_VER: - - "23.04.0" - - "latest-everything" - parameters: - - "--annotation_tool prodigal" - - "--annotation_tool prokka" - - "--annotation_tool bakta --annotation_bakta_db_downloadtype light" - + outputs: + matrix: ${{ steps.nxf_versions.outputs.matrix }} steps: - - name: Check out pipeline code - uses: actions/checkout@v3 - - - name: Install Nextflow - uses: nf-core/setup-nextflow@v1 - with: - version: "${{ matrix.NXF_VER }}" - - - name: Run pipeline with test data (AMP and ARG workflows) + - id: nxf_versions run: | - nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results ${{ matrix.parameters }} + if [[ "${{ github.event_name }}" == "pull_request" && "${{ github.base_ref }}" == "dev" && "${{ matrix.NXF_VER }}" != "latest-everything" ]]; then + echo matrix='["latest-everything"]' | tee -a $GITHUB_OUTPUT + else + echo matrix='["latest-everything", "23.10.0"]' | tee -a $GITHUB_OUTPUT + fi - test_bgc: - name: Run pipeline with test data (BGC workflow) - # Only run on push if this is the nf-core dev branch (merged PRs) - if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/funcscan') }}" + test: + name: nf-test + needs: define_nxf_versions runs-on: ubuntu-latest strategy: + fail-fast: false matrix: - NXF_VER: - - "23.04.0" - - "latest-everything" - parameters: - - "--annotation_tool prodigal" - - "--annotation_tool prokka" - - "--annotation_tool bakta --annotation_bakta_db_downloadtype light" + NXF_VER: ${{ fromJson(needs.define_nxf_versions.outputs.matrix) }} + tags: + - "test" + - "test_nothing" + - "test_bakta" + - "test_prokka" + - "test_bgc_pyrodigal" + - "test_bgc_bakta" + - "test_bgc_prokka" + - "test_taxonomy_pyrodigal" + - "test_taxonomy_bakta" + - "test_taxonomy_prokka" + - "test_preannotated" + - "test_preannotated_bgc" + profile: + - "docker" steps: - name: Check out pipeline code - uses: actions/checkout@v2 + uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 + + - name: Check out test data + uses: 
actions/checkout@v3 + with: + repository: nf-core/test-datasets + ref: funcscan + path: test-datasets/ + fetch-depth: 1 - name: Install Nextflow uses: nf-core/setup-nextflow@v1 with: version: "${{ matrix.NXF_VER }}" - - name: Run pipeline with test data (BGC workflow) + - name: Disk space cleanup + uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 + + - name: Install nf-test run: | - nextflow run ${GITHUB_WORKSPACE} -profile test_bgc,docker --outdir ./results ${{ matrix.parameters }} --bgc_skip_deepbgc + wget -qO- https://code.askimed.com/install/nf-test | bash -s $NFTEST_VER + sudo mv nf-test /usr/local/bin/ - test_deeparg: - name: Run pipeline with test data (DeepARG only workflow) - # Only run on push if this is the nf-core dev branch (merged PRs) - if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/funcscan') }}" - runs-on: ubuntu-latest - strategy: - matrix: - NXF_VER: - - "23.04.0" - - "latest-everything" - parameters: - - "--annotation_tool bakta --annotation_bakta_db_downloadtype light" - - "--annotation_tool pyrodigal" + - name: Run nf-test + run: | + nf-test test --tag ${{ matrix.tags }} --profile ${{ matrix.tags }},${{ matrix.profile }} --junitxml=test.xml - steps: - - name: Check out pipeline code - uses: actions/checkout@v2 + - name: Output log on failure + if: failure() + run: | + sudo apt install bat > /dev/null + batcat --decorations=always --color=always ${{ github.workspace }}/.nf-test/*/tests/output/pipeline_info/software_versions.yml - - name: Install Nextflow - uses: nf-core/setup-nextflow@v1 + - name: Publish Test Report + uses: mikepenz/action-junit-report@v3 + if: always() # always run even if the previous step fails with: - version: "${{ matrix.NXF_VER }}" - - - name: Run pipeline with test data (DeepARG workflow) - run: | - wget https://zenodo.org/record/8280582/files/deeparg.zip ## download from zenodo due to instability of deepARG server - unzip deeparg.zip - nextflow run ${GITHUB_WORKSPACE} -profile test_deeparg,docker --outdir ./results ${{ matrix.parameters }} --arg_deeparg_data 'deeparg/' + report_paths: "*.xml" diff --git a/.github/workflows/clean-up.yml b/.github/workflows/clean-up.yml index 694e90ec..0b6b1f27 100644 --- a/.github/workflows/clean-up.yml +++ b/.github/workflows/clean-up.yml @@ -10,7 +10,7 @@ jobs: issues: write pull-requests: write steps: - - uses: actions/stale@v7 + - uses: actions/stale@28ca1036281a5e5922ead5184a1bbf96e5fc984e # v9 with: stale-issue-message: "This issue has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment otherwise this issue will be closed in 20 days." stale-pr-message: "This PR has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment if it is still useful." diff --git a/.github/workflows/download_pipeline.yml b/.github/workflows/download_pipeline.yml new file mode 100644 index 00000000..2d20d644 --- /dev/null +++ b/.github/workflows/download_pipeline.yml @@ -0,0 +1,86 @@ +name: Test successful pipeline download with 'nf-core download' + +# Run the workflow when: +# - dispatched manually +# - when a PR is opened or reopened to master branch +# - the head branch of the pull request is updated, i.e. if fixes for a release are pushed last minute to dev. +on: + workflow_dispatch: + inputs: + testbranch: + description: "The specific branch you wish to utilize for the test execution of nf-core download." 
+ required: true + default: "dev" + pull_request: + types: + - opened + - edited + - synchronize + branches: + - master + pull_request_target: + branches: + - master + +env: + NXF_ANSI_LOG: false + +jobs: + download: + runs-on: ubuntu-latest + steps: + - name: Install Nextflow + uses: nf-core/setup-nextflow@v2 + + - name: Disk space cleanup + uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 + + - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 + with: + python-version: "3.12" + architecture: "x64" + - uses: eWaterCycle/setup-singularity@931d4e31109e875b13309ae1d07c70ca8fbc8537 # v7 + with: + singularity-version: 3.8.3 + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install git+https://github.com/nf-core/tools.git@dev + + - name: Get the repository name and current branch set as environment variable + run: | + echo "REPO_LOWERCASE=${GITHUB_REPOSITORY,,}" >> ${GITHUB_ENV} + echo "REPOTITLE_LOWERCASE=$(basename ${GITHUB_REPOSITORY,,})" >> ${GITHUB_ENV} + echo "REPO_BRANCH=${{ github.event.inputs.testbranch || 'dev' }}" >> ${GITHUB_ENV} + + - name: Download the pipeline + env: + NXF_SINGULARITY_CACHEDIR: ./ + run: | + nf-core download ${{ env.REPO_LOWERCASE }} \ + --revision ${{ env.REPO_BRANCH }} \ + --outdir ./${{ env.REPOTITLE_LOWERCASE }} \ + --compress "none" \ + --container-system 'singularity' \ + --container-library "quay.io" -l "docker.io" -l "ghcr.io" \ + --container-cache-utilisation 'amend' \ + --download-configuration + + - name: Inspect download + run: tree ./${{ env.REPOTITLE_LOWERCASE }} + + - name: Run the downloaded pipeline (stub) + id: stub_run_pipeline + continue-on-error: true + env: + NXF_SINGULARITY_CACHEDIR: ./ + NXF_SINGULARITY_HOME_MOUNT: true + run: nextflow run ./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) -stub -profile test,singularity --outdir ./results + - name: Run the downloaded pipeline (stub run not supported) + id: run_pipeline + if: ${{ job.steps.stub_run_pipeline.status == failure() }} + env: + NXF_SINGULARITY_CACHEDIR: ./ + NXF_SINGULARITY_HOME_MOUNT: true + run: nextflow run ./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) -profile test,singularity --outdir ./results diff --git a/.github/workflows/fix-linting.yml b/.github/workflows/fix-linting.yml index e280f7c4..f0aa68f2 100644 --- a/.github/workflows/fix-linting.yml +++ b/.github/workflows/fix-linting.yml @@ -4,7 +4,7 @@ on: types: [created] jobs: - deploy: + fix-linting: # Only run if comment is on a PR with the main repo, and if it contains the magic keywords if: > contains(github.event.comment.html_url, '/pull/') && @@ -13,10 +13,17 @@ jobs: runs-on: ubuntu-latest steps: # Use the @nf-core-bot token to check out so we can push later - - uses: actions/checkout@v3 + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 with: token: ${{ secrets.nf_core_bot_auth_token }} + # indication that the linting is being fixed + - name: React on comment + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + comment-id: ${{ github.event.comment.id }} + reactions: eyes + # Action runs on the issue comment, so we don't get the PR by default # Use the gh cli to check out the PR - name: Checkout Pull Request @@ -24,32 +31,59 @@ jobs: env: GITHUB_TOKEN: ${{ secrets.nf_core_bot_auth_token }} - - uses: actions/setup-node@v3 + # Install and run pre-commit + - uses: 
actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5
+        with:
+          python-version: "3.12"

-      - name: Install Prettier
-        run: npm install -g prettier @prettier/plugin-php
+      - name: Install pre-commit
+        run: pip install pre-commit

-      # Check that we actually need to fix something
-      - name: Run 'prettier --check'
-        id: prettier_status
-        run: |
-          if prettier --check ${GITHUB_WORKSPACE}; then
-            echo "result=pass" >> $GITHUB_OUTPUT
-          else
-            echo "result=fail" >> $GITHUB_OUTPUT
-          fi
+      - name: Run pre-commit
+        id: pre-commit
+        run: pre-commit run --all-files
+        continue-on-error: true

-      - name: Run 'prettier --write'
-        if: steps.prettier_status.outputs.result == 'fail'
-        run: prettier --write ${GITHUB_WORKSPACE}
+      # indication that the linting has finished
+      - name: React if linting finished successfully
+        if: steps.pre-commit.outcome == 'success'
+        uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4
+        with:
+          comment-id: ${{ github.event.comment.id }}
+          reactions: "+1"

       - name: Commit & push changes
-        if: steps.prettier_status.outputs.result == 'fail'
+        id: commit-and-push
+        if: steps.pre-commit.outcome == 'failure'
         run: |
           git config user.email "core@nf-co.re"
           git config user.name "nf-core-bot"
           git config push.default upstream
           git add .
           git status
-          git commit -m "[automated] Fix linting with Prettier"
+          git commit -m "[automated] Fix code linting"
           git push
+
+      - name: React if linting errors were fixed
+        id: react-if-fixed
+        if: steps.commit-and-push.outcome == 'success'
+        uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4
+        with:
+          comment-id: ${{ github.event.comment.id }}
+          reactions: hooray
+
+      - name: React if linting errors were not fixed
+        if: steps.commit-and-push.outcome == 'failure'
+        uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4
+        with:
+          comment-id: ${{ github.event.comment.id }}
+          reactions: confused
+
+      - name: Comment if linting errors were not fixed
+        if: steps.commit-and-push.outcome == 'failure'
+        uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4
+        with:
+          issue-number: ${{ github.event.issue.number }}
+          body: |
+            @${{ github.actor }} I tried to fix the linting errors, but it didn't work. Please fix them manually.
+            See [CI log](https://github.com/nf-core/funcscan/actions/runs/${{ github.run_id }}) for more details.
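The rewritten fix-linting workflow above delegates all formatting to pre-commit instead of calling Prettier directly. As a rough sketch of reproducing the same check locally before pushing (assuming Python and `pip` are on the PATH; the hook set comes from the repository's `.pre-commit-config.yaml`, which is updated later in this diff):

```bash
# Install pre-commit, the tool the CI steps above invoke.
pip install pre-commit

# Check (and auto-fix, where hooks support it) the whole working tree,
# mirroring the workflow's `pre-commit run --all-files` step.
pre-commit run --all-files
```
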
diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index b8bdd214..1fcafe88 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -11,74 +11,34 @@ on: types: [published] jobs: - EditorConfig: + pre-commit: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 - - uses: actions/setup-node@v3 - - - name: Install editorconfig-checker - run: npm install -g editorconfig-checker - - - name: Run ECLint check - run: editorconfig-checker -exclude README.md $(find .* -type f | grep -v '.git\|.py\|.md\|json\|yml\|yaml\|html\|css\|work\|.nextflow\|build\|nf_core.egg-info\|log.txt\|Makefile') - - Prettier: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - - uses: actions/setup-node@v3 - - - name: Install Prettier - run: npm install -g prettier - - - name: Run Prettier --check - run: prettier --check ${GITHUB_WORKSPACE} - - PythonBlack: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - - name: Check code lints with Black - uses: psf/black@stable - - # If the above check failed, post a comment on the PR explaining the failure - - name: Post PR comment - if: failure() - uses: mshick/add-pr-comment@v1 + - name: Set up Python 3.12 + uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 with: - message: | - ## Python linting (`black`) is failing - - To keep the code consistent with lots of contributors, we run automated code consistency checks. - To fix this CI test, please run: - - * Install [`black`](https://black.readthedocs.io/en/stable/): `pip install black` - * Fix formatting errors in your pipeline: `black .` - - Once you push these changes the test should pass, and you can hide this comment :+1: + python-version: "3.12" - We highly recommend setting up Black in your code editor so that this formatting is done automatically on save. Ask about it on Slack for help! + - name: Install pre-commit + run: pip install pre-commit - Thanks again for your contribution! 
- repo-token: ${{ secrets.GITHUB_TOKEN }} - allow-repeats: false + - name: Run pre-commit + run: pre-commit run --all-files nf-core: runs-on: ubuntu-latest steps: - name: Check out pipeline code - uses: actions/checkout@v3 + uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 - name: Install Nextflow - uses: nf-core/setup-nextflow@v1 + uses: nf-core/setup-nextflow@v2 - - uses: actions/setup-python@v4 + - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 with: - python-version: "3.11" + python-version: "3.12" architecture: "x64" - name: Install dependencies @@ -99,7 +59,7 @@ jobs: - name: Upload linting log file artifact if: ${{ always() }} - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4 with: name: linting-logs path: | diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml index 0bbcd30f..40acc23f 100644 --- a/.github/workflows/linting_comment.yml +++ b/.github/workflows/linting_comment.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Download lint results - uses: dawidd6/action-download-artifact@v2 + uses: dawidd6/action-download-artifact@09f2f74827fd3a8607589e5ad7f9398816f540fe # v3 with: workflow: linting.yml workflow_conclusion: completed @@ -21,7 +21,7 @@ jobs: run: echo "pr_number=$(cat linting-logs/PR_number.txt)" >> $GITHUB_OUTPUT - name: Post PR comment - uses: marocchino/sticky-pull-request-comment@v2 + uses: marocchino/sticky-pull-request-comment@331f8f5b4215f0445d3c07b4967662a32a2d3e31 # v2 with: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} number: ${{ steps.pr_number.outputs.pr_number }} diff --git a/.github/workflows/release-announcments.yml b/.github/workflows/release-announcements.yml similarity index 80% rename from .github/workflows/release-announcments.yml rename to .github/workflows/release-announcements.yml index 6ad33927..03ecfcf7 100644 --- a/.github/workflows/release-announcments.yml +++ b/.github/workflows/release-announcements.yml @@ -9,6 +9,11 @@ jobs: toot: runs-on: ubuntu-latest steps: + - name: get topics and convert to hashtags + id: get_topics + run: | + echo "topics=$(curl -s https://nf-co.re/pipelines.json | jq -r '.remote_workflows[] | select(.full_name == "${{ github.repository }}") | .topics[]' | awk '{print "#"$0}' | tr '\n' ' ')" >> $GITHUB_OUTPUT + - uses: rzr/fediverse-action@master with: access-token: ${{ secrets.MASTODON_ACCESS_TOKEN }} @@ -20,11 +25,13 @@ jobs: Please see the changelog: ${{ github.event.release.html_url }} + ${{ steps.get_topics.outputs.topics }} #nfcore #openscience #nextflow #bioinformatics + send-tweet: runs-on: ubuntu-latest steps: - - uses: actions/setup-python@v4 + - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 with: python-version: "3.10" - name: Install dependencies @@ -56,7 +63,7 @@ jobs: bsky-post: runs-on: ubuntu-latest steps: - - uses: zentered/bluesky-post-action@v0.0.2 + - uses: zentered/bluesky-post-action@80dbe0a7697de18c15ad22f4619919ceb5ccf597 # v0.1.0 with: post: | Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! 
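For context on the `get_topics` step added to the toot job above: it converts the pipeline's GitHub topics, as listed on the nf-core website, into hashtags for the release post. A standalone sketch of the same chain, with the repository slug hard-coded purely for illustration:

```bash
# Fetch the pipeline catalogue, select this repository's entry, and
# turn each topic into a hashtag, as in the workflow step above.
curl -s https://nf-co.re/pipelines.json \
  | jq -r '.remote_workflows[] | select(.full_name == "nf-core/funcscan") | .topics[]' \
  | awk '{print "#"$0}' \
  | tr '\n' ' '
```
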
diff --git a/.gitignore b/.gitignore index ced46a87..2eef655b 100644 --- a/.gitignore +++ b/.gitignore @@ -6,4 +6,4 @@ results/ testing/ testing* *.pyc -tests/ +.nf-test* diff --git a/.gitpod.yml b/.gitpod.yml index 25488dcc..105a1821 100644 --- a/.gitpod.yml +++ b/.gitpod.yml @@ -4,16 +4,17 @@ tasks: command: | pre-commit install --install-hooks nextflow self-update + - name: unset JAVA_TOOL_OPTIONS + command: | + unset JAVA_TOOL_OPTIONS vscode: extensions: # based on nf-core.nf-core-extensionpack - - codezombiech.gitignore # Language support for .gitignore files - # - cssho.vscode-svgviewer # SVG viewer - esbenp.prettier-vscode # Markdown/CommonMark linting and style checking for Visual Studio Code - - eamodio.gitlens # Quickly glimpse into whom, why, and when a line or code block was changed - EditorConfig.EditorConfig # override user/workspace settings with settings found in .editorconfig files - Gruntfuggly.todo-tree # Display TODO and FIXME in a tree view in the activity bar - mechatroner.rainbow-csv # Highlight columns in csv files in different colors - # - nextflow.nextflow # Nextflow syntax highlighting + # - nextflow.nextflow # Nextflow syntax highlighting - oderwat.indent-rainbow # Highlight indentation level - streetsidesoftware.code-spell-checker # Spelling checker for source code + - charliermarsh.ruff # Code linter Ruff diff --git a/.nf-core.yml b/.nf-core.yml index 3805dc81..318ad93d 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -1 +1,4 @@ repository_type: pipeline +lint: + actions_ci: False ## TODO: re-activate once nf-test ci.yml structure updated +nf_core_version: "2.14.1" diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0c31cdb9..4dc0f1dc 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,5 +1,13 @@ repos: - repo: https://github.com/pre-commit/mirrors-prettier - rev: "v2.7.1" + rev: "v3.1.0" hooks: - id: prettier + additional_dependencies: + - prettier@3.2.5 + + - repo: https://github.com/editorconfig-checker/editorconfig-checker.python + rev: "2.7.3" + hooks: + - id: editorconfig-checker + alias: ec diff --git a/CHANGELOG.md b/CHANGELOG.md index 5eff7c3a..ba188b70 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,94 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## v2.0.0 - [2024-08] + +### `Breaking change` + +- [#391](https://github.com/nf-core/funcscan/pull/391) Made all "database" parameter names consistent, skip hmmsearch by default. 
(by @jasmezz) + +| Old parameter | New parameter | +| ------------------------------------------------ | --------------------------------------- | +| `annotation_bakta_db_localpath` | `annotation_bakta_db` | +| `arg_abricate_db` | `arg_abricate_db_id` | +| `arg_abricate_localdbdir` | `arg_abricate_db` | +| `arg_deeparg_data` | `arg_deeparg_db` | +| `arg_deeparg_data_version` | `arg_deeparg_db_version` | +| `arg_rgi_database` | `arg_rgi_db` | +| `bgc_antismash_databases` | `bgc_antismash_db` | +| `bgc_antismash_installationdirectory` | `bgc_antismash_installdir` | +| `bgc_deepbgc_database` | `bgc_deepbgc_db` | +| `save_databases` | `save_db` | +| `taxa_classification_mmseqs_databases_localpath` | `taxa_classification_mmseqs_db` | +| `taxa_classification_mmseqs_databases_id` | `taxa_classification_mmseqs_db_id` | +| `taxa_classification_mmseqs_databases_savetmp` | `taxa_classification_mmseqs_db_savetmp` | +| `amp_skip_hmmsearch` | `amp_run_hmmsearch` | +| `bgc_skip_hmmsearch` | `bgc_run_hmmsearch` | + +- [#343](https://github.com/nf-core/funcscan/pull/343) Standardized the resulting workflow summary tables to always start with 'sample_id\tcontig_id\t..'. Reformatted the output of `hamronization/summarize` module. (by @darcy220606) +- [#411](https://github.com/nf-core/funcscan/pull/411) Optimised hAMRonization input: only high-quality hits from fARGene output are reported. (by @jasmezz, @jfy133) + +### `Added` + +- [#322](https://github.com/nf-core/funcscan/pull/322) Updated all modules: introduce environment.yml files. (by @jasmezz) +- [#324](https://github.com/nf-core/funcscan/pull/324) Removed separate DeepARG test profile because database download is now stable. (by @jasmezz) +- [#332](https://github.com/nf-core/funcscan/pull/332) & [#327](https://github.com/nf-core/funcscan/pull/327) Merged pipeline template of nf-core/tools version 2.12.1 (by @jfy133, @jasmezz) +- [#338](https://github.com/nf-core/funcscan/pull/338) Set `--meta` parameter to default for Bakta, with singlemode optional. (by @jasmezz) +- [#343](https://github.com/nf-core/funcscan/pull/343) Added contig taxonomic classification using [MMseqs2](https://github.com/soedinglab/MMseqs2/). (by @darcy220606) +- [#358](https://github.com/nf-core/funcscan/pull/358) Improved RGI databases handling, users can supply their own CARD now. (by @jasmezz) +- [#375](https://github.com/nf-core/funcscan/pull/375) Merged pipeline template of nf-core/tools version 2.14.1. (by @jfy133) +- [#381](https://github.com/nf-core/funcscan/pull/381) Added support for supplying pre-annotated sequences to the pipeline. (by @jfy133, @jasmezz) +- [#382](https://github.com/nf-core/funcscan/pull/382) Optimised BGC screening run time and prevent crashes due to too-short contigs by adding contig length filtering for BGC workflow only. (by @jfy133, @darcy220606) +- [#366](https://github.com/nf-core/funcscan/pull/366) Added nf-test on pipeline level. (by @jfy133, @Darcy220606, @jasmezz) +- [#403](https://github.com/nf-core/funcscan/pull/403) Added antiSMASH parameters `--pfam2go`, `--rre`, and `--tfbs`. (reported by @Darcy220606, added by @jasmezz) +- [#405](https://github.com/nf-core/funcscan/pull/405) Added argNorm to ARG subworkflow. (by @Vedanth-Ramji) + +### `Fixed` + +- [#348](https://github.com/nf-core/funcscan/pull/348) Updated samplesheet for pipeline tests to 'samplesheet_reduced.csv' with smaller datasets to reduce resource consumption. Updated prodigal module to fix pigz issue. Removed `tests/` from `.gitignore`. 
(by @darcy220606)
+- [#362](https://github.com/nf-core/funcscan/pull/362) Save annotations from bakta in subdirectories per sample. (by @jasmezz)
+- [#363](https://github.com/nf-core/funcscan/pull/363) Removed warning from DeepBGC usage docs. (by @jasmezz)
+- [#365](https://github.com/nf-core/funcscan/pull/365) Fixed AMRFinderPlus module and usage docs for manual database download. (by @jasmezz)
+- [#371](https://github.com/nf-core/funcscan/pull/371) Fixed AMRFinderPlus parameter `arg_amrfinderplus_name`. (by @m3hdad)
+- [#377](https://github.com/nf-core/funcscan/pull/377) Fixed an occasional RGI process failure when certain files were not produced. (❤️ to @amizeranschi for reporting, fix by @amizeranschi & @jfy133)
+- [#386](https://github.com/nf-core/funcscan/pull/386) Updated DeepBGC module to fix output file names, separate annotation step for all BGC tools, add warning if no BGCs found, fix MultiQC reporting of annotation workflow. (by @jfy133, @jasmezz)
+- [#393](https://github.com/nf-core/funcscan/pull/393) & [#397](https://github.com/nf-core/funcscan/pull/397) Fixed a docker/singularity only error appearing when running with conda. (❤️ to @ewissel for reporting, fix by @jfy133 & @jasmezz)
+- [#391](https://github.com/nf-core/funcscan/pull/391) Skip hmmsearch by default to not crash pipeline if user provides no HMM files, updated docs. (by @jasmezz)
+- [#397](https://github.com/nf-core/funcscan/pull/397) Removed deprecated AMPcombi module, fixed variable name in BGC workflow, updated minor parts in docs (usage, parameter schema). (by @jasmezz)
+- [#402](https://github.com/nf-core/funcscan/pull/402) Fixed BGC length calculation for antiSMASH hits by comBGC. (by @jasmezz)
+- [#406](https://github.com/nf-core/funcscan/pull/406) Fixed prediction tools not being executed if annotation workflow skipped. (by @jasmezz)
+- [#407](https://github.com/nf-core/funcscan/pull/407) Fixed comBGC bug when parsing multiple antiSMASH files. (by @jasmezz)
+- [#409](https://github.com/nf-core/funcscan/pull/409) Fixed argNorm overwriting its output for DeepARG. (by @jasmezz, @jfy133)
+- [#412](https://github.com/nf-core/funcscan/pull/412) Improved all pre-run database download documentation. (by @jfy133)
+
+### `Dependencies`
+
+| Tool          | Previous version | New version |
+| ------------- | ---------------- | ----------- |
+| AMPcombi      | 0.1.7            | 0.2.2       |
+| AMPlify       | 1.1.0            | 2.0.0       |
+| AMRFinderPlus | 3.11.18          | 3.12.8      |
+| antiSMASH     | 6.1.1            | 7.1.0       |
+| argNorm       | NA               | 0.5.0       |
+| bioawk        | 1.0              | NA          |
+| comBGC        | 1.6.1            | 1.6.2       |
+| DeepARG       | 1.0.2            | 1.0.4       |
+| DeepBGC       | 0.1.30           | 0.1.31      |
+| GECCO         | 0.9.8            | 0.9.10      |
+| hAMRonization | 1.1.1            | 1.1.4       |
+| HMMER         | 3.3.2            | 3.4         |
+| MMseqs2       | NA               | 2:15.6f452  |
+| MultiQC       | 1.15             | 1.24        |
+| Pyrodigal     | 2.1.0            | 3.3.0       |
+| RGI           | 5.2.1            | 6.0.3       |
+| seqkit        | NA               | 2.8.1       |
+| tabix/htslib  | 1.11             | 1.20        |
+
+### `Deprecated`
+
+- [#384](https://github.com/nf-core/funcscan/pull/384) Deprecated AMPcombi and exchanged it with full suite of AMPcombi2 submodules. (by @darcy220606)
+- [#382](https://github.com/nf-core/funcscan/pull/382) Optimised BGC screening run time and prevented crashes due to too-short contigs by adding contig length filtering for BGC workflow only. Bioawk is replaced with seqkit.
(by @jfy133, @darcy220606) + ## v1.1.6 - [2024-07-08] ### `Added` @@ -37,7 +125,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Fixed` - [#306](https://github.com/nf-core/funcscan/pull/306) Added new parameter `annotation_prokka_retaincontigheaders` to allow prokka to retain the original contig headers/locus tag. (by @darcy220606) -- [#307](https://github.com/nf-core/funcscan/pull/307) Fixed stability of deepARG tests by using Zenodo copy of database (❤️ to Gustavo Arango and Liqing Zhang for uploading, fix by @jfy133) +- [#307](https://github.com/nf-core/funcscan/pull/307) Fixed stability of deepARG tests by using Zenodo copy of database. (❤️ to Gustavo Arango and Liqing Zhang for uploading, fix by @jfy133) - [#310](https://github.com/nf-core/funcscan/pull/310) Fixed error when supplying uncompressed input; added "fas" file extension for FASTA files. (by @tavareshugo) - [#311](https://github.com/nf-core/funcscan/pull/311) Merged pipeline template of nf-core/tools version 2.10. (by @jasmezz) diff --git a/CITATIONS.md b/CITATIONS.md index 84ada6d1..80493194 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -30,17 +30,17 @@ > Blin, K., Shaw, S., Kloosterman, A. M., Charlop-Powers, Z., van Wezel, G. P., Medema, M. H., & Weber, T. (2021). antiSMASH 6.0: improving cluster detection and comparison capabilities. Nucleic acids research, 49(W1), W29–W35. [DOI: 10.1093/nar/gkab335](https://doi.org/10.1093/nar/gkab335) -- [Bakta](https://doi.org/10.1099/mgen.0.000685) +- [argNorm](https://github.com/BigDataBiology/argNorm) - > Schwengers, O., Jelonek, L., Dieckmann, M. A., Beyvers, S., Blom, J., & Goesmann, A. (2021). Bakta: rapid and standardized annotation of bacterial genomes via alignment-free sequence identification. Microbial Genomics, 7(11). [DOI: 10.1099/mgen.0.000685](https://doi.org/10.1099/mgen.0.000685) + > Perovic, S. U., Ramji, V., Chong, H., Duan, Y., Maguire, F., Coelho, L. P. (2024). BigDataBiology/argNorm. [DOI: 10.5281/zenodo.10963591](https://zenodo.org/doi/10.5281/zenodo.10963591) -- [bioawk](https://github.com/lh3/bioawk) +- [Bakta](https://doi.org/10.1099/mgen.0.000685) - > Li, H. (2023). bioawk: BWK awk modified for biological data. Github. Retrieved July 12, 2023, from https://github.com/lh3/bioawk + > Schwengers, O., Jelonek, L., Dieckmann, M. A., Beyvers, S., Blom, J., & Goesmann, A. (2021). Bakta: rapid and standardized annotation of bacterial genomes via alignment-free sequence identification. Microbial Genomics, 7(11). [DOI: 10.1099/mgen.0.000685](https://doi.org/10.1099/mgen.0.000685) - [comBGC](https://github.com/nf-core/funcscan) - > Frangenberg, J., Fellows Yates, J. A., Ibrahim, A., Perelo, L., & Beber, M. E. (2023). nf-core/funcscan: 1.0.0 - German Rollmops - 2023-02-15. https://doi.org/10.5281/zenodo.7643100 + > Frangenberg, J., Fellows Yates, J. A., Ibrahim, A., Perelo, L., & Beber, M. E. (2023). nf-core/funcscan: 1.0.0 - German Rollmops - 2023-02-15. [DOI: 10.5281/zenodo.7643100](https://doi.org/10.5281/zenodo.7643099) - [DeepARG](https://doi.org/10.1186/s40168-018-0401-z) @@ -56,7 +56,7 @@ - [GECCO](https://gecco.embl.de) - > Carroll, L. M. , Larralde, M., Fleck, J. S., Ponnudurai, R., Milanese, A., Cappio Barazzone, E. & Zeller, G. (2021). Accurate de novo identification of biosynthetic gene clusters with GECCO. bioRxiv. [DOI: 10.1101/2021.05.03.442509](https://doi.org/10.1101/2021.05.03.442509) + > Carroll, L. M., Larralde, M., Fleck, J. S., Ponnudurai, R., Milanese, A., Cappio Barazzone, E. & Zeller, G. (2021). 
Accurate de novo identification of biosynthetic gene clusters with GECCO. bioRxiv. [DOI: 10.1101/2021.05.03.442509](https://doi.org/10.1101/2021.05.03.442509) - [AMPcombi](https://github.com/Darcy220606/AMPcombi) @@ -64,7 +64,7 @@ - [hAMRonization](https://github.com/pha4ge/hAMRonization) - > Maguire, F., Fornika, D., Mendes, I., Phelan, J., Underwood, A., Witney, A., pvanheus, Manuele, A., Lee, T., amos, & imendes. (2023). pha4ge/hAMRonization: Zenodo Release. Zenodo. https://doi.org/10.5281/ZENODO.8131134 + > Mendes, I., Griffiths, E., Manuele, A., Fornika, D., Tausch, S. H., Le-Viet, T., Phelan, J., Meehan, C. J., Raphenya, A. R., Alcock, B., Culp, E., Lorenzo, F., Haim, M. S., Witney, A., Black, A., Katz, L., Oluniyi, P., Olawoye, I., Timme, R., Neoh, H., Lam, S. D., Jamaluddin, T. Z. M. T., Nathan, S., Ang, M. Y., Di Gregorio, S., Vandelannoote, K., Dusadeepong, R, Chindelevitch, L., Nasar, M. I., Aanensen, D., Afolayan, A. O., Odih, E. E., McArthur, A. G., Feldgarden, M., Galas, M. M., Campos, J., Okeke, I. N., Underwood, A., Page, A. J., MacCannell, D., Maguire, F. (2023). hAMRonization: Enhancing antimicrobial resistance prediction using the PHA4GE AMR detection specification and tooling. bioRxiv. [DOI: 10.1101/2024.03.07.583950](https://doi.org/10.1101/2024.03.07.583950) - [HMMER](https://doi.org/10.1371/journal.pcbi.1002195.) @@ -74,6 +74,10 @@ > Santos-Júnior, C. D., Pan, S., Zhao, X. M., & Coelho, L. P. (2020). Macrel: antimicrobial peptide screening in genomes and metagenomes. PeerJ, 8, e10555. [DOI: 10.7717/peerj.10555](https://doi.org/10.7717/peerj.10555) +- [MMseqs2](https://doi.org/10.1093/bioinformatics/btab184) + + > Mirdita, M., Steinegger, M., Breitwieser, F., Söding, J., Levy Karin, E. (2021). Fast and sensitive taxonomic assignment to metagenomic contigs. Bioinformatics, 37(18),3029–3031. [DOI: 10.1093/bioinformatics/btab184](https://doi.org/10.1093/bioinformatics/btab184) + - [Prodigal](https://doi.org/10.1186/1471-2105-11-119) > Hyatt, D., Chen, G. L., Locascio, P. F., Land, M. L., Larimer, F. W., & Hauser, L. J. (2010). Prodigal: prokaryotic gene recognition and translation initiation site identification. BMC bioinformatics, 11, 119. [DOI: 10.1186/1471-2105-11-119](https://doi.org/10.1186/1471-2105-11-119) @@ -88,7 +92,11 @@ - [RGI](https://doi.org/10.1093/nar/gkz935) - > Alcock, B. P., Raphenya, A. R., Lau, T., Tsang, K. K., Bouchard, M., Edalatmand, A., Huynh, W., Nguyen, A. V., Cheng, A. A., Liu, S., Min, S. Y., Miroshnichenko, A., Tran, H. K., Werfalli, R. E., Nasir, J. A., Oloni, M., Speicher, D. J., Florescu, A., Singh, B., Faltyn, M., … McArthur, A. G. (2020). CARD 2020: antibiotic resistome surveillance with the comprehensive antibiotic resistance database. Nucleic acids research, 48(D1), D517–D525. [DOI: 10.1093/nar/gkz935](https://doi.org/10.1093/nar/gkz935) + > Alcock, B. P., Huynh, W., Chalil, R., Smith, K. W., Raphenya, A. R., Wlodarski, M. A., Edalatmand, A., Petkau, A., Syed, S. A., Tsang, K. K., Baker, S. J. C., Dave, M., McCarthy, M. C., Mukiri, K. M., Nasir, J. A., Golbon, B., Imtiaz, H., Jiang, X., Kaur, K., Kwong, M., Liang, Z. C., Niu, K. C., Shan, P., Yang, J. Y. J., Gray, K. L., Hoad, G. R., Jia, B., Bhando, T., Carfrae, L. A., Farha, M. A., French, S., Gordzevich, R., Rachwalski, K., Tu, M. M., Bordeleau, E., Dooley, D., Griffiths, E., Zubyk, H. L., Brown, E. D., Maguire, F., Beiko, R. G., Hsiao, W. W. L., Brinkman F. S. L., Van Domselaar, G., McArthur, A. G. (2023). 
CARD 2023: expanded curation, support for machine learning, and resistome prediction at the Comprehensive Antibiotic Resistance Database. Nucleic acids research, 51(D1):D690-D699. [DOI: 10.1093/nar/gkac920](https://doi.org/10.1093/nar/gkac920)
+
+- [SeqKit](https://bioinf.shenwei.me/seqkit/)
+
+  > Shen, W., Sipos, B., & Zhao, L. (2024). SeqKit2: A Swiss army knife for sequence and alignment processing. iMeta, e191. [DOI: 10.1002/imt2.191](https://doi.org/10.1002/imt2.191)

## Software packaging/containerisation tools

diff --git a/README.md b/README.md
index 0a420be0..d5c9fcda 100644
--- a/README.md
+++ b/README.md
@@ -1,14 +1,20 @@
-# ![nf-core/funscan](docs/images/nf-core-funcscan_logo_flat_light.png#gh-light-mode-only) ![nf-core/funscan](docs/images/nf-core-funcscan_logo_flat_dark.png#gh-dark-mode-only)
+<h1>
+  <picture>
+    <source media="(prefers-color-scheme: dark)" srcset="docs/images/nf-core-funcscan_logo_dark.png">
+    <img alt="nf-core/funcscan" src="docs/images/nf-core-funcscan_logo_light.png">
+  </picture>
+</h1>
-[![GitHub Actions CI Status](https://github.com/nf-core/funcscan/workflows/nf-core%20CI/badge.svg)](https://github.com/nf-core/funcscan/actions?query=workflow%3A%22nf-core+CI%22) -[![GitHub Actions Linting Status](https://github.com/nf-core/funcscan/workflows/nf-core%20linting/badge.svg)](https://github.com/nf-core/funcscan/actions?query=workflow%3A%22nf-core+linting%22)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/funcscan/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.7643099-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.7643099) +[![GitHub Actions CI Status](https://github.com/nf-core/funcscan/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/funcscan/actions/workflows/ci.yml) +[![GitHub Actions Linting Status](https://github.com/nf-core/funcscan/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/funcscan/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/funcscan/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.7643099-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.7643099) +[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com) [![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A523.04.0-23aa62.svg)](https://www.nextflow.io/) [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) [![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) [![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) -[![Launch on Nextflow Tower](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Nextflow%20Tower-%234256e7)](https://tower.nf/launch?pipeline=https://github.com/nf-core/funcscan) +[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/funcscan) [![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23funcscan-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/funcscan)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core) @@ -24,22 +30,21 @@ The nf-core/funcscan AWS full test dataset are contigs generated by the MGnify s ## Pipeline summary -1. Annotation of assembled prokaryotic contigs with [`Prodigal`](https://github.com/hyattpd/Prodigal), [`Pyrodigal`](https://github.com/althonos/pyrodigal), [`Prokka`](https://github.com/tseemann/prokka), or [`Bakta`](https://github.com/oschwengers/bakta) -2. Screening contigs for antimicrobial peptide-like sequences with [`ampir`](https://cran.r-project.org/web/packages/ampir/index.html), [`Macrel`](https://github.com/BigDataBiology/macrel), [`HMMER`](http://hmmer.org/), [`AMPlify`](https://github.com/bcgsc/AMPlify) -3. 
Screening contigs for antibiotic resistant gene-like sequences with [`ABRicate`](https://github.com/tseemann/abricate), [`AMRFinderPlus`](https://github.com/ncbi/amr), [`fARGene`](https://github.com/fannyhb/fargene), [`RGI`](https://card.mcmaster.ca/analyze/rgi), [`DeepARG`](https://bench.cs.vt.edu/deeparg) -4. Screening contigs for biosynthetic gene cluster-like sequences with [`antiSMASH`](https://antismash.secondarymetabolites.org), [`DeepBGC`](https://github.com/Merck/deepbgc), [`GECCO`](https://gecco.embl.de/), [`HMMER`](http://hmmer.org/) -5. Creating aggregated reports for all samples across the workflows with [`AMPcombi`](https://github.com/Darcy220606/AMPcombi) for AMPs, [`hAMRonization`](https://github.com/pha4ge/hAMRonization) for ARGs, and [`comBGC`](https://raw.githubusercontent.com/nf-core/funcscan/master/bin/comBGC.py) for BGCs -6. Software version and methods text reporting with [`MultiQC`](http://multiqc.info/) +1. Quality control of input sequences with [`SeqKit`](https://bioinf.shenwei.me/seqkit/) +2. Taxonomic classification of contigs of **prokaryotic origin** with [`MMseqs2`](https://github.com/soedinglab/MMseqs2) +3. Annotation of assembled prokaryotic contigs with [`Prodigal`](https://github.com/hyattpd/Prodigal), [`Pyrodigal`](https://github.com/althonos/pyrodigal), [`Prokka`](https://github.com/tseemann/prokka), or [`Bakta`](https://github.com/oschwengers/bakta) +4. Screening contigs for antimicrobial peptide-like sequences with [`ampir`](https://cran.r-project.org/web/packages/ampir/index.html), [`Macrel`](https://github.com/BigDataBiology/macrel), [`HMMER`](http://hmmer.org/), [`AMPlify`](https://github.com/bcgsc/AMPlify) +5. Screening contigs for antibiotic resistant gene-like sequences with [`ABRicate`](https://github.com/tseemann/abricate), [`AMRFinderPlus`](https://github.com/ncbi/amr), [`fARGene`](https://github.com/fannyhb/fargene), [`RGI`](https://card.mcmaster.ca/analyze/rgi), [`DeepARG`](https://bench.cs.vt.edu/deeparg). [`argNorm`](https://github.com/BigDataBiology/argNorm) is used to map the outputs of `DeepARG`, `AMRFinderPlus`, and `ABRicate` to the [`Antibiotic Resistance Ontology`](https://www.ebi.ac.uk/ols4/ontologies/aro) for consistent ARG classification terms. +6. Screening contigs for biosynthetic gene cluster-like sequences with [`antiSMASH`](https://antismash.secondarymetabolites.org), [`DeepBGC`](https://github.com/Merck/deepbgc), [`GECCO`](https://gecco.embl.de/), [`HMMER`](http://hmmer.org/) +7. Creating aggregated reports for all samples across the workflows with [`AMPcombi`](https://github.com/Darcy220606/AMPcombi) for AMPs, [`hAMRonization`](https://github.com/pha4ge/hAMRonization) for ARGs, and [`comBGC`](https://raw.githubusercontent.com/nf-core/funcscan/master/bin/comBGC.py) for BGCs +8. Software version and methods text reporting with [`MultiQC`](http://multiqc.info/) ![funcscan metro workflow](docs/images/funcscan_metro_workflow.png) ## Usage -:::note -If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how -to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) -with `-profile test` before running the workflow on actual data. -::: +> [!NOTE] +> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. 
Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data. First, prepare a samplesheet with your input data that looks as follows: @@ -66,11 +71,9 @@ nextflow run nf-core/funcscan \ --run_bgc_screening ``` -:::warning -Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those -provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; -see [docs](https://nf-co.re/usage/configuration#custom-configuration-files). -::: +> [!WARNING] +> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; +> see [docs](https://nf-co.re/usage/configuration#custom-configuration-files). For more details and further functionality, please refer to the [usage documentation](https://nf-co.re/funcscan/usage) and the [parameter documentation](https://nf-co.re/funcscan/parameters). @@ -86,7 +89,7 @@ nf-core/funcscan was originally written by Jasmin Frangenberg, Anan Ibrahim, Lou We thank the following people for their extensive assistance in the development of this pipeline: -Rosa Herbst, Martin Klapper. +Adam Talbot, Alexandru Mizeranschi, Hugo Tavares, Júlia Mir Pedrol, Martin Klapper, Mehrdad Jaberi, Robert Syme, Rosa Herbst, Vedanth Ramji, @Microbion. ## Contributions and Support diff --git a/assets/email_template.html b/assets/email_template.html index 8885af5d..062c157e 100644 --- a/assets/email_template.html +++ b/assets/email_template.html @@ -12,7 +12,7 @@ -

-<h1>nf-core/funcscan v${version}</h1>
+<h1>nf-core/funcscan ${version}</h1>

<h2>Run Name: $runName</h2>

<% if (!success){ diff --git a/assets/email_template.txt b/assets/email_template.txt index 4b65c9d4..5c9bd9d5 100644 --- a/assets/email_template.txt +++ b/assets/email_template.txt @@ -4,7 +4,7 @@ |\\ | |__ __ / ` / \\ |__) |__ } { | \\| | \\__, \\__/ | \\ |___ \\`-._,-`-, `._,._,' - nf-core/funcscan v${version} + nf-core/funcscan ${version} ---------------------------------------------------- Run Name: $runName diff --git a/assets/methods_description_template.yml b/assets/methods_description_template.yml index 2cc2aa50..f51e757f 100644 --- a/assets/methods_description_template.yml +++ b/assets/methods_description_template.yml @@ -3,8 +3,7 @@ description: "Suggested text and references to use when describing pipeline usag section_name: "nf-core/funcscan Methods Description" section_href: "https://github.com/nf-core/funcscan" plot_type: "html" -## TODO nf-core: Update the HTML below to your preferred methods description, e.g. add publication citation for this pipeline -## You inject any metadata in the Nextflow '${workflow}' object +## Inject metadata in the Nextflow '${workflow}' object data: |

  <h4>Methods</h4>
  <p>Data was processed using nf-core/funcscan v${workflow.manifest.version} ${doi_text} of the nf-core collection of workflows (Ewels et al., 2020), utilising reproducible software environments from the Bioconda (Grüning et al., 2018) and Biocontainers (da Veiga Leprevost et al., 2017) projects.</p>
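Because this template mixes YAML with `${...}` placeholders that are substituted at run time, a quick syntax check after editing can catch mistakes early; a minimal sketch, assuming a Python interpreter with PyYAML installed:

```bash
# Confirm the template is still valid YAML; the ${...} placeholders are
# plain strings at this stage and are only substituted by the pipeline.
python -c "import yaml; yaml.safe_load(open('assets/methods_description_template.yml')); print('valid YAML')"
```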

diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 042e6c78..5471c44d 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,7 +1,7 @@ report_comment: > - This report has been generated by the nf-core/funcscan + This report has been generated by the nf-core/funcscan analysis pipeline. For information about how to interpret these results, please see the - documentation. + documentation. report_section_order: "nf-core-funcscan-methods-description": order: -1000 @@ -10,8 +10,18 @@ report_section_order: "nf-core-funcscan-summary": order: -1002 +run_modules: + - prokka + - custom_content + +table_columns_visible: + Prokka: + organism: False + export_plots: true +disable_version_detection: true + custom_logo: "nf-core-funcscan_logo_flat_light.png" custom_logo_url: https://nf-co.re/funcscan custom_logo_title: "nf-core/funcscan" diff --git a/assets/nf-core-funcscan_logo_light.png b/assets/nf-core-funcscan_logo_light.png index f00d53e8..f63ae1f5 100644 Binary files a/assets/nf-core-funcscan_logo_light.png and b/assets/nf-core-funcscan_logo_light.png differ diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv index 22583f22..791912cd 100644 --- a/assets/samplesheet.csv +++ b/assets/samplesheet.csv @@ -1,3 +1,4 @@ -sample,fasta -sample_1,https://raw.githubusercontent.com/nf-core/test-datasets/funcscan/wastewater_metagenome_contigs_1.fasta.gz -sample_2,https://raw.githubusercontent.com/nf-core/test-datasets/funcscan/wastewater_metagenome_contigs_2.fasta.gz +sample,fasta,protein,gbk +sample_1,https://raw.githubusercontent.com/nf-core/test-datasets/funcscan/wastewater_metagenome_contigs_1.fasta.gz,https://raw.githubusercontent.com/nf-core/test-datasets/funcscan/wastewater_metagenome_contigs_prokka_1.faa,https://raw.githubusercontent.com/nf-core/test-datasets/funcscan/wastewater_metagenome_contigs_prokka_1.gbk +sample_2,https://raw.githubusercontent.com/nf-core/test-datasets/funcscan/wastewater_metagenome_contigs_2.fasta.gz,https://raw.githubusercontent.com/nf-core/test-datasets/funcscan/wastewater_metagenome_contigs_prokka_2.faa.gz,https://raw.githubusercontent.com/nf-core/test-datasets/funcscan/wastewater_metagenome_contigs_prokka_2.gbk.gz +sample_3,https://raw.githubusercontent.com/nf-core/test-datasets/funcscan/wastewater_metagenome_contigs.fasta diff --git a/assets/schema_input.json b/assets/schema_input.json index 757969c2..62b4ece9 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -18,9 +18,27 @@ "type": "string", "format": "file-path", "exists": true, - "pattern": "^\\S+\\.(fasta|fas|fa|fna)(\\.gz)?$", - "errorMessage": "Fasta file for reads must be provided, cannot contain spaces and must have extension '.fasta', '.fas', '.fa' or '.fna' (any of these can be optionally compressed as '.gz')", + "pattern": "^\\S+\\.(fasta|fas|fna|fa)(\\.gz)?$", + "errorMessage": "Fasta file for reads must be provided, cannot contain spaces and must have extension `.fa.gz`, `.fna.gz` or `.fasta.gz`", "unique": true + }, + "protein": { + "type": "string", + "format": "file-path", + "exists": true, + "pattern": "^\\S+\\.(faa|fasta)(\\.gz)?$", + "errorMessage": "Input file for peptide annotations has incorrect file format. File must end in `.fasta` or `.faa`", + "unique": true, + "dependentRequired": ["gbk"] + }, + "gbk": { + "type": "string", + "format": "file-path", + "exists": true, + "pattern": "^\\S+\\.(gbk|gbff)(\\.gz)?$", + "errorMessage": "Input file for feature annotations has incorrect file format. 
File must end in `.gbk.gz` or `.gbff.gz`", + "unique": true, + "dependentRequired": ["protein"] } }, "required": ["sample", "fasta"] diff --git a/assets/slackreport.json b/assets/slackreport.json index 501716d6..b2459e0b 100644 --- a/assets/slackreport.json +++ b/assets/slackreport.json @@ -3,7 +3,7 @@ { "fallback": "Plain-text summary of the attachment.", "color": "<% if (success) { %>good<% } else { %>danger<%} %>", - "author_name": "nf-core/funcscan v${version} - ${runName}", + "author_name": "nf-core/funcscan ${version} - ${runName}", "author_icon": "https://www.nextflow.io/docs/latest/_static/favicon.ico", "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors<% } %>", "fields": [ diff --git a/bin/comBGC.py b/bin/comBGC.py index 81f1eb3b..dccece69 100755 --- a/bin/comBGC.py +++ b/bin/comBGC.py @@ -1,5 +1,8 @@ #!/usr/bin/env python3 +# Written by Jasmin Frangenberg and released under the MIT license. +# See below for full license text. + from Bio import SeqIO import pandas as pd import argparse @@ -32,22 +35,25 @@ SOFTWARE. """ -tool_version = "0.6.1" +tool_version = "0.6.3" welcome = """\ ........................ * comBGC v.{version} * ........................ This tool aggregates the results of BGC prediction tools: antiSMASH, deepBGC, and GECCO - For detailed usage documentation please - refer to https://nf-co.re/funcscan + For detailed usage documentation please refer + to https://nf-co.re/funcscan .........................................................""".format( version=tool_version ) # Initialize parser parser = argparse.ArgumentParser( - prog="comBGC", formatter_class=argparse.RawTextHelpFormatter, description=(welcome), add_help=True + prog="comBGC", + formatter_class=argparse.RawTextHelpFormatter, + description=(welcome), + add_help=True, ) # Input options @@ -85,8 +91,12 @@ sample). Can only be used if --input is not specified.""", type=str, ) -parser.add_argument("-vv", "--verbose", help="increase output verbosity", action="store_true") -parser.add_argument("-v", "--version", help="show version number and exit", action="store_true") +parser.add_argument( + "-vv", "--verbose", help="increase output verbosity", action="store_true" +) +parser.add_argument( + "-v", "--version", help="show version number and exit", action="store_true" +) # Get command line arguments args = parser.parse_args() @@ -108,7 +118,7 @@ # Assign input files to respective tools if input: for path in input: - if path.endswith(".gbk"): + if path.endswith(".gbk") and not re.search("region\d\d\d\.gbk$", path): # Make sure to only fetch relevant GBK files, i.e. those containing all collective antiSMASH BGCs with open(path) as infile: for line in infile: if re.search("##GECCO-Data-START##", line): @@ -131,7 +141,9 @@ # Make sure that at least one input argument is given if not (input_antismash or input_gecco or input_deepbgc or dir_antismash): - exit("Please specify at least one input file (i.e. output from antismash, deepbgc, or gecco) or see --help") + exit( + "Please specify at least one input file (i.e. 
output from antismash, deepbgc, or gecco) or see --help" + ) ######################## # ANTISMASH FUNCTIONS @@ -213,131 +225,155 @@ def antismash_workflow(antismash_paths): else: gbk_path = path - kcb_files = [] - if kcb_path: - kcb_files = [file for file in os.listdir(kcb_path) if file.startswith("c") and file.endswith(".txt")] - - # Aggregate information - Sample_ID = gbk_path.split("/")[-1].split(".gbk")[-2] # Assuming file name equals sample name - if verbose: - print("\nParsing antiSMASH file(s): " + Sample_ID + "\n... ", end="") - - with open(gbk_path) as gbk: - for record in SeqIO.parse(gbk, "genbank"): # GBK records are contigs in this case - # Initiate variables per contig - cluster_num = 1 - antismash_out_line = {} - Contig_ID = record.id - Product_class = "" - BGC_complete = "" - BGC_start = "" - BGC_end = "" - BGC_length = "" - PFAM_domains = [] - MIBiG_ID = "NA" - - for feature in record.features: - # Extract relevant infos from the first protocluster feature from the contig record - if feature.type == "protocluster": - if ( - antismash_out_line - ): # If there is more than 1 BGC per contig, reset the output line for new BGC. Assuming that BGCs do not overlap. - if not CDS_ID: - CDS_ID = ["NA"] - antismash_out_line = { # Create dictionary of BGC info - "Sample_ID": Sample_ID, - "Prediction_tool": "antiSMASH", - "Contig_ID": Contig_ID, - "Product_class": ";".join(Product_class), - "BGC_probability": "NA", - "BGC_complete": BGC_complete, - "BGC_start": BGC_start, - "BGC_end": BGC_end, - "BGC_length": BGC_length, - "CDS_ID": ";".join(CDS_ID), - "CDS_count": CDS_count, - "PFAM_domains": ";".join(PFAM_domains), - "MIBiG_ID": MIBiG_ID, - "InterPro_ID": "NA", - } - antismash_out_line = pd.DataFrame([antismash_out_line]) - antismash_out = pd.concat([antismash_out, antismash_out_line], ignore_index=True) - antismash_out_line = {} - - # Reset variables per BGC - CDS_ID = [] - CDS_count = 0 - PFAM_domains = [] - - # Extract all the BGC info - Product_class = feature.qualifiers["product"] - for i in range(len(Product_class)): - Product_class[i] = ( - Product_class[i][0].upper() + Product_class[i][1:] - ) # Make first letters uppercase, e.g. 
lassopeptide -> Lassopeptide - - if feature.qualifiers["contig_edge"] == ["True"]: - BGC_complete = "No" - elif feature.qualifiers["contig_edge"] == ["False"]: - BGC_complete = "Yes" - - BGC_start = feature.location.start + 1 # +1 because zero-based start position - BGC_end = feature.location.end - BGC_length = feature.location.end - feature.location.start + 1 - - # If there are knownclusterblast files for the BGC, get MIBiG IDs of their homologs - if kcb_files: - print(kcb_files) - kcb_file = "{}_c{}.txt".format( - record.id, str(cluster_num) - ) # Check if this filename is among the knownclusterblast files - if kcb_file in kcb_files: - MIBiG_IDs = ";".join(parse_knownclusterblast(os.path.join(kcb_path, kcb_file))) - if MIBiG_IDs != "": - MIBiG_ID = MIBiG_IDs - cluster_num += 1 - - # Count functional CDSs (no pseudogenes) and get the PFAM annotation - elif ( - feature.type == "CDS" and "translation" in feature.qualifiers.keys() and BGC_start != "" - ): # Make sure not to count pseudogenes (which would have no "translation tag") and count no CDSs before first BGC - if feature.location.end <= BGC_end: # Make sure CDS is within the current BGC region - if "locus_tag" in feature.qualifiers: - CDS_ID.append(feature.qualifiers["locus_tag"][0]) - CDS_count += 1 - if "sec_met_domain" in feature.qualifiers.keys(): - for PFAM_domain in feature.qualifiers["sec_met_domain"]: - PFAM_domain_name = re.search("(.+) \(E-value", PFAM_domain).group(1) - PFAM_domains.append(PFAM_domain_name) - - # Create dictionary of BGC info - if not CDS_ID: - CDS_ID = ["NA"] - antismash_out_line = { - "Sample_ID": Sample_ID, - "Prediction_tool": "antiSMASH", - "Contig_ID": Contig_ID, - "Product_class": ";".join(Product_class), - "BGC_probability": "NA", - "BGC_complete": BGC_complete, - "BGC_start": BGC_start, - "BGC_end": BGC_end, - "BGC_length": BGC_length, - "CDS_ID": ";".join(CDS_ID), - "CDS_count": CDS_count, - "PFAM_domains": ";".join(PFAM_domains), - "MIBiG_ID": MIBiG_ID, - "InterPro_ID": "NA", - } - - if BGC_start != "": # Only keep records with BGCs - antismash_out_line = pd.DataFrame([antismash_out_line]) - antismash_out = pd.concat([antismash_out, antismash_out_line], ignore_index=True) - - # Reset variables per BGC - CDS_ID = [] - CDS_count = 0 + kcb_files = [] + if kcb_path: + kcb_files = [ + file + for file in os.listdir(kcb_path) + if file.startswith("c") and file.endswith(".txt") + ] + + # Aggregate information + Sample_ID = gbk_path.split("/")[-1].split(".gbk")[ + -2 + ] # Assuming file name equals sample name + if verbose: + print("\nParsing antiSMASH file(s): " + Sample_ID + "\n... ", end="") + + with open(gbk_path) as gbk: + for record in SeqIO.parse( + gbk, "genbank" + ): # GBK records are contigs in this case + # Initiate variables per contig + cluster_num = 1 + antismash_out_line = {} + Contig_ID = record.id + Product_class = "" + BGC_complete = "" + BGC_start = "" + BGC_end = "" + BGC_length = "" PFAM_domains = [] + MIBiG_ID = "NA" + + for feature in record.features: + # Extract relevant infos from the first protocluster feature from the contig record + if feature.type == "protocluster": + if ( + antismash_out_line + ): # If there is more than 1 BGC per contig, reset the output line for new BGC. Assuming that BGCs do not overlap. 
+ if not CDS_ID: + CDS_ID = ["NA"] + antismash_out_line = { # Create dictionary of BGC info + "Sample_ID": Sample_ID, + "Prediction_tool": "antiSMASH", + "Contig_ID": Contig_ID, + "Product_class": ";".join(Product_class), + "BGC_probability": "NA", + "BGC_complete": BGC_complete, + "BGC_start": BGC_start, + "BGC_end": BGC_end, + "BGC_length": BGC_length, + "CDS_ID": ";".join(CDS_ID), + "CDS_count": CDS_count, + "PFAM_domains": ";".join(PFAM_domains), + "MIBiG_ID": MIBiG_ID, + "InterPro_ID": "NA", + } + antismash_out_line = pd.DataFrame([antismash_out_line]) + antismash_out = pd.concat( + [antismash_out, antismash_out_line], ignore_index=True + ) + antismash_out_line = {} + + # Reset variables per BGC + CDS_ID = [] + CDS_count = 0 + PFAM_domains = [] + + # Extract all the BGC info + Product_class = feature.qualifiers["product"] + for i in range(len(Product_class)): + Product_class[i] = ( + Product_class[i][0].upper() + Product_class[i][1:] + ) # Make first letters uppercase, e.g. lassopeptide -> Lassopeptide + + if feature.qualifiers["contig_edge"] == ["True"]: + BGC_complete = "No" + elif feature.qualifiers["contig_edge"] == ["False"]: + BGC_complete = "Yes" + + BGC_start = ( + feature.location.start + 1 + ) # +1 because zero-based start position + BGC_end = feature.location.end + BGC_length = feature.location.end - feature.location.start + + # If there are knownclusterblast files for the BGC, get MIBiG IDs of their homologs + if kcb_files: + print(kcb_files) + kcb_file = "{}_c{}.txt".format( + record.id, str(cluster_num) + ) # Check if this filename is among the knownclusterblast files + if kcb_file in kcb_files: + MIBiG_IDs = ";".join( + parse_knownclusterblast( + os.path.join(kcb_path, kcb_file) + ) + ) + if MIBiG_IDs != "": + MIBiG_ID = MIBiG_IDs + cluster_num += 1 + + # Count functional CDSs (no pseudogenes) and get the PFAM annotation + elif ( + feature.type == "CDS" + and "translation" in feature.qualifiers.keys() + and BGC_start != "" + ): # Make sure not to count pseudogenes (which would have no "translation tag") and count no CDSs before first BGC + if ( + feature.location.end <= BGC_end + ): # Make sure CDS is within the current BGC region + if "locus_tag" in feature.qualifiers: + CDS_ID.append(feature.qualifiers["locus_tag"][0]) + CDS_count += 1 + if "sec_met_domain" in feature.qualifiers.keys(): + for PFAM_domain in feature.qualifiers["sec_met_domain"]: + PFAM_domain_name = re.search( + "(.+) \(E-value", PFAM_domain + ).group(1) + PFAM_domains.append(PFAM_domain_name) + + # Create dictionary of BGC info + if not CDS_ID: + CDS_ID = ["NA"] + antismash_out_line = { + "Sample_ID": Sample_ID, + "Prediction_tool": "antiSMASH", + "Contig_ID": Contig_ID, + "Product_class": ";".join(Product_class), + "BGC_probability": "NA", + "BGC_complete": BGC_complete, + "BGC_start": BGC_start, + "BGC_end": BGC_end, + "BGC_length": BGC_length, + "CDS_ID": ";".join(CDS_ID), + "CDS_count": CDS_count, + "PFAM_domains": ";".join(PFAM_domains), + "MIBiG_ID": MIBiG_ID, + "InterPro_ID": "NA", + } + + if BGC_start != "": # Only keep records with BGCs + antismash_out_line = pd.DataFrame([antismash_out_line]) + antismash_out = pd.concat( + [antismash_out, antismash_out_line], ignore_index=True + ) + + # Reset variables per BGC + CDS_ID = [] + CDS_count = 0 + PFAM_domains = [] if verbose: print("Done.") @@ -413,7 +449,11 @@ def deepbgc_workflow(deepbgc_path): deepbgc_out = pd.DataFrame(columns=deepbgc_sum_cols) # Add relevant deepBGC output columns per BGC - deepbgc_df = pd.read_csv(deepbgc_path, 
sep="\t").drop(deepbgc_unused_cols, axis=1).rename(columns=deepbgc_map_dict) + deepbgc_df = ( + pd.read_csv(deepbgc_path, sep="\t") + .drop(deepbgc_unused_cols, axis=1) + .rename(columns=deepbgc_map_dict) + ) deepbgc_df["Sample_ID"] = sample deepbgc_df["Prediction_tool"] = "deepBGC" deepbgc_df["BGC_complete"] = "NA" @@ -511,7 +551,11 @@ def gecco_workflow(gecco_paths): # Add sample information sample = tsv_path.split("/")[-1].split(".")[0] - gecco_df = pd.read_csv(tsv_path, sep="\t").drop(unused_cols, axis=1).rename(columns=map_dict) + gecco_df = ( + pd.read_csv(tsv_path, sep="\t") + .drop(unused_cols, axis=1) + .rename(columns=map_dict) + ) # Fill columns (1 row per BGC) gecco_df["Sample_ID"] = sample @@ -524,7 +568,9 @@ def gecco_workflow(gecco_paths): # Add column 'InterPro_ID' for gbk_path in gbk_paths: bgc_id = gbk_path.split("/")[-1][0:-4] - gecco_df.loc[gecco_df["cluster_id"] == bgc_id, "InterPro_ID"] = getInterProID(gbk_path) + gecco_df.loc[gecco_df["cluster_id"] == bgc_id, "InterPro_ID"] = getInterProID( + gbk_path + ) # Add empty columns with no output from GECCO gecco_df["BGC_complete"] = "NA" @@ -551,7 +597,11 @@ def gecco_workflow(gecco_paths): if __name__ == "__main__": if input_antismash: - tools = {"antiSMASH": input_antismash, "deepBGC": input_deepbgc, "GECCO": input_gecco} + tools = { + "antiSMASH": input_antismash, + "deepBGC": input_deepbgc, + "GECCO": input_gecco, + } elif dir_antismash: tools = {"antiSMASH": dir_antismash} else: @@ -578,7 +628,9 @@ def gecco_workflow(gecco_paths): antismash_paths = prepare_multisample_input_antismash(dir_antismash) for input_antismash in antismash_paths: summary_antismash_temp = antismash_workflow(input_antismash) - summary_antismash = pd.concat([summary_antismash, summary_antismash_temp]) + summary_antismash = pd.concat( + [summary_antismash, summary_antismash_temp] + ) else: summary_antismash = antismash_workflow(input_antismash) elif tool == "deepBGC": @@ -589,11 +641,19 @@ def gecco_workflow(gecco_paths): # Summarize and sort data frame summary_all = pd.concat([summary_antismash, summary_deepbgc, summary_gecco]) summary_all.sort_values( - by=["Sample_ID", "Contig_ID", "BGC_start", "BGC_length", "Prediction_tool"], axis=0, inplace=True + by=["Sample_ID", "Contig_ID", "BGC_start", "BGC_length", "Prediction_tool"], + axis=0, + inplace=True, ) + # Rearrange and rename the columns in the summary df + summary_all = summary_all.iloc[:, [0, 2, 1] + list(range(3, len(summary_all.columns)))] + summary_all.rename(columns={'Sample_ID':'sample_id', 'Contig_ID':'contig_id', 'CDS_ID':'BGC_region_contig_ids'}, inplace=True) + # Write results to TSV if not os.path.exists(outdir): os.makedirs(outdir) - summary_all.to_csv(os.path.join(outdir, "combgc_summary.tsv"), sep="\t", index=False) + summary_all.to_csv( + os.path.join(outdir, "combgc_summary.tsv"), sep="\t", index=False + ) print("Your BGC summary file is: " + os.path.join(outdir, "combgc_summary.tsv")) diff --git a/bin/merge_taxonomy.py b/bin/merge_taxonomy.py new file mode 100755 index 00000000..44eed31a --- /dev/null +++ b/bin/merge_taxonomy.py @@ -0,0 +1,233 @@ +#!/usr/bin/env python3 + +# Written by Anan Ibrahim and released under the MIT license. +# See git repository (https://github.com/Darcy220606/AMPcombi) for full license text. 
+# Date: March 2024
+# Version: 0.1.0
+
+# Required modules
+import sys
+import os
+import pandas as pd
+import numpy as np
+import argparse
+
+tool_version = "0.1.0"
+#########################################
+# TOP LEVEL: AMPCOMBI
+#########################################
+parser = argparse.ArgumentParser(prog = 'merge_taxonomy', formatter_class=argparse.RawDescriptionHelpFormatter,
+                                 usage='%(prog)s [options]',
+                                 description=('''\
+    .............................................................................
+                            *merge_taxonomy*
+    .............................................................................
+    This script merges all three funcscan workflows with
+    MMseqs2 taxonomy results. This is done in three submodules that can be
+    activated separately.
+    .............................................................................'''),
+                                 epilog='''Thank you for running merge_taxonomy!''',
+                                 add_help=True)
+parser.add_argument('--version', action='version', version='merge_taxonomy ' + tool_version)
+
+#########################################
+# SUBPARSERS
+#########################################
+subparsers = parser.add_subparsers(required=True)
+
+#########################################
+# SUBPARSER: AMPCOMBI
+#########################################
+ampcombi_parser = subparsers.add_parser('ampcombi_taxa')
+
+ampcombi_parser.add_argument("--ampcombi", dest="amp", nargs='?', help="Enter the path to the ampcombi_complete_summary.tsv (default: %(default)s)",
+                             type=str, default='ampcombi_complete_summary.tsv')
+ampcombi_parser.add_argument("--taxonomy", dest="taxa1", nargs='+', help="Enter the list of taxonomy files for all samples.")
+
+#########################################
+# SUBPARSER: COMBGC
+#########################################
+combgc_parser = subparsers.add_parser('combgc_taxa')
+
+combgc_parser.add_argument("--combgc", dest="bgc", nargs='?', help="Enter the path to the combgc_complete_summary.tsv (default: %(default)s)",
+                           type=str, default='combgc_complete_summary.tsv')
+combgc_parser.add_argument("--taxonomy", dest="taxa2", nargs='+', help="Enter the list of taxonomy files for all samples.")
+
+#########################################
+# SUBPARSER: HAMRONIZATION
+#########################################
+hamronization_parser = subparsers.add_parser('hamronization_taxa')
+
+hamronization_parser.add_argument("--hamronization", dest="arg", nargs='?', help="Enter the path to the hamronization_complete_summary.tsv (default: %(default)s)",
+                                  type=str, default='hamronization_complete_summary.tsv')
+hamronization_parser.add_argument("--taxonomy", dest="taxa3", nargs='+', help="Enter the list of taxonomy files for all samples.")
+
+#########################################
+# TAXONOMY
+#########################################
+def reformat_mmseqs_taxonomy(mmseqs_taxonomy):
+    mmseqs2_df = pd.read_csv(mmseqs_taxonomy, sep='\t', header=None, names=['contig_id', 'taxid', 'rank_label', 'scientific_name', 'lineage', 'mmseqs_lineage_contig'])
+    # remove the lineage column
+    mmseqs2_df.drop('lineage', axis=1, inplace=True)
+    # convert any classification that has Eukaryota/root to NaN as funcscan targets bacteria only
+    for i, row in mmseqs2_df.iterrows():
+        lineage = str(row['mmseqs_lineage_contig'])
+        if 'Eukaryota' in lineage or 'root' in lineage:
+            mmseqs2_df.at[i, 'mmseqs_lineage_contig'] = np.nan
+    # insert the sample name in the first column according to the file basename
+    file_basename = os.path.basename(mmseqs_taxonomy)
+    filename = os.path.splitext(file_basename)[0]
+    mmseqs2_df.insert(0, 'sample_id', filename)
+    return mmseqs2_df
+
+#########################################
+# FUNCTION: AMPCOMBI
+#########################################
+def ampcombi_taxa(args):
+    merged_df = pd.DataFrame()
+
+    # assign input args to variables
+    ampcombi = args.amp
+    taxa_list = args.taxa1
+
+    # prepare the taxonomy files
+    taxa_df = pd.DataFrame()
+    # append the dfs to the taxonomy_files_combined
+    for file in taxa_list:  # list of taxa files ['','']
+        df = reformat_mmseqs_taxonomy(file)
+        taxa_df = pd.concat([taxa_df, df])
+
+    # filter the tool df
+    tool_df = pd.read_csv(ampcombi, sep='\t')
+    # remove the column with contig_id - duplicate #NOTE: will be fixed in AMPcombi v2.0.0
+    tool_df = tool_df.drop('contig_id', axis=1)
+    # make sure 1st and 2nd column have the same column labels
+    tool_df.rename(columns={tool_df.columns[0]: 'sample_id'}, inplace=True)
+    tool_df.rename(columns={tool_df.columns[1]: 'contig_id'}, inplace=True)
+    # grab the real contig id in another column copy for merging
+    tool_df['contig_id_merge'] = tool_df['contig_id'].str.rsplit('_', n=1).str[0]
+
+    # merge rows from taxa to the ampcombi df based on substring match in sample_id
+    # grab the unique sample names from the taxonomy table
+    samples_taxa = taxa_df['sample_id'].unique()
+    # for every sampleID in taxadf merge the results
+    for sampleID in samples_taxa:
+        # subset ampcombi
+        subset_tool = tool_df.loc[tool_df['sample_id'].str.contains(sampleID)]
+        # subset taxa
+        subset_taxa = taxa_df.loc[taxa_df['sample_id'].str.contains(sampleID)]
+        # merge
+        subset_df = pd.merge(subset_tool, subset_taxa, left_on = 'contig_id_merge', right_on='contig_id', how='left')
+        # cleanup the table
+        columnsremove = ['contig_id_merge','contig_id_y', 'sample_id_y']
+        subset_df.drop(columnsremove, axis=1, inplace=True)
+        subset_df.rename(columns={'contig_id_x': 'contig_id', 'sample_id_x':'sample_id'},inplace=True)
+        # append to the combined df (DataFrame.append was removed in pandas 2.0)
+        merged_df = pd.concat([merged_df, subset_df], ignore_index=True)
+
+    # write to file
+    merged_df.to_csv('ampcombi_complete_summary_taxonomy.tsv', sep='\t', index=False)
+
+#########################################
+# FUNCTION: COMBGC
+#########################################
+def combgc_taxa(args):
+    merged_df = pd.DataFrame()
+
+    # assign input args to variables
+    combgc = args.bgc
+    taxa_list = args.taxa2
+
+    # prepare the taxonomy files
+    taxa_df = pd.DataFrame()
+    # append the dfs to the taxonomy_files_combined
+    for file in taxa_list:  # list of taxa files ['','']
+        df = reformat_mmseqs_taxonomy(file)
+        taxa_df = pd.concat([taxa_df, df])
+
+    # filter the tool df
+    tool_df = pd.read_csv(combgc, sep='\t')
+    # make sure 1st and 2nd column have the same column labels
+    tool_df.rename(columns={tool_df.columns[0]: 'sample_id'}, inplace=True)
+    tool_df.rename(columns={tool_df.columns[1]: 'contig_id'}, inplace=True)
+
+    # merge rows from taxa to the combgc df based on substring match in sample_id
+    # grab the unique sample names from the taxonomy table
+    samples_taxa = taxa_df['sample_id'].unique()
+    # for every sampleID in taxadf merge the results
+    for sampleID in samples_taxa:
+        # subset combgc
+        subset_tool = tool_df.loc[tool_df['sample_id'].str.contains(sampleID)]
+        # subset taxa
+        subset_taxa = taxa_df.loc[taxa_df['sample_id'].str.contains(sampleID)]
+        # merge
+        subset_df = pd.merge(subset_tool, subset_taxa, left_on = 'contig_id', right_on='contig_id', how='left')
+        # cleanup the table
+        columnsremove = ['sample_id_y']
+        subset_df.drop(columnsremove, axis=1, inplace=True)
+        subset_df.rename(columns={'sample_id_x':'sample_id'},inplace=True)
+        # append to the combined df
+        merged_df = pd.concat([merged_df, subset_df], ignore_index=True)
+
+    # write to file
+    merged_df.to_csv('combgc_complete_summary_taxonomy.tsv', sep='\t', index=False)
+
+#########################################
+# FUNCTION: HAMRONIZATION
+#########################################
+def hamronization_taxa(args):
+    merged_df = pd.DataFrame()
+
+    # assign input args to variables
+    hamronization = args.arg
+    taxa_list = args.taxa3
+
+    # prepare the taxonomy files
+    taxa_df = pd.DataFrame()
+    # append the dfs to the taxonomy_files_combined
+    for file in taxa_list:  # list of taxa files ['','']
+        df = reformat_mmseqs_taxonomy(file)
+        taxa_df = pd.concat([taxa_df, df])
+
+    # filter the tool df
+    tool_df = pd.read_csv(hamronization, sep='\t')
+    # rename the columns
+    tool_df.rename(columns={'input_file_name':'sample_id', 'input_sequence_id':'contig_id'}, inplace=True)
+    # reorder the columns
+    new_order = ['sample_id', 'contig_id'] + [col for col in tool_df.columns if col not in ['sample_id', 'contig_id']]
+    tool_df = tool_df.reindex(columns=new_order)
+    # grab the real contig id in another column copy for merging
+    tool_df['contig_id_merge'] = tool_df['contig_id'].str.rsplit('_', n=1).str[0]
+
+    # merge rows from taxa to the hamronization df based on substring match in sample_id
+    # grab the unique sample names from the taxonomy table
+    samples_taxa = taxa_df['sample_id'].unique()
+    # for every sampleID in taxadf merge the results
+    for sampleID in samples_taxa:
+        # subset hamronization
+        subset_tool = tool_df.loc[tool_df['sample_id'].str.contains(sampleID)]
+        # subset taxa
+        subset_taxa = taxa_df.loc[taxa_df['sample_id'].str.contains(sampleID)]
+        # merge
+        subset_df = pd.merge(subset_tool, subset_taxa, left_on = 'contig_id_merge', right_on='contig_id', how='left')
+        # cleanup the table
+        columnsremove = ['contig_id_merge','contig_id_y', 'sample_id_y']
+        subset_df.drop(columnsremove, axis=1, inplace=True)
+        subset_df.rename(columns={'contig_id_x': 'contig_id', 'sample_id_x':'sample_id'},inplace=True)
+        # append to the combined df
+        merged_df = pd.concat([merged_df, subset_df], ignore_index=True)
+
+    # write to file
+    merged_df.to_csv('hamronization_complete_summary_taxonomy.tsv', sep='\t', index=False)
+
+#########################################
+# SUBPARSERS: DEFAULT
+#########################################
+ampcombi_parser.set_defaults(func=ampcombi_taxa)
+combgc_parser.set_defaults(func=combgc_taxa)
+hamronization_parser.set_defaults(func=hamronization_taxa)
+
+if __name__ == '__main__':
+    args = parser.parse_args()
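+    # Each subparser bound its handler via set_defaults(func=...) above, so the
+    # chosen subcommand's function runs next. A hypothetical invocation for one
+    # summary table plus two per-sample taxonomy TSVs:
+    #   merge_taxonomy.py ampcombi_taxa --ampcombi ampcombi_complete_summary.tsv \
+    #       --taxonomy sample_1.tsv sample_2.tsv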
args.func(args) # call the default function diff --git a/conf/base.config b/conf/base.config index d689625f..a928e380 100644 --- a/conf/base.config +++ b/conf/base.config @@ -58,9 +58,6 @@ process { errorStrategy = 'retry' maxRetries = 2 } - withName:CUSTOM_DUMPSOFTWAREVERSIONS { - cache = false - } /* * Resource specifications @@ -76,21 +73,12 @@ process { cpus = 1 } - withName:BIOAWK { - cache = false - } - withName: PROKKA { memory = { check_max( 8.GB * task.attempt, 'memory' ) } cpus = { check_max( 4 * task.attempt, 'cpus' ) } time = { check_max( 8.h * task.attempt, 'time' ) } } - withName: PRODIGAL_GFF { - memory = { check_max( 2.GB * task.attempt, 'memory' ) } - cpus = 1 - } - withName: PRODIGAL_GBK { memory = { check_max( 2.GB * task.attempt, 'memory' ) } cpus = 1 @@ -216,8 +204,29 @@ process { cpus = 1 } - withName: AMPCOMBI { + withName: ARGNORM_DEEPARG { + memory = { check_max( 4.GB * task.attempt, 'memory' ) } + cpus = 1 + } + + withName: ARGNORM_ABRICATE { + memory = { check_max( 4.GB * task.attempt, 'memory' ) } + cpus = 1 + } + + withName: ARGNORM_AMRFINDERPLUS { + memory = { check_max( 4.GB * task.attempt, 'memory' ) } + cpus = 1 + } + + withName: AMPCOMBI2_PARSETABLES { memory = { check_max( 8.GB * task.attempt, 'memory' ) } time = { check_max( 2.h * task.attempt, 'time' ) } } + + withName: AMPCOMBI2_CLUSTER { + memory = { check_max( 6.GB * task.attempt, 'memory' ) } + time = { check_max( 2.h * task.attempt, 'time' ) } + } + } diff --git a/conf/modules.config b/conf/modules.config index c94e5324..96b1eb98 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -18,43 +18,88 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] - withName: CUSTOM_DUMPSOFTWAREVERSIONS { + withName: 'MULTIQC' { + ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } publishDir = [ - path: { "${params.outdir}/pipeline_info" }, + path: { "${params.outdir}/multiqc" }, mode: params.publish_dir_mode, - pattern: '*_versions.yml' + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } - withName: MULTIQC { - ext.args = params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' + withName: GUNZIP { publishDir = [ - path: { "${params.outdir}/multiqc" }, + enabled: false + ] + } + + withName: MMSEQS_DATABASES { + publishDir = [ + path: { "${params.outdir}/databases/mmseqs/" }, mode: params.publish_dir_mode, + enabled: params.save_db, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] + ext.args = [ + params.taxa_classification_mmseqs_db_savetmp ? "" : "--remove-tmp-files" , + ].join(' ').trim() } - withName: GUNZIP { + withName: MMSEQS_CREATEDB { publishDir = [ - enabled: false + path: { "${params.outdir}/databases/mmseqs/mmseqs_createdb/" }, + mode: params.publish_dir_mode, + enabled: params.save_db, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: MMSEQS_TAXONOMY { + publishDir = [ + path: { "${params.outdir}/databases/mmseqs/mmseqs_taxonomy/" }, + mode: params.publish_dir_mode, + enabled: params.save_db, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ext.args = [ + params.taxa_classification_mmseqs_taxonomy_savetmp ? 
"" : "--remove-tmp-files", + "--search-type ${params.taxa_classification_mmseqs_taxonomy_searchtype}", + "--lca-ranks ${params.taxa_classification_mmseqs_taxonomy_lcaranks}", + "--tax-lineage ${params.taxa_classification_mmseqs_taxonomy_taxlineage}", + "-s ${params.taxa_classification_mmseqs_taxonomy_sensitivity}", + "--orf-filter-s ${params.taxa_classification_mmseqs_taxonomy_orffilters}", + "--lca-mode ${params.taxa_classification_mmseqs_taxonomy_lcamode}", + "--vote-mode ${params.taxa_classification_mmseqs_taxonomy_votemode}", + ].join(' ').trim() + } + + withName: MMSEQS_CREATETSV { + publishDir = [ + path: { "${params.outdir}/taxa_classification/mmseqs_createtsv/${meta.id}/" }, + mode: params.publish_dir_mode, + enabled: params.run_taxa_classification, + pattern: "*.tsv", + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } - withName: BIOAWK { - ext.args = "-c fastx \'{print \">\" \$name ORS length(\$seq)}\'" + withName: SEQKIT_SEQ { + ext.prefix = { "${meta.id}_long" } publishDir = [ - path: { "${params.outdir}/" }, + path: { "${params.outdir}/bgc/seqkit/" }, mode: params.publish_dir_mode, - enabled: false, + enabled: params.bgc_savefilteredcontigs, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] - ext.prefix = { "${meta.id}.fa" } + ext.args = [ + "--min-len ${params.bgc_mincontiglength}" + ].join(' ').trim() } withName: PROKKA { + ext.prefix = { "${meta.id}_prokka" } // to prevent pigz symlink problems of input files if already uncompressed during post-annotation gzipping publishDir = [ - path: { "${params.outdir}/annotation/prokka/" }, + path: { "${params.outdir}/annotation/prokka/${meta.category}/" }, mode: params.publish_dir_mode, enabled: params.save_annotations, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } @@ -71,7 +116,7 @@ process { params.annotation_prokka_rawproduct ? '--rawproduct' : '', params.annotation_prokka_rnammer ? '--rnammer' : '', params.annotation_prokka_compliant ? '--compliant' : '', - params.annotation_prokka_addgenes ? '--addgenes' : '' + params.annotation_prokka_addgenes ? '--addgenes' : '', ].join(' ').trim() } @@ -79,7 +124,7 @@ process { publishDir = [ path: { "${params.outdir}/databases/bakta" }, mode: params.publish_dir_mode, - enabled: params.save_databases, + enabled: params.save_db, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] ext.args = [ @@ -88,13 +133,15 @@ process { } withName: BAKTA_BAKTA { + ext.prefix = { "${meta.id}_bakta" } // to prevent pigz symlink problems of input files if already uncompressed during post-annotation gzipping publishDir = [ - path: { "${params.outdir}/annotation/bakta" }, + path: { "${params.outdir}/annotation/bakta/${meta.category}/" }, mode: params.publish_dir_mode, enabled: params.save_annotations, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] ext.args = [ + params.annotation_bakta_singlemode ? '' : '--meta', "--min-contig-length ${params.annotation_bakta_mincontiglen}", "--translation-table ${params.annotation_bakta_translationtable}", "--gram ${params.annotation_bakta_gram}", @@ -116,28 +163,12 @@ process { ].join(' ').trim() } - withName: PRODIGAL_GFF { - publishDir = [ - path: { "${params.outdir}/annotation/prodigal/${meta.id}" }, - mode: params.publish_dir_mode, - enabled: params.save_annotations, - pattern: "*.{faa,fna,gff}.gz", - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - ext.args = [ - params.annotation_prodigal_singlemode ? 
"-p single" : "-p meta", - params.annotation_prodigal_closed ? "-c" : "", - params.annotation_prodigal_forcenonsd ? "-n" : "", - "-g ${params.annotation_prodigal_transtable}" - ].join(' ').trim() - } - - withName: PRODIGAL_GBK { + withName: PRODIGAL { publishDir = [ - path: { "${params.outdir}/annotation/prodigal/${meta.id}" }, + path: { "${params.outdir}/annotation/prodigal/${meta.category}/" }, mode: params.publish_dir_mode, enabled: params.save_annotations, - pattern: "*.gbk.gz", + pattern: "*.{faa,fna,gbk,faa.gz,faa.gz,fna.gz,gbk.gz}", saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] ext.args = [ @@ -149,11 +180,12 @@ process { } withName: PYRODIGAL { + ext.prefix = { "${meta.id}_pyrodigal" } // to prevent pigz symlink problems of input files if already uncompressed during post-annotation gzipping publishDir = [ - path: { "${params.outdir}/annotation/pyrodigal/${meta.id}" }, + path: { "${params.outdir}/annotation/pyrodigal/${meta.category}/" }, mode: params.publish_dir_mode, enabled: params.save_annotations, - pattern: "*.{faa,fna,gff,score}.gz", + pattern: "*.{faa,fna,gbk,score}.gz", saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] ext.args = [ @@ -171,7 +203,7 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] ext.args = [ - "--db ${params.arg_abricate_db}", + "--db ${params.arg_abricate_db_id}", "--minid ${params.arg_abricate_minid}", "--mincov ${params.arg_abricate_mincov}" ].join(' ').trim() @@ -181,7 +213,7 @@ process { publishDir = [ path: { "${params.outdir}/databases/amrfinderplus" }, mode: params.publish_dir_mode, - enabled: params.save_databases, + enabled: params.save_db, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } @@ -192,20 +224,22 @@ process { mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] - ext.args = [ - "--ident_min ${params.arg_amrfinderplus_identmin}", - "--coverage_min ${params.arg_amrfinderplus_coveragemin}", - "--translation_table ${params.arg_amrfinderplus_translationtable}", - params.arg_amrfinderplus_plus ? '--plus' : '', - params.arg_amrfinderplus_name ? '--name ${meta.id}' : '' - ].join(' ').trim() + ext.args = { + [ + "--ident_min ${params.arg_amrfinderplus_identmin}", + "--coverage_min ${params.arg_amrfinderplus_coveragemin}", + "--translation_table ${params.arg_amrfinderplus_translationtable}", + params.arg_amrfinderplus_plus ? '--plus' : '', + params.arg_amrfinderplus_name ? "--name ${meta.id}" : '' + ].join(' ').trim() + } } withName: DEEPARG_DOWNLOADDATA { publishDir = [ path: { "${params.outdir}/databases/deeparg" }, mode: params.publish_dir_mode, - enabled: params.save_databases, + enabled: params.save_db, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } @@ -227,6 +261,7 @@ process { } withName: FARGENE { + tag = {"${meta.id}|${hmm_model}"} publishDir = [ [ path: { "${params.outdir}/arg/fargene/${meta.id}" }, @@ -244,13 +279,13 @@ process { path: { "${params.outdir}/arg/fargene/${meta.id}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - pattern: "*/{predictedGenes,retrievedFragments}/*" + pattern: "*/{hmmsearchresults,predictedGenes,retrievedFragments}/*" ], [ path: { "${params.outdir}/arg/fargene/${meta.id}/" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, - pattern: "*/{hmmsearchresults,tmpdir}/*", + pattern: "*/{tmpdir}/*", enabled: params.arg_fargene_savetmpfiles ] ] @@ -259,6 +294,31 @@ process { ext.args = params.arg_fargene_orffinder ? '--orf-finder' : '' } + withName: UNTAR_CARD { + + ext.prefix = "card_database" + publishDir = [ + [ + path: { "${params.outdir}/databases/rgi" }, + mode: params.publish_dir_mode, + enabled: params.save_db, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ] + + } + + withName: RGI_CARDANNOTATION { + publishDir = [ + [ + path: { "${params.outdir}/databases/rgi" }, + mode: params.publish_dir_mode, + enabled: params.save_db, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ] + } + withName: RGI_MAIN { publishDir = [ [ @@ -282,12 +342,13 @@ process { enabled: params.arg_rgi_savetmpfiles ] ] - ext.args = [ + ext.args2 = [ "--alignment_tool ${params.arg_rgi_alignmenttool}", "--data ${params.arg_rgi_data}", params.arg_rgi_includeloose ? '--include_loose' : '', - params.arg_rgi_excludenudge ? '--exclude_nudge' : '', - params.arg_rgi_lowquality ? '--low_quality' : '' + params.arg_rgi_includenudge ? '--include_nudge' : '', + params.arg_rgi_lowquality ? '--low_quality' : '', + params.arg_rgi_split_prodigal_jobs ? '--split_prodigal_jobs' : '' ].join(' ').trim() } @@ -301,6 +362,7 @@ process { } withName: AMPLIFY_PREDICT { + ext.prefix = { "${meta.id}.amplify" } publishDir = [ path: { "${params.outdir}/amp/amplify/${meta.id}/" }, mode: params.publish_dir_mode, @@ -315,10 +377,11 @@ process { mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] - ext.prefix = { "${meta.id}_${meta.hmm_id}" } + ext.prefix = { "${meta.id}_${meta.hmm_id}.hmmer_hmmsearch" } } withName: MACREL_CONTIGS { + ext.prefix = { "${meta.id}.macrel" } publishDir = [ path: { "${params.outdir}/amp/macrel" }, mode: params.publish_dir_mode, @@ -349,6 +412,9 @@ process { params.bgc_antismash_smcogtrees ? '--smcog-trees' : '', params.bgc_antismash_ccmibig ? '--cc-mibig' : '', params.bgc_antismash_cbsubclusters ? '--cb-subclusters' : '', + params.bgc_antismash_pfam2go ? '--pfam2go' : '', + params.bgc_antismash_rre ? '--rre' : '', + params.bgc_antismash_tfbs ? '--tfbs' : '', "--allow-long-headers", "--minlength ${params.bgc_antismash_contigminlength}", "--hmmdetection-strictness ${params.bgc_antismash_hmmdetectionstrictness}", @@ -360,7 +426,7 @@ process { publishDir = [ path: { "${params.outdir}/databases/antismash" }, mode: params.publish_dir_mode, - enabled: params.save_databases, + enabled: params.save_db, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } @@ -369,7 +435,7 @@ process { publishDir = [ path: { "${params.outdir}/databases/deepbgc" }, mode: params.publish_dir_mode, - enabled: params.save_databases, + enabled: params.save_db, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } @@ -457,24 +523,99 @@ process { publishDir = [ path: { "${params.outdir}/reports/hamronization_summarize" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + saveAs: { (params.run_taxa_classification == false) ? 
it : null } + ] + } + + withName: MERGE_TAXONOMY_HAMRONIZATION { + publishDir = [ + path: { "${params.outdir}/reports/hamronization_summarize" }, + mode: params.publish_dir_mode, + saveAs: { _ -> null } // do not save the file ] } - withName: AMPCOMBI { + withName: ARG_TABIX_BGZIP { publishDir = [ - path: { "${params.outdir}/reports/ampcombi" }, + path: { "${params.outdir}/reports/hamronization_summarize" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: AMPCOMBI2_PARSETABLES { + publishDir = [ + path: { "${params.outdir}/reports/ampcombi2/" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] // Have to use a custom `ext` due to deep nested quotes - ext.tooldict = '{"ampir":"ampir.tsv", "amplify":".tsv", "macrel":".prediction", "neubi":"neubi.fasta", "hmmer_hmmsearch":".txt"}' - ext.args = "--tooldict '${ext.tooldict}' --cutoff ${params.amp_ampcombi_cutoff}" + ext.args = [ + "--aminoacid_length ${params.amp_ampcombi_parsetables_aalength}", + "--db_evalue ${params.amp_ampcombi_parsetables_dbevalue}", + "--amp_cutoff ${params.amp_ampcombi_parsetables_cutoff}", + "--ampir_file ${params.amp_ampcombi_parsetables_ampir}", + "--amplify_file ${params.amp_ampcombi_parsetables_amplify}", + "--macrel_file ${params.amp_ampcombi_parsetables_macrel}", + "--hmmsearch_file ${params.amp_ampcombi_parsetables_hmmsearch}", + "--ampgram_file '.tsv' --amptransformer_file '.txt' --neubi_file '.fasta' --log TRUE", + "--hmm_evalue ${params.amp_ampcombi_parsetables_hmmevalue}", + "--window_size_stop_codon ${params.amp_ampcombi_parsetables_windowstopcodon}", + "--window_size_transporter ${params.amp_ampcombi_parsetables_windowtransport}", + params.amp_ampcombi_parsetables_removehitswostopcodons ? '--remove_stop_codons' : '' + ].join(' ').trim() + ext.prefix = { "${meta.id}" } + } + withName: AMPCOMBI2_COMPLETE { + publishDir = [ + path: { "${params.outdir}/reports/ampcombi2" }, + mode: params.publish_dir_mode, + saveAs: { filename -> + if (filename.equals('versions.yml')) { + return filename + } else { + return !params.run_taxa_classification ? filename : null + } + }, + ] + ext.args = "--log TRUE" + } + + withName: AMPCOMBI2_CLUSTER { + publishDir = [ + path: { "${params.outdir}/reports/ampcombi2" }, + mode: params.publish_dir_mode, + saveAs: { filename -> + if (filename.equals('versions.yml')) { + return filename + } else { + return !params.run_taxa_classification ? filename : null + } + }, + ] + ext.args = [ + "--cluster_cov_mode ${params.amp_ampcombi_cluster_covmode}", + "--cluster_mode ${params.amp_ampcombi_cluster_mode}", + "--cluster_coverage ${params.amp_ampcombi_cluster_coverage}", + "--cluster_seq_id ${params.amp_ampcombi_cluster_seqid}", + "--cluster_sensitivity ${params.amp_ampcombi_cluster_sensitivity}", + "--cluster_min_member ${params.amp_ampcombi_cluster_minmembers}", + "--log TRUE", + params.amp_ampcombi_cluster_removesingletons ? '--cluster_remove_singletons' : '' + ].join(' ').trim() + } + + withName: MERGE_TAXONOMY_AMPCOMBI { + publishDir = [ + path: { "${params.outdir}/reports/ampcombi2" }, + mode: params.publish_dir_mode, + saveAs: { _ -> null } // do not save the file + ] } - withName: TABIX_BGZIP { + withName: AMP_TABIX_BGZIP { publishDir = [ - path: { "${params.outdir}/reports/ampcombi" }, + path: { "${params.outdir}/reports/ampcombi2" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] @@ -488,11 +629,57 @@ process { ] } + withName: ARGNORM_ABRICATE { + publishDir = [ + path: {"${params.outdir}/arg/argnorm/abricate/"}, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ext.prefix = { "${meta.id}.normalized.tsv" } + ext.args = "--hamronized" + } + + withName: ARGNORM_AMRFINDERPLUS { + publishDir = [ + path: {"${params.outdir}/arg/argnorm/amrfinderplus/"}, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ext.prefix = { "${meta.id}.normalized.tsv" } + ext.args = "--hamronized" + } + + withName: ARGNORM_DEEPARG { + publishDir = [ + path: {"${params.outdir}/arg/argnorm/deeparg/"}, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ext.prefix = { input_tsv.toString().endsWith(".potential.ARG.deeparg.tsv") ? "${meta.id}.potential.ARG.normalized.tsv" : "${meta.id}.ARG.normalized.tsv" } + ext.args = "--hamronized" + } + + withName: MERGE_TAXONOMY_COMBGC { + publishDir = [ + path: { "${params.outdir}/reports/combgc" }, + mode: params.publish_dir_mode, + saveAs: { _ -> null } // do not save the file + ] + } + + withName: BGC_TABIX_BGZIP { + publishDir = [ + path: { "${params.outdir}/reports/combgc" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: DRAMP_DOWNLOAD { publishDir = [ path: { "${params.outdir}/databases/dramp" }, mode: params.publish_dir_mode, - enabled: params.save_databases, + enabled: params.save_db, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } diff --git a/conf/test.config b/conf/test.config index ffd6a39e..907bdd69 100644 --- a/conf/test.config +++ b/conf/test.config @@ -11,26 +11,23 @@ */ params { - config_profile_name = 'Test profile' + config_profile_name = 'AMP/ARG Pyrodigal test profile' config_profile_description = 'Minimal test dataset to check pipeline function' // Limit resources so that this can run on GitHub Actions max_cpus = 2 - max_memory = '6.GB' + max_memory = '8.GB' max_time = '6.h' // Input data - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/funcscan/samplesheet.csv' - amp_hmmsearch_models = 'https://raw.githubusercontent.com/nf-core/test-datasets/funcscan/hmms/mybacteriocin.hmm' + input = params.pipelines_testdata_base_path + 'funcscan/samplesheet_reduced.csv' - annotation_tool = 'prodigal' + annotation_tool = 'pyrodigal' - run_arg_screening = true - arg_fargene_hmmmodel = 'class_a,class_b_1_2' - // Database download regularly fails due to server downtime - // Tested instead with -profile test_deeparg to allow checking of other tools - arg_skip_deeparg = true + run_arg_screening = true + arg_fargene_hmmmodel = 'class_a,class_b_1_2' - - run_amp_screening = true + run_amp_screening = true + amp_run_hmmsearch = true + amp_hmmsearch_models = params.pipelines_testdata_base_path + 'funcscan/hmms/mybacteriocin.hmm' } diff --git a/conf/test_bakta.config b/conf/test_bakta.config new file mode 100644 index 00000000..72c540c5 --- /dev/null +++ b/conf/test_bakta.config @@ -0,0 +1,34 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. 
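+    Annotation runs with Bakta using its 'light' database download so the test
+    stays within the resource limits of GitHub Actions runners.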
+ + Use as follows: + nextflow run nf-core/funcscan -profile test_bakta, --outdir + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'AMP/ARG Bakta test profile' + config_profile_description = 'Minimal test dataset to check pipeline function' + + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '8.GB' + max_time = '6.h' + + // Input data + input = params.pipelines_testdata_base_path + 'funcscan/samplesheet_reduced.csv' + + annotation_tool = 'bakta' + annotation_bakta_db_downloadtype = 'light' + + run_amp_screening = true + amp_run_hmmsearch = true + amp_hmmsearch_models = params.pipelines_testdata_base_path + 'funcscan/hmms/mybacteriocin.hmm' + + run_arg_screening = true + arg_fargene_hmmmodel = 'class_a,class_b_1_2' +} diff --git a/conf/test_bgc_bakta.config b/conf/test_bgc_bakta.config new file mode 100644 index 00000000..d879fe38 --- /dev/null +++ b/conf/test_bgc_bakta.config @@ -0,0 +1,34 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. + + Use as follows: + nextflow run nf-core/funcscan -profile test_bgc_bakta, --outdir + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'BGC Bakta test profile' + config_profile_description = 'Minimal test dataset to check BGC workflow function' + + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '8.GB' + max_time = '6.h' + + // Input data + input = params.pipelines_testdata_base_path + 'funcscan/samplesheet_reduced.csv' + + annotation_tool = 'bakta' + annotation_bakta_db_downloadtype = "light" + + run_arg_screening = false + run_amp_screening = false + run_bgc_screening = true + + bgc_run_hmmsearch = true + bgc_hmmsearch_models = 'https://raw.githubusercontent.com/antismash/antismash/fd61de057e082fbf071732ac64b8b2e8883de32f/antismash/detection/hmm_detection/data/ToyB.hmm' +} diff --git a/conf/test_bgc_prokka.config b/conf/test_bgc_prokka.config new file mode 100644 index 00000000..0a7b4e18 --- /dev/null +++ b/conf/test_bgc_prokka.config @@ -0,0 +1,33 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. 
+ + Use as follows: + nextflow run nf-core/funcscan -profile test_bgc_prokka, --outdir + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'BGC Prokka test profile' + config_profile_description = 'Minimal test dataset to check BGC workflow function' + + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '8.GB' + max_time = '6.h' + + // Input data + input = params.pipelines_testdata_base_path + 'funcscan/samplesheet_reduced.csv' + + annotation_tool = 'prokka' + + run_arg_screening = false + run_amp_screening = false + run_bgc_screening = true + + bgc_run_hmmsearch = true + bgc_hmmsearch_models = 'https://raw.githubusercontent.com/antismash/antismash/fd61de057e082fbf071732ac64b8b2e8883de32f/antismash/detection/hmm_detection/data/ToyB.hmm' +} diff --git a/conf/test_bgc_pyrodigal.config b/conf/test_bgc_pyrodigal.config new file mode 100644 index 00000000..f5ef07a9 --- /dev/null +++ b/conf/test_bgc_pyrodigal.config @@ -0,0 +1,33 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. + + Use as follows: + nextflow run nf-core/funcscan -profile test_bgc_pyrodigal, --outdir + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'BGC Pyrodigal test profile' + config_profile_description = 'Minimal test dataset to check BGC workflow function' + + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '8.GB' + max_time = '6.h' + + // Input data + input = params.pipelines_testdata_base_path + 'funcscan/samplesheet_reduced.csv' + + annotation_tool = 'pyrodigal' + + run_arg_screening = false + run_amp_screening = false + run_bgc_screening = true + + bgc_run_hmmsearch = true + bgc_hmmsearch_models = 'https://raw.githubusercontent.com/antismash/antismash/fd61de057e082fbf071732ac64b8b2e8883de32f/antismash/detection/hmm_detection/data/ToyB.hmm' +} diff --git a/conf/test_deeparg.config b/conf/test_deeparg.config deleted file mode 100644 index ff229340..00000000 --- a/conf/test_deeparg.config +++ /dev/null @@ -1,38 +0,0 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Nextflow config file for running minimal tests -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Defines input files and everything required to run a fast and simple pipeline test. 
- - Use as follows: - nextflow run nf-core/funcscan -profile test_deeparg, --outdir - - This executes DeepARG separately due to regular flakiness in database download server - ----------------------------------------------------------------------------------------- -*/ - -params { - config_profile_name = 'DeepARG test profile' - config_profile_description = 'Minimal test dataset to check DeepARG ARG screening function' - - // Limit resources so that this can run on GitHub Actions - max_cpus = 2 - max_memory = '6.GB' - max_time = '6.h' - - // Input data - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/funcscan/samplesheet.csv' - - annotation_tool = 'prodigal' - - run_arg_screening = true - arg_skip_fargene = true - arg_skip_rgi = true - arg_skip_amrfinderplus = true - arg_skip_abricate = true - arg_skip_deeparg = false - - run_amp_screening = false - run_bgc_screening = false -} diff --git a/conf/test_full.config b/conf/test_full.config index e8622529..9d16fb18 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -11,33 +11,38 @@ */ params { - config_profile_name = 'Full test profile' - config_profile_description = 'Full test dataset to check pipeline function' + config_profile_name = 'Full test profile' + config_profile_description = 'Full test dataset to check pipeline function' // Input data for full size test - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/funcscan/samplesheet_full.csv' + input = params.pipelines_testdata_base_path + 'funcscan/samplesheet_full.csv' // Database and annotation options - save_annotations = true + save_annotations = true // AMP params - run_amp_screening = true - amp_hmmsearch_models = 'https://raw.githubusercontent.com/nf-core/test-datasets/funcscan/hmms/mybacteriocin.hmm' - amp_hmmsearch_savealignments = true - amp_hmmsearch_savedomains = true - amp_hmmsearch_savetargets = true - amp_skip_amplify = true + run_amp_screening = true + amp_run_hmmsearch = true + amp_hmmsearch_models = params.pipelines_testdata_base_path + 'funcscan/hmms/mybacteriocin.hmm' + amp_hmmsearch_savealignments = true + amp_hmmsearch_savedomains = true + amp_hmmsearch_savetargets = true + amp_skip_amplify = true // takes too long // ARG params - run_arg_screening = true - arg_skip_deeparg = false + run_arg_screening = true + arg_skip_deeparg = false // BGC params - run_bgc_screening = true - bgc_hmmsearch_models = 'https://raw.githubusercontent.com/antismash/antismash/fd61de057e082fbf071732ac64b8b2e8883de32f/antismash/detection/hmm_detection/data/ToyB.hmm' - bgc_hmmsearch_savealignments = true - bgc_hmmsearch_savetargets = true - bgc_hmmsearch_savedomains = true - bgc_skip_deepbgc = true // takes too long - + run_bgc_screening = true + bgc_run_hmmsearch = true + bgc_hmmsearch_models = 'https://raw.githubusercontent.com/antismash/antismash/fd61de057e082fbf071732ac64b8b2e8883de32f/antismash/detection/hmm_detection/data/ToyB.hmm' + bgc_hmmsearch_savealignments = true + bgc_hmmsearch_savetargets = true + bgc_hmmsearch_savedomains = true + bgc_skip_deepbgc = true // takes too long + bgc_mincontiglength = 1000 + bgc_savefilteredcontigs = true + bgc_skip_deepbgc = true + bgc_antismash_contigminlength = 1000 } diff --git a/conf/test_nothing.config b/conf/test_nothing.config index f5df5b3b..87a2e06b 100644 --- a/conf/test_nothing.config +++ b/conf/test_nothing.config @@ -7,7 +7,7 @@ Although in this case we turn everything off Use as follows: - nextflow run nf-core/funcscan -profile test, --outdir + nextflow run nf-core/funcscan -profile 
test_nothing, --outdir ---------------------------------------------------------------------------------------- */ @@ -18,16 +18,36 @@ params { // Limit resources so that this can run on GitHub Actions max_cpus = 2 - max_memory = '6.GB' + max_memory = '8.GB' max_time = '6.h' // Input data - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/funcscan/samplesheet.csv' - amp_hmmsearch_models = 'https://raw.githubusercontent.com/nf-core/test-datasets/funcscan/hmms/mybacteriocin.hmm' - - annotation_tool = 'prodigal' - - run_arg_screening = false - run_amp_screening = false - run_bgc_screening = false + input = params.pipelines_testdata_base_path + 'funcscan/samplesheet_reduced.csv' + amp_hmmsearch_models = params.pipelines_testdata_base_path + 'funcscan/hmms/mybacteriocin.hmm' + bgc_hmmsearch_models = 'https://raw.githubusercontent.com/antismash/antismash/fd61de057e082fbf071732ac64b8b2e8883de32f/antismash/detection/hmm_detection/data/ToyB.hmm' + + annotation_tool = 'pyrodigal' + + run_arg_screening = false + run_amp_screening = false + run_bgc_screening = false + + arg_fargene_hmmmodel = 'class_a,class_b_1_2' + + amp_skip_amplify = true + amp_skip_macrel = true + amp_skip_ampir = true + amp_run_hmmsearch = false + + arg_skip_deeparg = true + arg_skip_fargene = true + arg_skip_rgi = true + arg_skip_amrfinderplus = true + arg_skip_deeparg = true + arg_skip_abricate = true + + bgc_skip_antismash = true + bgc_skip_deepbgc = true + bgc_skip_gecco = true + bgc_run_hmmsearch = false } diff --git a/conf/test_preannotated.config b/conf/test_preannotated.config new file mode 100644 index 00000000..38a5e1d1 --- /dev/null +++ b/conf/test_preannotated.config @@ -0,0 +1,33 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. + + Use as follows: + nextflow run nf-core/funcscan -profile test_preannotated, --outdir + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'ARG/AMP test profile - preannotated input' + config_profile_description = 'Minimal test dataset to check pipeline function' + + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '6.GB' + max_time = '6.h' + + // Input data + input = params.pipelines_testdata_base_path + 'funcscan/samplesheet_preannotated.csv' + + annotation_tool = 'pyrodigal' + + run_arg_screening = true + arg_fargene_hmmmodel = 'class_a,class_b_1_2' + + run_amp_screening = true + amp_run_hmmsearch = true + amp_hmmsearch_models = params.pipelines_testdata_base_path + 'funcscan/hmms/mybacteriocin.hmm' +} diff --git a/conf/test_bgc.config b/conf/test_preannotated_bgc.config similarity index 65% rename from conf/test_bgc.config rename to conf/test_preannotated_bgc.config index 07623670..039656d3 100644 --- a/conf/test_bgc.config +++ b/conf/test_preannotated_bgc.config @@ -5,13 +5,13 @@ Defines input files and everything required to run a fast and simple pipeline test. 
Use as follows: - nextflow run nf-core/funcscan -profile test_bgc, --outdir + nextflow run nf-core/funcscan -profile test_preannotated_bgc, --outdir ---------------------------------------------------------------------------------------- */ params { - config_profile_name = 'BGC test profile' + config_profile_name = 'BGC test profile - preannotated input' config_profile_description = 'Minimal test dataset to check BGC workflow function' // Limit resources so that this can run on GitHub Actions @@ -20,12 +20,14 @@ params { max_time = '6.h' // Input data - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/funcscan/samplesheet.csv' - bgc_hmmsearch_models = 'https://raw.githubusercontent.com/antismash/antismash/fd61de057e082fbf071732ac64b8b2e8883de32f/antismash/detection/hmm_detection/data/ToyB.hmm' + input = params.pipelines_testdata_base_path + 'funcscan/samplesheet_preannotated.csv' + + annotation_tool = 'pyrodigal' - annotation_tool = 'prodigal' + run_arg_screening = false + run_amp_screening = false + run_bgc_screening = true - run_arg_screening = false - run_amp_screening = false - run_bgc_screening = true + bgc_run_hmmsearch = true + bgc_hmmsearch_models = 'https://raw.githubusercontent.com/antismash/antismash/fd61de057e082fbf071732ac64b8b2e8883de32f/antismash/detection/hmm_detection/data/ToyB.hmm' } diff --git a/conf/test_prokka.config b/conf/test_prokka.config new file mode 100644 index 00000000..eb346bcb --- /dev/null +++ b/conf/test_prokka.config @@ -0,0 +1,33 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. + + Use as follows: + nextflow run nf-core/funcscan -profile test_prokka, --outdir + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'AMP/ARG Prokka test profile' + config_profile_description = 'Minimal test dataset to check pipeline function' + + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '8.GB' + max_time = '6.h' + + // Input data + input = params.pipelines_testdata_base_path + 'funcscan/samplesheet_reduced.csv' + + annotation_tool = 'prokka' + + run_arg_screening = true + arg_fargene_hmmmodel = 'class_a,class_b_1_2' + + run_amp_screening = true + amp_run_hmmsearch = true + amp_hmmsearch_models = params.pipelines_testdata_base_path + 'funcscan/hmms/mybacteriocin.hmm' +} diff --git a/conf/test_taxonomy_bakta.config b/conf/test_taxonomy_bakta.config new file mode 100644 index 00000000..e7bc923d --- /dev/null +++ b/conf/test_taxonomy_bakta.config @@ -0,0 +1,50 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. 
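+    Adds MMseqs2-based taxonomic classification of contigs on top of the AMP,
+    ARG and BGC screening workflows, which is why MMSEQS_DATABASES is given a
+    raised memory limit in the process block at the bottom of this file.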
+ + Use as follows: + nextflow run nf-core/funcscan -profile test_taxonomy_bakta, --outdir + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'Taxonomic classification test profile' + config_profile_description = 'Minimal test dataset to check taxonomic classification workflow function' + + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '14.GB' + max_time = '6.h' + + // Input data + input = params.pipelines_testdata_base_path + 'funcscan/samplesheet_reduced.csv' + amp_hmmsearch_models = params.pipelines_testdata_base_path + 'funcscan/hmms/mybacteriocin.hmm' + bgc_hmmsearch_models = 'https://raw.githubusercontent.com/antismash/antismash/fd61de057e082fbf071732ac64b8b2e8883de32f/antismash/detection/hmm_detection/data/ToyB.hmm' + + run_taxa_classification = true + annotation_tool = 'bakta' + annotation_bakta_db_downloadtype = 'light' + + run_arg_screening = true + arg_skip_deeparg = true + arg_skip_amrfinderplus = true + + run_amp_screening = true + amp_run_hmmsearch = true + + run_bgc_screening = true + bgc_mincontiglength = 1000 + bgc_savefilteredcontigs = true + bgc_skip_deepbgc = true + bgc_antismash_contigminlength = 1000 + bgc_run_hmmsearch = true +} + +process { + withName: MMSEQS_DATABASES { + memory = '14.GB' + } +} diff --git a/conf/test_taxonomy_prokka.config b/conf/test_taxonomy_prokka.config new file mode 100644 index 00000000..39eefdfc --- /dev/null +++ b/conf/test_taxonomy_prokka.config @@ -0,0 +1,49 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. 
+ + Use as follows: + nextflow run nf-core/funcscan -profile test_taxonomy_prokka, --outdir + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'Taxonomic classification test profile' + config_profile_description = 'Minimal test dataset to check taxonomic classification workflow function' + + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '14.GB' + max_time = '6.h' + + // Input data + input = params.pipelines_testdata_base_path + 'funcscan/samplesheet_reduced.csv' + bgc_hmmsearch_models = 'https://raw.githubusercontent.com/antismash/antismash/fd61de057e082fbf071732ac64b8b2e8883de32f/antismash/detection/hmm_detection/data/ToyB.hmm' + amp_hmmsearch_models = params.pipelines_testdata_base_path + 'funcscan/hmms/mybacteriocin.hmm' + + run_taxa_classification = true + annotation_tool = 'prokka' + + run_arg_screening = true + arg_skip_deeparg = true + arg_skip_amrfinderplus = true + + run_amp_screening = true + amp_run_hmmsearch = true + + run_bgc_screening = true + bgc_mincontiglength = 1000 + bgc_savefilteredcontigs = true + bgc_skip_deepbgc = true + bgc_antismash_contigminlength = 1000 + bgc_run_hmmsearch = true +} + +process { + withName: MMSEQS_DATABASES { + memory = '14.GB' + } +} diff --git a/conf/test_taxonomy_pyrodigal.config b/conf/test_taxonomy_pyrodigal.config new file mode 100644 index 00000000..4ad970f9 --- /dev/null +++ b/conf/test_taxonomy_pyrodigal.config @@ -0,0 +1,49 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. 
+ + Use as follows: + nextflow run nf-core/funcscan -profile test_taxonomy_pyrodigal, --outdir + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'Taxonomic classification test profile' + config_profile_description = 'Minimal test dataset to check taxonomic classification workflow function' + + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '14.GB' + max_time = '6.h' + + // Input data + input = params.pipelines_testdata_base_path + 'funcscan/samplesheet_reduced.csv' + bgc_hmmsearch_models = 'https://raw.githubusercontent.com/antismash/antismash/fd61de057e082fbf071732ac64b8b2e8883de32f/antismash/detection/hmm_detection/data/ToyB.hmm' + amp_hmmsearch_models = params.pipelines_testdata_base_path + 'funcscan/hmms/mybacteriocin.hmm' + + run_taxa_classification = true + annotation_tool = 'pyrodigal' + + run_arg_screening = true + arg_skip_deeparg = true + arg_skip_amrfinderplus = true + + run_amp_screening = true + amp_run_hmmsearch = true + + run_bgc_screening = true + bgc_mincontiglength = 1000 + bgc_savefilteredcontigs = true + bgc_skip_deepbgc = true + bgc_antismash_contigminlength = 1000 + bgc_run_hmmsearch = true +} + +process { + withName: MMSEQS_DATABASES { + memory = '14.GB' + } +} diff --git a/docs/images/funcscan_metro_workflow.png b/docs/images/funcscan_metro_workflow.png index 368a8c34..7fda2756 100644 Binary files a/docs/images/funcscan_metro_workflow.png and b/docs/images/funcscan_metro_workflow.png differ diff --git a/docs/images/funcscan_metro_workflow.svg b/docs/images/funcscan_metro_workflow.svg index 997ea2cb..c9d291fb 100644 --- a/docs/images/funcscan_metro_workflow.svg +++ b/docs/images/funcscan_metro_workflow.svg @@ -2,17 +2,17 @@ [SVG text diff not reproduced: the updated metro workflow diagram; its new legend distinguishes AMPs, ARGs, BGCs, taxonomic classification, screening/preprocessing/postprocessing tools, and optional input, and the map now includes MMseqs2, SeqKit, and argNorm.] diff --git a/docs/images/nf-core-funcscan_logo_dark.png b/docs/images/nf-core-funcscan_logo_dark.png index 0437446c..9c131578 100644 Binary files a/docs/images/nf-core-funcscan_logo_dark.png and b/docs/images/nf-core-funcscan_logo_dark.png differ diff --git a/docs/images/nf-core-funcscan_logo_light.png b/docs/images/nf-core-funcscan_logo_light.png index f00d53e8..10c5788e 100644 Binary files a/docs/images/nf-core-funcscan_logo_light.png and b/docs/images/nf-core-funcscan_logo_light.png differ diff --git a/docs/output.md b/docs/output.md index d515d1f5..9f71278a 100644 --- a/docs/output.md +++ b/docs/output.md @@ -4,23 +4,25 @@ The output of nf-core/funcscan provides reports for each of the functional groups: -- antibiotic resistance genes (tools: [ABRicate](https://github.com/tseemann/abricate), [AMRFinderPlus](https://www.ncbi.nlm.nih.gov/pathogens/antimicrobial-resistance/AMRFinder), [DeepARG](https://bitbucket.org/gusphdproj/deeparg-ss/src/master),
[fARGene](https://github.com/fannyhb/fargene), [RGI](https://card.mcmaster.ca/analyze/rgi) – summarised by [hAMRonization](https://github.com/pha4ge/hAMRonization)) -- antimicrobial peptides (tools: [Macrel](https://github.com/BigDataBiology/macrel), [AMPlify](https://github.com/bcgsc/AMPlify), [ampir](https://ampir.marine-omics.net), [hmmsearch](http://hmmer.org) – summarised by [AMPcombi](https://github.com/Darcy220606/AMPcombi)) -- biosynthetic gene clusters (tools: [antiSMASH](https://docs.antismash.secondarymetabolites.org), [DeepBGC](https://github.com/Merck/deepbgc), [GECCO](https://gecco.embl.de), [hmmsearch](http://hmmer.org) – summarised by [comBGC](#combgc)) +- **antibiotic resistance genes** (tools: [ABRicate](https://github.com/tseemann/abricate), [AMRFinderPlus](https://www.ncbi.nlm.nih.gov/pathogens/antimicrobial-resistance/AMRFinder), [DeepARG](https://bitbucket.org/gusphdproj/deeparg-ss/src/master), [fARGene](https://github.com/fannyhb/fargene), [RGI](https://card.mcmaster.ca/analyze/rgi) – summarised by [hAMRonization](https://github.com/pha4ge/hAMRonization). Results from ABRicate, AMRFinderPlus, and DeepARG are normalised to [ARO](https://obofoundry.org/ontology/aro.html) by [argNorm](https://github.com/BigDataBiology/argNorm).) +- **antimicrobial peptides** (tools: [Macrel](https://github.com/BigDataBiology/macrel), [AMPlify](https://github.com/bcgsc/AMPlify), [ampir](https://ampir.marine-omics.net), [hmmsearch](http://hmmer.org) – summarised by [AMPcombi](https://github.com/Darcy220606/AMPcombi)) +- **biosynthetic gene clusters** (tools: [antiSMASH](https://docs.antismash.secondarymetabolites.org), [DeepBGC](https://github.com/Merck/deepbgc), [GECCO](https://gecco.embl.de), [hmmsearch](http://hmmer.org) – summarised by [comBGC](#combgc)) -As a general workflow, we recommend to first look at the summary reports ([ARGs](#hamronization), [AMPs](#ampcombi), [BGCs](#combgc)), to get a general overview of what hits have been found across all the tools of each functional group. After which, you can explore the specific output directories of each tool to get more detailed information about each result. The tool-specific output directories also includes the output from the functional annotation steps of either [prokka](https://github.com/tseemann/prokka), [pyrodigal](https://github.com/althonos/pyrodigal), [prodigal](https://github.com/hyattpd/Prodigal), or [Bakta](https://github.com/oschwengers/bakta) if the `--save_annotations` flag was set. +As a general workflow, we recommend first looking at the summary reports ([ARGs](#hamronization), [AMPs](#ampcombi), [BGCs](#combgc)) to get a general overview of which hits have been found across all the tools of each functional group. Afterwards, you can explore the specific output directories of each tool for more detailed information about each result. The tool-specific output directories also include the output from the functional annotation steps of either [prokka](https://github.com/tseemann/prokka), [pyrodigal](https://github.com/althonos/pyrodigal), [prodigal](https://github.com/hyattpd/Prodigal), or [Bakta](https://github.com/oschwengers/bakta) if the `--save_annotations` flag was set. Additionally, taxonomic classifications from [MMseqs2](https://github.com/soedinglab/MMseqs2) are saved if the `--taxa_classification_mmseqs_db_savetmp` and `--taxa_classification_mmseqs_taxonomy_savetmp` flags are set. -Similarly, all downloaded databases are saved (i.e.
from [antiSMASH](https://docs.antismash.secondarymetabolites.org), [AMRFinderPlus](https://www.ncbi.nlm.nih.gov/pathogens/antimicrobial-resistance/AMRFinder), [Bakta](https://github.com/oschwengers/bakta), [DeepARG](https://bitbucket.org/gusphdproj/deeparg-ss/src/master), and/or [AMPcombi](https://github.com/Darcy220606/AMPcombi)) into the output directory `/downloads/` if the `--save_databases` flag was set. +Similarly, all downloaded databases are saved (i.e. from [MMseqs2](https://github.com/soedinglab/MMseqs2), [antiSMASH](https://docs.antismash.secondarymetabolites.org), [AMRFinderPlus](https://www.ncbi.nlm.nih.gov/pathogens/antimicrobial-resistance/AMRFinder), [Bakta](https://github.com/oschwengers/bakta), [DeepARG](https://bitbucket.org/gusphdproj/deeparg-ss/src/master), [RGI](https://github.com/arpcard/rgi), and/or [AMPcombi](https://github.com/Darcy220606/AMPcombi)) into the output directory `/databases/` if the `--save_db` flag was set. Furthermore, for reproducibility, versions of all software used in the run are presented in a [MultiQC](http://multiqc.info) report. The directories listed below will be created in the results directory (specified by the `--outdir` flag) after the pipeline has finished. All paths are relative to this top-level output directory. The default directory structure of nf-core/funcscan is: -```console +```tree results/ +├── taxonomic_classification/ +| └── mmseqs_createtsv/ ├── annotation/ | ├── bakta/ -| ├── prodigal +| ├── prodigal/ | ├── prokka/ | └── pyrodigal/ ├── amp/ @@ -33,16 +35,19 @@ results/ | ├── amrfinderplus/ | ├── deeparg/ | ├── fargene/ +| ├── rgi/ | ├── hamronization/ -| └── rgi/ +| └── argnorm/ ├── bgc/ | ├── antismash/ | ├── deepbgc/ | ├── gecco/ | └── hmmsearch/ +├── qc/ +| └── seqkit/ ├── reports/ | ├── ampcombi/ -| ├── comBGC/ +| ├── combgc/ | └── hamronization_summarize/ ├── databases/ ├── multiqc/ @@ -54,6 +59,14 @@ work/ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes prokaryotic sequence data through the following steps: +Input contig QC with: + +- [SeqKit](https://bioinf.shenwei.me/seqkit/) (default) – for separating contigs into long and short categories. + +Taxonomic classification of nucleotide sequences with: + +- [MMseqs2](https://github.com/soedinglab/MMseqs2) (default) – for contig taxonomic classification using 2bLCA. + ORF prediction and annotation with any of: - [Pyrodigal](#pyrodigal) (default) – for open reading frame prediction. @@ -85,14 +98,32 @@ Biosynthetic Gene Clusters (BGCs): Output Summaries: -- [AMPcombi](#ampcombi) – summary of antimicrobial peptide gene output from various detection tools. +- [AMPcombi](#ampcombi) – summary report of antimicrobial peptide gene output from various detection tools. - [hAMRonization](#hamronization) – summary of antimicrobial resistance gene output from various detection tools. +- [argNorm](#argnorm) – normalisation of ARG annotations from [ABRicate](#abricate), [AMRFinderPlus](#amrfinderplus), and [DeepARG](#deeparg) to the ARO. - [comBGC](#combgc) – summary of biosynthetic gene cluster output from various detection tools. - [MultiQC](#multiqc) – report of all software and versions used in the pipeline. - [Pipeline information](#pipeline-information) – report metrics generated during the workflow execution. ## Tool details +### Taxonomic classification tools + +[MMseqs2](#mmseqs2) + +#### MMseqs2 + +
+Output files + +- `taxonomic_classification/mmseqs_createtsv/` + - `/`: + - `*.tsv`: tab-separated table containing the taxonomic lineage of every contig. When a contig cannot be classified according to the database, its 'lineage' column is recorded as 'no rank | unclassified'. +- `reports//_complete_summary_taxonomy.tsv.gz`: tab-separated table containing the concatenated results from the summary tables along with the taxonomic classification, if the parameter `--run_taxa_classification` is set. +
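+For a quick look at these tables on the command line, something like the following sketch can be used (the `<sample>` and `<tool>` names are hypothetical placeholders for the directories in your own run):
+
+```bash
+# Preview the per-contig lineages from the MMseqs2 createtsv step
+head -n 5 'results/taxonomic_classification/mmseqs_createtsv/<sample>/<sample>.tsv'
+
+# Preview the gzipped merged summary, including the appended lineage columns
+zcat 'results/reports/<tool>/<tool>_complete_summary_taxonomy.tsv.gz' | head -n 5
+```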
+ +[MMseqs2](https://github.com/soedinglab/MMseqs2) classifies the taxonomic lineage of contigs based on the last common ancestor. The inferred taxonomic lineages are included in the final workflow summaries to annotate the potential source bacteria of the identified AMPs, ARGs, and/or BGCs. + ### Annotation tools [Pyrodigal](#pyrodigal), [Prodigal](#prodigal), [Prokka](#prokka), [Bakta](#bakta) @@ -103,11 +134,11 @@ Output Summaries: Output files - `prodigal/` - - `/`: - - `*.gff`: annotation in GFF3 format, containing both sequences and annotations - - `*.fna`: nucleotide FASTA file of the input contig sequences - - `*.faa`: protein FASTA file of the translated CDS sequences - - `*.gbk`: annotation in GBK format, containing both sequences and annotations + - `category/`: indicates whether annotation files are of all contigs or `long`-only contigs (BGC subworkflow only) + - `/`: + - `*.fna`: nucleotide FASTA file of the input contig sequences + - `*.faa`: protein FASTA file of the translated CDS sequences + - `*.gbk`: annotation in GBK format, containing both sequences and annotations > Descriptions taken from the [Prodigal documentation](https://github.com/hyattpd/prodigal/wiki) @@ -121,10 +152,11 @@ Output Summaries: Output files - `pyrodigal/` - - `/`: - - `*.gff`: annotation in GFF3 format, containing both sequences and annotations - - `*.fna`: nucleotide FASTA file of the input contig sequences - - `*.faa`: protein FASTA file of the translated CDS sequences + - `category/`: indicates whether annotation files are of all contigs or `long`-only contigs (BGC subworkflow only) + - `/`: + - `*.gbk`: annotation in GBK format, containing both sequences and annotations + - `*.fna`: nucleotide FASTA file of the annotated CDS sequences + - `*.faa`: protein FASTA file of the translated CDS sequences > Descriptions taken from the [Pyrodigal documentation](https://pyrodigal.readthedocs.io/) @@ -138,19 +170,20 @@ Output Summaries: Output files - `prokka/` - - `/` - - `*.gff`: annotation in GFF3 format, containing both sequences and annotations - - `*.gbk`: standard Genbank file derived from the master .gff - - `*.fna`: nucleotide FASTA file of the input contig sequences - - `*.faa`: protein FASTA file of the translated CDS sequences - - `*.ffn`: nucleotide FASTA file of all the prediction transcripts (CDS, rRNA, tRNA, tmRNA, misc_RNA) - - `*.sqn`: an ASN1 format "Sequin" file for submission to Genbank - - `*.fsa`: nucleotide FASTA file of the input contig sequences, used by "tbl2asn" to create the .sqn file - - `*.tbl`: feature Table file, used by "tbl2asn" to create the .sqn file - - `*.err`: unacceptable annotations - the NCBI discrepancy report - - `*.log`: logging output that Prokka produced during its run - - `*.txt`: statistics relating to the annotated features found - - `*.tsv`: tab-separated file of all features + - `category/`: indicates whether annotation files are of all contigs or `long`-only contigs (BGC subworkflow only) + - `/` + - `*.gff`: annotation in GFF3 format, containing both sequences and annotations + - `*.gbk`: standard Genbank file derived from the master .gff + - `*.fna`: nucleotide FASTA file of the input contig sequences + - `*.faa`: protein FASTA file of the translated CDS sequences + - `*.ffn`: nucleotide FASTA file of all the prediction transcripts (CDS, rRNA, tRNA, tmRNA, misc_RNA) + - `*.sqn`: an ASN1 format "Sequin" file for submission to Genbank + - `*.fsa`: nucleotide FASTA file of the input contig sequences, used by "tbl2asn" to create the .sqn file + - 
`*.tbl`: feature Table file, used by "tbl2asn" to create the .sqn file + - `*.err`: unacceptable annotations - the NCBI discrepancy report + - `*.log`: logging output that Prokka produced during its run + - `*.txt`: statistics relating to the annotated features found + - `*.tsv`: tab-separated file of all features > Descriptions directly from the [Prokka documentation](https://github.com/tseemann/prokka#output-files) @@ -164,18 +197,20 @@ Output files - `bakta/` - - `.gff3`: annotations & sequences in GFF3 format - - `.gbff`: annotations & sequences in (multi) GenBank format - - `.ffn`: feature nucleotide sequences as FASTA - - `.fna`: replicon/contig DNA sequences as FASTA - - `.embl`: annotations & sequences in (multi) EMBL format - - `.faa`: CDS/sORF amino acid sequences as FASTA - - `_hypothetical.faa`: further information on hypothetical protein CDS as simple human readble tab separated values - - `_hypothetical.tsv`: hypothetical protein CDS amino acid sequences as FASTA - - `.tsv`: annotations as simple human readble TSV - - `.txt`: summary in TXT format - -> Descriptions directly from the [Bakta documentation](https://github.com/oschwengers/bakta#output). + - `category/`: indicates whether annotation files are of all contigs or `long`-only contigs (BGC only) + - `` + - `.gff3`: annotations & sequences in GFF3 format + - `.gbff`: annotations & sequences in (multi) GenBank format + - `.ffn`: feature nucleotide sequences as FASTA + - `.fna`: replicon/contig DNA sequences as FASTA + - `.embl`: annotations & sequences in (multi) EMBL format + - `.faa`: CDS/sORF amino acid sequences as FASTA + - `_hypothetical.faa`: hypothetical protein CDS amino acid sequences as FASTA + - `_hypothetical.tsv`: further information on hypothetical protein CDS as simple human readable tab-separated values + - `.tsv`: annotations as simple human readable TSV + - `.txt`: summary in TXT format + +> Descriptions taken from the [Bakta documentation](https://github.com/oschwengers/bakta#output). @@ -243,7 +278,7 @@ ### ARG detection tools -[ABRicate](#abricate), [AMRFinderPlus](#amrfinderplus), [DeepARG](#deeparg), [fARGene](#fargene), [RGI](#rgi) +[ABRicate](#abricate), [AMRFinderPlus](#amrfinderplus), [DeepARG](#deeparg), [fARGene](#fargene), [RGI](#rgi). #### ABRicate @@ -292,7 +327,7 @@ - `fargene/` - `fargene_analysis.log`: logging output that fARGene produced during its run - `/`: - - `hmmsearchresults/`: output from intermediate hmmsearch step (only if `--arg_fargene_savetmpfiles` supplied) + - `hmmsearchresults/`: output from intermediate hmmsearch step - `predictedGenes/`: - `*-filtered.fasta`: nucleotide sequences of predicted ARGs - `*-filtered-peptides.fasta`: amino acid sequences of predicted ARGs @@ -328,7 +363,22 @@ ### BGC detection tools -[antiSMASH](#antismash), [deepBGC](#deepbgc), [GECCO](#gecco), [hmmsearch](#hmmsearch) +[antiSMASH](#antismash), [deepBGC](#deepbgc), [GECCO](#gecco), [hmmsearch](#hmmsearch). + +Note that the BGC tools are run by default on a set of annotations generated only on long contigs (3,000 bp or longer). These filtered FASTA files are under `bgc/seqkit/`, and the corresponding annotation files are under `annotation//long/`, if the corresponding saving flags are specified (see [parameter docs](https://nf-co.re/funcscan/parameters)). However, the same annotations _should_ also be present in the annotation files in the sister `all/` directory. + +### Input contig QC + +
+Output files + +- `seqkit/` + - `_long.fasta`: FASTA file containing contigs equal to or longer than the threshold set by `--contig_qc_lengththreshold`, used in the BGC subworkflow +
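+This filtering is equivalent to a plain SeqKit length filter. As a standalone sketch (assuming the default 3,000 bp threshold; the file names are placeholders):
+
+```bash
+# Keep only contigs of at least 3000 bp, mirroring the input the BGC subworkflow receives
+seqkit seq --min-len 3000 sample_contigs.fasta > sample_long.fasta
+```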
+ +[SeqKit](https://bioinf.shenwei.me/seqkit/) is a cross-platform and ultrafast toolkit for FASTA/Q file manipulation. + +Note that the filtered FASTA files are only used in the BGC workflow, for run-time optimisation and biological reasons. All contigs are otherwise screened unfiltered in the ARG/AMP workflows. #### antiSMASH @@ -395,7 +445,7 @@ ### Summary tools -[AMPcombi](#ampcombi), [hAMRonization](#hamronization), [comBGC](#combgc), [MultiQC](#multiqc), [pipeline information](#pipeline-information) +[AMPcombi](#ampcombi), [hAMRonization](#hamronization), [comBGC](#combgc), [MultiQC](#multiqc), [pipeline information](#pipeline-information), [argNorm](#argnorm). #### AMPcombi @@ -403,16 +453,19 @@ Output files - `ampcombi/` - - `ampcombi_complete_summary.csv.gz`: summarised output from all AMP workflow tools (except hmmer_hmmsearch) in compressed csv format - - `ampcombi.log`: a log file generated by ampcombi - - `*_ampcombi.csv`: summarised output in csv for each sample - - `*_amp.faa*`: fasta file containing the amino acid sequences for all AMP hits for each sample - - `*_diamond_matches.txt*`: alignment file generated by DIAMOND for each sample - - -
-AMP summary table header descriptions + - `Ampcombi_summary.tsv`: tab-separated table containing the concatenated and filtered results from each AMPcombi summary table. This is the output given when the taxonomic classification is not activated (pipeline default). + - `Ampcombi_parse_tables.log`: log file containing the run information from AMPcombi submodule `ampcombi2/parsetables` + - `Ampcombi_complete.log`: log file containing the run information from AMPcombi submodule `ampcombi2/complete` + - `Ampcombi_summary_cluster.tsv`: tab-separated table containing the clustered AMP hits. This is the output given when the taxonomic classification is not activated (pipeline default). + - `Ampcombi_summary_cluster_representative_seq.tsv`: tab-separated table containing the representative sequence of each cluster. This can be used in AMPcombi for constructing 3D structures using ColabFold. For more details on how to do this, please refer to the [AMPcombi documentation](https://github.com/Darcy220606/AMPcombi/blob/main/README.md). + - `Ampcombi_cluster.log`: log file containing the run information from AMPcombi submodule `ampcombi2/cluster` + - `ampcombi_complete_summary_taxonomy.tsv.gz`: summarised output from all AMP workflow tools with taxonomic assignment in compressed tsv format. This is the same output as `Ampcombi_summary_cluster.tsv` file but with taxonomic classification of the contig. + - `/contig_gbks`: contains all the contigs in gbk format that an AMP was found on using the custom parameters + - `/*_ampcombi.log`: a log file generated by AMPcombi + - `/*_ampcombi.tsv`: summarised output in tsv format for each sample + - `/*_amp.faa*`: fasta file containing the amino acid sequences for all AMP hits for each sample + - `/*_diamond_matches.txt*`: alignment file generated by DIAMOND for each sample + AMP summary table header descriptions | Table column | Description | | ------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | @@ -453,19 +506,21 @@ Output Summaries: | `Reference` | Citation of the associated publication if available | | `Author` | Authors' names associated with the publication or who have uploaded the peptide | | `Title` | Publication title if available | +| `...` | |
-[AMPcombi](https://github.com/Darcy220606/AMPcombi) summarizes the results of **antimicrobial peptide (AMP)** prediction tools (AMPIR, AMPLIFY, MACREL, and other non nf-core tools) into a single table and aligns the hits against a reference AMP database for functional and taxonomic classification. +[AMPcombi](https://github.com/Darcy220606/AMPcombi) summarizes the results of **antimicrobial peptide (AMP)** prediction tools (ampir, AMPlify, Macrel, and other non-nf-core tools) into a single table and aligns the hits against a reference AMP database for functional and taxonomic classification. It assigns the physicochemical properties (e.g. hydrophobicity, molecular weight) using the [Biopython toolkit](https://github.com/biopython/biopython). Additionally, it clusters the resulting AMP hits from all samples using [MMseqs2](https://github.com/soedinglab/MMseqs2). For further filtering of AMPs with signal peptides, the output file `Ampcombi_summary_cluster.tsv` or `ampcombi_complete_summary_taxonomy.tsv.gz` can be used downstream as detailed [here](https://github.com/Darcy220606/AMPcombi/blob/main/README.md). #### hAMRonization
Output files -- `hamronization/` one of the following: +- `hamronization_summarize/` one of the following: - `hamronization_combined_report.json`: summarised output in .json format - - `hamronization_combined_report.tsv`: summarised output in .tsv format + - `hamronization_combined_report.tsv`: summarised output in .tsv format when the taxonomic classification is turned off (pipeline default). + - `hamronization_combined_report.tsv.gz`: summarised output in gzipped format when the taxonomic classification is turned on by `--run_taxa_classification`. - `hamronization_combined_report.html`: interactive output in .html format
@@ -515,13 +570,37 @@ Output Summaries: [hAMRonization](https://github.com/pha4ge/hAMRonization) summarizes the outputs of the **antimicrobial resistance gene** detection tools (ABRicate, AMRFinderPlus, DeepARG, fARGene, RGI) into a single unified tabular format. It supports a variety of summary options including an interactive summary. +#### argNorm + +
+Output files + +- `normalized/` + - `*.tsv`: ARG annotations normalised to ARO accessions, in tabular format +
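+argNorm can also be run standalone on hAMRonized reports. A rough sketch (the command layout follows the argNorm README at the time of writing and should be treated as an assumption; file names are placeholders, and `--hamronized` marks input that has already been passed through hAMRonization):
+
+```bash
+# Normalise a hAMRonized DeepARG report to ARO accessions
+argnorm deeparg -i hamronized_deeparg.tsv -o deeparg_normalized.tsv --hamronized
+```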
+
ARG summary table headers + +| Table column | Description | +| ---------------------------- | ---------------------------------------------------------------------------------------- | +| `ARO` | ARO accessions of the ARG | +| `confers_resistance_to` | ARO accessions of the drugs to which the ARG confers resistance | +| `resistance_to_drug_classes` | ARO accessions of the drug classes to which the drugs in `confers_resistance_to` belong | +
+ +[argNorm](https://github.com/BigDataBiology/argNorm) is a tool to normalize antibiotic resistance genes (ARGs) by mapping them to the antibiotic resistance ontology ([ARO](https://obofoundry.org/ontology/aro.html)) created by the CARD database. argNorm also enriches ARG annotations by categorising the drugs to which each antibiotic resistance gene confers resistance. + +argNorm takes the outputs of the [hAMRonization](#hamronization) tool for [ABRicate](#abricate), [AMRFinderPlus](#amrfinderplus), and [DeepARG](#deeparg) and normalizes the ARGs in the hAMRonization output to the ARO. + +#### comBGC
Output files - `comBGC/` - - `combgc_complete_summary.tsv`: summarised output from all BGC detection tools used in tsv format (all samples concatenated). + - `combgc_complete_summary.tsv`: summarised output from all BGC detection tools used in tsv format (all samples concatenated). This is the output given when the taxonomic classification is not activated (pipeline default). + - `combgc_complete_summary.tsv.gz`: gzipped summarised output from all BGC detection tools (all samples concatenated), including the taxonomic classification obtained when `--run_taxa_classification` is activated. - `*/combgc_summary.tsv`: summarised output from all applied BGC detection tools in tsv format for each sample.
diff --git a/docs/usage.md b/docs/usage.md index b5fb3dd7..6c3c1088 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -6,7 +6,7 @@ ## Introduction -nf-core/funcscan is a pipeline for efficient and parallelised screening of long nucleotide sequences such as contigs for antimicrobial peptide genes, antimicrobial resistance genes, and biosynthetic gene clusters. +nf-core/funcscan is a pipeline for efficient and parallelised screening of long nucleotide sequences such as contigs for antimicrobial peptide genes, antimicrobial resistance genes, and biosynthetic gene clusters. It can additionally identify the taxonomic origin of the sequences. ## Running the pipeline @@ -18,13 +18,14 @@ nextflow run nf-core/funcscan --input samplesheet.csv --outdir -profile This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles. -To run any of the three screening workflows (AMP, ARG, and/or BGC), switch them on by adding the respective flag(s) to the command: +To run any of the three screening workflows (AMP, ARG, and/or BGC) or taxonomic classification, switch them on by adding the respective flag(s) to the command: - `--run_amp_screening` - `--run_arg_screening` - `--run_bgc_screening` +- `--run_taxa_classification` -When switched on, all tools of the given workflow will be run by default. If you don't need specific tools, you can explicitly skip them. +When switched on, all tools of the given workflow will be run by default. If you don't need specific tools, you can explicitly skip them. The exception is hmmsearch, which needs to be explicitly switched on and provided with HMM screening files (AMP and BGC workflows, see [parameter documentation](/funcscan/parameters)). For the taxonomic classification, MMseqs2 is currently the only tool implemented in the pipeline. **Example:** You want to run AMP and ARG screening but you don't need the DeepARG tool of the ARG workflow and the Macrel tool of the AMP workflow. Your command would be: @@ -43,6 +44,31 @@ work # Directory containing temporary files required for the run # Other nextflow hidden files, eg. history of pipeline runs and old logs ``` +If you wish to repeatedly use the same parameters for multiple runs, rather than specifying each flag in the command, you can specify these in a params file. + +Pipeline settings can be provided in a `yaml` or `json` file via `-params-file `. + +:::warning +Do not use `-c ` to specify parameters as this will result in errors. Custom config files specified with `-c` must only be used for [tuning process resource specifications](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources), other infrastructural tweaks (such as output directories), or module arguments (args). +::: + +The above pipeline run specified with a params file in yaml format: + +```bash +nextflow run nf-core/funcscan -profile docker -params-file params.yaml +``` + +with `params.yaml` containing: + +```yaml +input: './samplesheet.csv' +outdir: './results/' +genome: 'GRCh37' +<...> +``` + +You can also generate such `YAML`/`JSON` files via [nf-core/launch](https://nf-co.re/launch). + ## Samplesheet input nf-core/funcscan takes FASTA files as input, typically contigs or whole genome sequences. To supply these to the pipeline, you will need to create a samplesheet with information about the samples you would like to analyse. Use this parameter to specify its location.
@@ -51,38 +77,72 @@ nf-core/funcscan takes FASTA files as input, typically contigs or whole genome s --input '[path to samplesheet file]' ``` -The input samplesheet has to be a comma-separated file (`.csv`) with 2 columns (`sample`, and `fasta`), and a header row as shown in the examples below. +The input samplesheet has to be a comma-separated file (`.csv`) with either 2 columns (`sample`, `fasta`) or 4 columns (`sample`, `fasta`, `protein`, `gbk`), and a header row as shown in the examples below. -```bash +If you already have annotated contigs with peptide sequences and an annotation file in GenBank format (`.gbk` or `.gbff`), you can supply these to the pipeline using the optional `protein` and `gbk` columns. If these additional columns are supplied, pipeline annotation (i.e. with Bakta, Prodigal, Pyrodigal, or Prokka) will be skipped and the corresponding annotation files used instead. + +For two columns (without pre-annotated data): + +```csv title="samplesheet.csv" sample,fasta sample_1,///wastewater_metagenome_contigs_1.fasta.gz sample_2,///wastewater_metagenome_contigs_2.fasta.gz ``` -| Column | Description | -| -------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `sample` | Custom sample name. This will be used to name all output files from the pipeline. Spaces in sample names are automatically converted to underscores (`_`). | -| `fasta` | Path or URL to a gzipped or uncompressed FASTA file. Accepted file suffixes are: `.fasta`, `.fna`, or `.fa`, or any of these with `.gz`, e.g. `.fa.gz`. | +For four columns (with pre-annotated data): + +```csv title="samplesheet.csv" +sample,fasta,protein,gbk +sample_1,///wastewater_metagenome_contigs_1.fasta.gz,///wastewater_metagenome_contigs_1.faa,///wastewater_metagenome_contigs_1.fasta.gbk +sample_2,///wastewater_metagenome_contigs_2.fasta.gz,///wastewater_metagenome_contigs_2.faa,///wastewater_metagenome_contigs_2.fasta.gbk +``` + +| Column | Description | +| --------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `sample` | Custom sample name. This will be used to name all output files from the pipeline. Spaces in sample names are automatically converted to underscores (`_`). | +| `fasta` | Path or URL to a gzipped or uncompressed FASTA file. Accepted file suffixes are: `.fasta`, `.fna`, or `.fa`, or any of these with `.gz`, e.g. `.fa.gz`. | +| `protein` | Optional path to a pre-generated amino acid FASTA file (`.faa`) containing protein annotations of `fasta`, optionally gzipped. Required if `gbk` is also given. | +| `gbk` | Optional path to a pre-generated annotation file in GenBank format (`.gbk` or `.gbff`) containing annotation information for `fasta`, optionally gzipped. Required if `protein` is also given. | An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline. -> ⚠️ We highly recommend performing quality control on input contigs before running the pipeline. You may not receive results for some tools if none of the contigs in a FASTA file reach certain thresholds. Check parameter documentation for relevant minimum contig parameters. +:::danger +We highly recommend performing quality control on input contigs before running the pipeline.
You may not receive results for some tools if none of the contigs in a FASTA file reach certain thresholds. Check parameter documentation for relevant minimum contig parameters. -## Notes on screening tools +For example, BGC screening ideally requires contigs of at least 3,000 bp, otherwise downstream tools may crash. +::: + +## Notes on screening tools and taxonomic classification The implementation of some tools in the pipeline may have some particular behaviours that you should be aware of before you run the pipeline. -### antiSMASH +### MMseqs2 -antiSMASH has a minimum contig parameter, in which only contigs of a certain length (or longer) will be screened. In cases where no hits are found in these, the tool ends successfully without hits. However if no contigs in an input file reach that minimum threshold, the tool will end with a 'failure' code, and cause the pipeline to crash. +MMseqs2 is currently the only taxonomic classification tool used in the pipeline to assign a taxonomic lineage to the input contigs. The database used to assign the taxonomic lineage can either be: + +- A custom database created by the user with `mmseqs createdb` externally and beforehand. If this flag is assigned, this database takes precedence over the default database set via `--taxa_classification_mmseqs_db_id`. + + ```bash + --taxa_classification_mmseqs_db '///' + ``` The contents of the directory should have files such as `.version` and `.taxonomy` in the top level. -To prevent entire pipeline failures due to a single 'bad sample', nf-core/funcscan will filter out any input sample in which none of the contigs reach the minimum contig length in bp specified with `--bgc_antismash_sampleminlength` (default: 1000). -> ⚠️ If a sample does not reach this contig length threshold, you will receive a warning in your console and in the `.nextflow.log` file, and no result files will exist for this sample in your results directory for this tool. +- An MMseqs2-ready database. These databases were compiled by the developers of MMseqs2 and can be selected using their labels. All available options can be found [here](https://github.com/soedinglab/MMseqs2/wiki#downloading-databases). Only use those databases that have taxonomy files available (i.e., Taxonomy == Yes). By default, the pipeline uses '[Kalamari](https://github.com/lskatz/Kalamari)' and runs an amino acid-based alignment. However, if you require a more comprehensive taxonomic classification, we recommend [GTDB](https://gtdb.ecogenomic.org/); in that case, remember to increase the memory, CPU threads, and time for the process `MMSEQS_TAXONOMY`. + + ```bash + --taxa_classification_mmseqs_db_id 'Kalamari' + ``` + +### antiSMASH + +antiSMASH has a minimum contig parameter, in which only contigs of a certain length (or longer) will be screened. In cases where no hits are found in these, the tool ends successfully without hits. However, if no contigs in an input file reach that minimum threshold, the tool will end with a 'failure' code, and cause the pipeline to crash. When the annotation is run with Prokka, the resulting `.gbk` file passed to antiSMASH may produce the error `translation longer than location allows` and end the pipeline run. This Prokka bug has been reported before (see [discussion on GitHub](https://github.com/antismash/antismash/discussions/450)) and is not likely to be fixed soon.
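+A minimal sketch of a BGC run that avoids this Prokka issue by keeping the default annotator (input/output paths are placeholders):
+
+```bash
+nextflow run nf-core/funcscan -profile docker \
+    --input samplesheet.csv \
+    --outdir results \
+    --run_bgc_screening \
+    --annotation_tool pyrodigal
+```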
-> ⚠️ If antiSMASH is run for BGC detection, we recommend to **not** run Prokka for annotation but instead use the default annotation tool (Pyrodigal) or switch to Prodigal, or (for bacteria only!) Bakta. +:::warning +If antiSMASH is run for BGC detection, we recommend **not** running Prokka for annotation but instead using the default annotation tool (Pyrodigal), switching to Prodigal, or (for bacteria only!) Bakta. +::: ## Databases and reference files @@ -90,9 +150,11 @@ Various tools of nf-core/funcscan use databases and reference files to operate. nf-core/funcscan offers the functionality to auto-download databases for you, and as these databases can be very large, we suggest storing these files in a central place from where you can reuse them across pipeline runs. -We **highly recommend** allowing the pipeline to download these databases for you on a first run, saving these to your results directory with `--save_databases`, then moving these to a different location (in case you wish to delete the results directory of this first run). An exception to this is HMM files where no auto-downloading functionality is possible. +If your infrastructure has internet access (particularly on compute nodes), we **highly recommend** allowing the pipeline to download these databases for you on a first run, saving these to your results directory with `--save_db`, then moving these to a different location (in case you wish to delete the results directory of this first run). An exception to this is HMM files where no auto-downloading functionality is possible. -> ⚠️ We generally do not recommend downloading the databases yourself, as this can often be non-trivial to do! +:::warning +We generally do not recommend downloading the databases yourself, as this can often be non-trivial to do! +::: As a reference, we will describe below where and how you can obtain databases and reference files used for tools included in the pipeline. @@ -100,7 +162,19 @@ As a reference, we will describe below where and how you can obtain databases an ### Bakta nf-core/funcscan offers multiple tools for annotating input sequences. Bakta is a new tool touted as a bacteria-only successor to the well-established Prokka. -To supply the preferred Bakta database (and not have the pipeline download it for every new run), use the flag `--annotation_bakta_db_localpath`. The full or light Bakta database must be downloaded from the Bakta Zenodo archive, the link of which can be found on the [Bakta GitHub repository](https://github.com/oschwengers/bakta#database-download). +To supply the preferred Bakta database (and not have the pipeline download it for every new run), use the flag `--annotation_bakta_db`. +The full or light Bakta database must be downloaded from the Bakta Zenodo archive. + +You can do this by installing Bakta via conda and using its dedicated download command: + +```bash +conda create -n bakta -c bioconda bakta +conda activate bakta + +bakta_db download --output --type +``` + +Alternatively, you can manually download the files via the links found on the [Bakta GitHub repository](https://github.com/oschwengers/bakta#database-download). Once downloaded, this must be untarred: @@ -111,45 +185,127 @@ tar xvzf db.tar.gz ``` And then passed to the pipeline with: ```bash ---annotation_bakta_db_localpath ///db/ +--annotation_bakta_db //// ``` -> ℹ️ The flag `--save_databases` saves the pipeline-downloaded databases in your results directory. You can then move these to a central cache directory of your choice for re-use in the future.
+The contents of the directory should have files such as `*.dmnd` in the top level. + +:::info +The flag `--save_db` saves the pipeline-downloaded databases in your results directory. You can then move these to a central cache directory of your choice for re-use in the future. +::: ### hmmsearch nf-core/funcscan allows screening of sequences for functional genes associated with various natural product types via Hidden Markov Models (HMMs) using hmmsearch. -This requires supplying a list of HMM files ending in `.hmm`, that have models for the particular molecule(s) or BGCs you are interested in. You can download these files from places such as [PFAM](https://www.ebi.ac.uk/interpro/download/Pfam/) for antimicrobial peptides (AMP), or the antiSMASH GitHub repository for [biosynthetic gene cluster](https://github.com/antismash/antismash/tree/master/antismash/detection/hmm_detection/data) related HMMs, or create them yourself. +This requires supplying a list of HMM files ending in `.hmm` that contain models for the particular molecule(s) or BGCs you are interested in. +You can download these files from places such as [PFAM](https://www.ebi.ac.uk/interpro/download/Pfam/) for antimicrobial peptides (AMP), or the antiSMASH GitHub repository for [biosynthetic gene cluster](https://github.com/antismash/antismash/tree/master/antismash/detection/hmm_detection/data) related HMMs, or create them yourself. + +You should place all HMMs in a directory, supply them to the AMP or BGC workflow and switch hmmsearch on: + +```bash +--amp_run_hmmsearch --amp_hmmsearch_models "////*.hmm" +``` + +:::warning +Make sure to wrap this path in double quotes when using an asterisk, so that Nextflow (not your shell) parses the wildcard. +::: + +### AMPcombi + +For AMPcombi, nf-core/funcscan will by default download the most recent version of the [DRAMP](http://dramp.cpu-bioinfor.org/) database as a reference database, and modify the files for aligning the AMP hits in the AMP workflow. + +nf-core/funcscan currently provides a python3 helper script to do these steps. + +```bash +mkdir -p ampcombi/amp_ref_database +cd ampcombi/ +wget https://github.com/nf-core/funcscan/raw//bin/ampcombi_download.py +python3 ampcombi_download.py +``` + +However, the user can also supply their own custom AMP database by following the guidelines in [AMPcombi](https://github.com/Darcy220606/AMPcombi). +This can then be passed to the pipeline with: + +```bash +--amp_ampcombi_db '///' +``` + +The contents of the directory should have files such as `*.dmnd` and `*.fasta` in the top level. + +:::warning +The pipeline will automatically run Pyrodigal instead of Prodigal if the parameters `--run_annotation_tool prodigal --run_amp_screening` are both provided. +This is due to an incompatibility issue of Prodigal's output `.gbk` file with multiple downstream tools. +::: + +### ABRicate + +The default ABRicate installation comes with a series of 'default' databases: + +- NCBI AMRFinderPlus (`ncbi`) +- CARD (`card`) +- ResFinder (`resfinder`) +- ARG-ANNOT (`argannot`) +- MEGARES (`megares`) +- EcOH (`ecoh`) +- PlasmidFinder (`plasmidfinder`) +- VFDB (`vfdb`) +- Ecoli_VF (`ecoli_vf`) + +Each can be specified by using the nf-core/funcscan flag, for example for CARD: `--arg_abricate_db_id card`. + +ABRicate also allows you to download additional and/or use custom databases. +For both of these, you will need to have your own local installation of ABRicate.
+You then can download/add the custom database to the local installation's database directory, and supply this directory to the pipeline with the flag `--arg_abricate_db`, in combination with the name of the new database to `--arg_abricate_db_id `. + +For example, if you want to use the `bacmet2` database that does not come with the default installation, you could do: + +```bash +## Create conda environment +conda create -n abricate -c bioconda abricate +conda activate abricate + +## Download the bacmet2 database +abricate-get_db --db bacmet2 ## the logging will tell you where the database is downloaded to, e.g. /home//bin/miniconda3/envs/abricate/db/bacmet2/sequences +``` -You should place all HMMs in a directory and supply them e.g. to AMP models: +The resulting directory and database name can be passed to the pipeline as follows ```bash ---amp_hmmsearch_models '////*.hmm' +--arg_abricate_db ////db/ --arg_abricate_db_id bacmet2 ``` +The contents of the directory should have a directory named with the database name in the top level (e.g. `bacmet2/`). + ### AMRFinderPlus -AMRFinderPlus relies on NCBI’s curated Reference Gene Database and curated collection of Hidden Markov Models. +AMRFinderPlus relies on NCBI's curated Reference Gene Database and curated collection of Hidden Markov Models. nf-core/funcscan will download this database for you, unless the path to a local version is given with: ```bash ---arg_amrfinderplus_db '////' +--arg_amrfinderplus_db '////latest' ``` -You can either: +You must give the `latest` directory to the pipeline, and the contents of the directory should include files such as `*.nbd`, `*.nhr`, `versions.txt` etc. in the top level. -1. Install AMRFinderPlus from [bioconda](https://bioconda.github.io/recipes/ncbi-amrfinderplus/README.html?highlight=amrfinderplus) -2. Run `amrfinder --update`, which will download the latest version of the AMRFinderPlus database to the default location (location of the AMRFinderPlus binaries/data). It creates a directory in the format YYYY-MM-DD.version (e.g., `//data/2022-12-19.1/`). +To obtain a local version of the database: -Or: +1. Install AMRFinderPlus from [bioconda](https://bioconda.github.io/recipes/ncbi-amrfinderplus/README.html?highlight=amrfinderplus). + To ensure database compatibility, please use the same version as is used in your nf-core/funcscan release (check version in file `//funcscan/modules/nf-core/amrfinderplus/run/environment.yml`). -1. Download the files directly from the [NCBI FTP site](https://ftp.ncbi.nlm.nih.gov/pathogen/Antimicrobial_resistance/AMRFinderPlus/database/latest/) +```bash +conda create -n amrfinderplus -c bioconda ncbi-amrfinderplus=3.12.8 +conda activate amrfinderplus +``` -The downloaded database folder contains the AMR related files: +2. Run `amrfinder --update`, which will download the latest version of the AMRFinderPlus database to the default location (location of the AMRFinderPlus binaries/data). + It creates a directory in the format YYYY-MM-DD.version (e.g., `//data/2024-01-31.1/`). -```console +
+AMR related files in the database folder + +```tree / ├── AMR_CDS.* ├── AMR_DNA-Campylobacter.* @@ -170,9 +326,11 @@ The downloaded database folder contains the AMR related files: └── version.txt ``` -2. Supply the database directory path to the pipeline as described above. +
-> ℹ️ The flag `--save_databases` saves the pipeline-downloaded databases in your results directory. You can then move these to a central cache directory of your choice for re-use in the future. +:::info +The flag `--save_db` saves the pipeline-downloaded databases in your results directory. You can then move these to a central cache directory of your choice for re-use in the future. +::: ### DeepARG @@ -183,6 +341,12 @@ nf-core/funcscan can download this database for you, however it is very slow and You can either: 1. Install DeepARG from [bioconda](https://bioconda.github.io/recipes/deeparg/README.html?highlight=deeparg) + +```bash +conda create -n deeparg -c bioconda deeparg +conda activate deeparg +``` + 2. Run `deeparg download_data -o ////` Or download the files directly from @@ -195,13 +359,67 @@ Note that more recent database versions maybe available from the [ARGMiner servi You can then supply the path to resulting database directory with: ```bash ---arg_deeparg_data '/////' +--arg_deeparg_db '/////' ``` -Note that if you supply your own database that is not downloaded by the pipeline, make sure to also supply `--arg_deeparg_data_version` along +The contents of the directory should include directories such as `database`, `model`, and files such as `deeparg.gz` etc. in the top level. + +Note that if you supply your own database that is not downloaded by the pipeline, make sure to also supply `--arg_deeparg_db_version` along with the version number so hAMRonization will correctly display the database version in the summary report. -> ℹ️ The flag `--save_databases` saves the pipeline-downloaded databases in your results directory. You can then move these to a central cache directory of your choice for re-use in the future. +:::info +The flag `--save_db` saves the pipeline-downloaded databases in your results directory. +You can then move these to a central cache directory of your choice for re-use in the future. +::: + +### MMSeqs2 + +To download MMSeqs2 databases for taxonomic classification, you can install `mmseqs` via conda: + +```bash +conda create -n mmseqs2 -c bioconda mmseqs2 +conda activate mmseqs2 +``` + +Then to download the database of your choice + +```bash +mmseqs databases tmp/ +``` + +:::info +You may want to specify a different location for `tmp/`, we just borrowed here from the official `mmseqs` [documentation](https://github.com/soedinglab/mmseqs2/wiki#downloading-databases). +::: + +### RGI + +RGI requires the database CARD which can be downloaded by nf-core/funcscan or supplied by the user manually. +To download and supply the database yourself, do: + +1. Download [CARD](https://card.mcmaster.ca/latest/data) + +```bash +wget https://card.mcmaster.ca/latest/data +``` + +2. Extract the (`.tar.bz2`) archive. + +```bash +tar -xjvf data +``` + +You can then supply the path to resulting database directory with: + +```bash +--arg_rgi_db '////' +``` + +The contents of the directory should include files such as `card.json`, `aro_index.tsv`, `snps.txt` etc. in the top level. + +:::info +The flag `--save_db` saves the pipeline-downloaded databases in your results directory. +You can then move these to a central cache directory of your choice for re-use in the future. +::: ### antiSMASH @@ -213,30 +431,52 @@ The same applies for the antiSMASH installation directory, which is also a requi To supply the database directories to the pipeline: -1. Install antiSMASH from [bioconda](https://bioconda.github.io/recipes/antismash-lite/README.html) -2. 
Run `download-antismash-databases` +1. Install antiSMASH from [bioconda](https://bioconda.github.io/recipes/antismash-lite/README.html). To ensure database compatibility, please use the same version as is used in your nf-core/funcscan release (check version in file `//funcscan/modules/nf-core/antismash/antismashlite/environment.yml`). + +```bash +conda create -n antismash-lite -c bioconda antismash-lite +conda activate antismash-lite +``` + +2. Run the command `download-antismash-databases`. Use `--database-dir` to specify a new location. 3. You can then supply the paths to the resulting databases and the whole installation directory with: ```bash ---bgc_antismash_databases '/////' ---bgc_antismash_installationdirectory '/////' +--bgc_antismash_db '/////' +--bgc_antismash_installdir '/////antismash' ``` -Note that the names of the supplied folders must differ from each other (e.g. `antismash_db` and `antismash_dir`). If they are not provided, the databases will be auto-downloaded upon each BGC screening run of the pipeline. +Note that the names of the supplied folders must differ from each other (e.g. `antismash_db` and `antismash_dir`). +The contents of the database directory should include directories such as `as-js/`, `clusterblast/`, `clustercompare/` etc. in the top level. +The contents of the installation directory should include directories such as `common/` `config/` and files such as `custom_typing.py` `custom_typing.pyi` etc. in the top level. + +:::info +If installing with conda, the installation directory will be `lib/python3.10/site-packages/antismash` from the base directory of your conda install or conda environment directory. +::: -> ℹ️ The flag `--save_databases` saves the pipeline-downloaded databases in your results directory. You can then move these to a central cache directory of your choice for re-use in the future. +Note that the names of the two required folders must differ from each other (i.e., the `--bgc_antismash_db` directory must not be called `antismash`). +If they are not provided, the databases will be auto-downloaded upon each BGC screening run of the pipeline. -> ℹ️ If installing with conda, the installation directory will be `lib/python3.8/site-packages/antismash` from the base directory of your conda install or conda environment directory. +:::info +The flag `--save_db` saves the pipeline-downloaded databases in your results directory. You can then move these to a central cache directory of your choice for re-use in the future. +::: ### DeepBGC -:::danger -The dependencies for the deepBGC database download are currently broken. Until deepBGC gets fixed, users need to provide the database files themselves or just skip the tool (`--bgc_skip_deepbgc`). To provide the files yourself: The links for the database files can be found in [this script](https://github.com/Merck/deepbgc/blob/476934b61521d23c1122a1cfada176ee5e402741/deepbgc/data.py) from the deepBGC GitHub repository. The command `deepbgc download` as described below will not work at the moment. -::: +DeepBGC relies on trained models and Pfams to run its analysis. +nf-core/funcscan will download these databases for you. If the flag `--save_db` is set, the downloaded files will be stored in the output directory under `databases/deepbgc/`. -DeepBGC relies on trained models and Pfams to run its analysis. nf-core/funcscan will download these databases for you. If the flag `--save_databases` is set, the downloaded files will be stored in the output directory under `databases/deepbgc/`. 
+Alternatively, you can download the database locally with: -Alternatively, if you already downloaded the database locally with `deepbgc download`, you can indicate the path to the database folder with `--bgc_deepbgc_database ///`. The folder has to contain the subfolders as in the database folder downloaded by `deepbgc download`: +```bash +conda create -n deepbgc -c bioconda deepbgc +conda activate deepbgc +export DEEPBGC_DOWNLOADS_DIR= +deepbgc download +``` + +You can then indicate the path to the database folder in the pipeline with `--bgc_deepbgc_db ///`. +The contents of the database directory should include directories such as `common`, `0.1.0` in the top level: ```console deepbgc_db/ @@ -249,31 +489,6 @@ deepbgc_db/ └── myDetectors*.pkl ``` -If you wish to repeatedly use the same parameters for multiple runs, rather than specifying each flag in the command, you can specify these in a params file. - -Pipeline settings can be provided in a `yaml` or `json` file via `-params-file `. - -:::warning -Do not use `-c ` to specify parameters as this will result in errors. Custom config files specified with `-c` must only be used for [tuning process resource specifications](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources), other infrastructural tweaks (such as output directories), or module arguments (args). -::: - -The above pipeline run specified with a params file in yaml format: - -```bash -nextflow run nf-core/funcscan -profile docker -params-file params.yaml -``` - -with `params.yaml` containing: - -```yaml -input: './samplesheet.csv' -outdir: './results/' -genome: 'GRCh37' -<...> -``` - -You can also generate such `YAML`/`JSON` files via [nf-core/launch](https://nf-co.re/launch). - ## Updating the pipeline When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline: @@ -334,6 +549,8 @@ If `-profile` is not specified, the pipeline will run locally and expect all sof - A generic configuration profile to be used with [Charliecloud](https://hpc.github.io/charliecloud/) - `apptainer` - A generic configuration profile to be used with [Apptainer](https://apptainer.org/) +- `wave` + - A generic configuration profile to enable [Wave](https://seqera.io/wave/) containers. Use together with one of the above (requires Nextflow ` 24.03.0-edge` or later). - `conda` - A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter, Charliecloud, or Apptainer. diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy deleted file mode 100755 index 01b8653d..00000000 --- a/lib/NfcoreTemplate.groovy +++ /dev/null @@ -1,352 +0,0 @@ -// -// This file holds several functions used within the nf-core pipeline template. 
-// - -import org.yaml.snakeyaml.Yaml -import groovy.json.JsonOutput - -class NfcoreTemplate { - - // - // Check AWS Batch related parameters have been specified correctly - // - public static void awsBatch(workflow, params) { - if (workflow.profile.contains('awsbatch')) { - // Check params.awsqueue and params.awsregion have been set if running on AWSBatch - assert (params.awsqueue && params.awsregion) : "Specify correct --awsqueue and --awsregion parameters on AWSBatch!" - // Check outdir paths to be S3 buckets if running on AWSBatch - assert params.outdir.startsWith('s3:') : "Outdir not on S3 - specify S3 Bucket to run on AWSBatch!" - } - } - - // - // Warn if a -profile or Nextflow config has not been provided to run the pipeline - // - public static void checkConfigProvided(workflow, log) { - if (workflow.profile == 'standard' && workflow.configFiles.size() <= 1) { - log.warn "[$workflow.manifest.name] You are attempting to run the pipeline without any custom configuration!\n\n" + - "This will be dependent on your local compute environment but can be achieved via one or more of the following:\n" + - " (1) Using an existing pipeline profile e.g. `-profile docker` or `-profile singularity`\n" + - " (2) Using an existing nf-core/configs for your Institution e.g. `-profile crick` or `-profile uppmax`\n" + - " (3) Using your own local custom config e.g. `-c /path/to/your/custom.config`\n\n" + - "Please refer to the quick start section and usage docs for the pipeline.\n " - } - } - - // - // Generate version string - // - public static String version(workflow) { - String version_string = "" - - if (workflow.manifest.version) { - def prefix_v = workflow.manifest.version[0] != 'v' ? 'v' : '' - version_string += "${prefix_v}${workflow.manifest.version}" - } - - if (workflow.commitId) { - def git_shortsha = workflow.commitId.substring(0, 7) - version_string += "-g${git_shortsha}" - } - - return version_string - } - - // - // Construct and send completion email - // - public static void email(workflow, params, summary_params, projectDir, log, multiqc_report=[]) { - - // Set up the e-mail variables - def subject = "[$workflow.manifest.name] Successful: $workflow.runName" - if (!workflow.success) { - subject = "[$workflow.manifest.name] FAILED: $workflow.runName" - } - - def summary = [:] - for (group in summary_params.keySet()) { - summary << summary_params[group] - } - - def misc_fields = [:] - misc_fields['Date Started'] = workflow.start - misc_fields['Date Completed'] = workflow.complete - misc_fields['Pipeline script file path'] = workflow.scriptFile - misc_fields['Pipeline script hash ID'] = workflow.scriptId - if (workflow.repository) misc_fields['Pipeline repository Git URL'] = workflow.repository - if (workflow.commitId) misc_fields['Pipeline repository Git Commit'] = workflow.commitId - if (workflow.revision) misc_fields['Pipeline Git branch/tag'] = workflow.revision - misc_fields['Nextflow Version'] = workflow.nextflow.version - misc_fields['Nextflow Build'] = workflow.nextflow.build - misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp - - def email_fields = [:] - email_fields['version'] = NfcoreTemplate.version(workflow) - email_fields['runName'] = workflow.runName - email_fields['success'] = workflow.success - email_fields['dateComplete'] = workflow.complete - email_fields['duration'] = workflow.duration - email_fields['exitStatus'] = workflow.exitStatus - email_fields['errorMessage'] = (workflow.errorMessage ?: 'None') - email_fields['errorReport'] = 
(workflow.errorReport ?: 'None') - email_fields['commandLine'] = workflow.commandLine - email_fields['projectDir'] = workflow.projectDir - email_fields['summary'] = summary << misc_fields - - // On success try attach the multiqc report - def mqc_report = null - try { - if (workflow.success) { - mqc_report = multiqc_report.getVal() - if (mqc_report.getClass() == ArrayList && mqc_report.size() >= 1) { - if (mqc_report.size() > 1) { - log.warn "[$workflow.manifest.name] Found multiple reports from process 'MULTIQC', will use only one" - } - mqc_report = mqc_report[0] - } - } - } catch (all) { - if (multiqc_report) { - log.warn "[$workflow.manifest.name] Could not attach MultiQC report to summary email" - } - } - - // Check if we are only sending emails on failure - def email_address = params.email - if (!params.email && params.email_on_fail && !workflow.success) { - email_address = params.email_on_fail - } - - // Render the TXT template - def engine = new groovy.text.GStringTemplateEngine() - def tf = new File("$projectDir/assets/email_template.txt") - def txt_template = engine.createTemplate(tf).make(email_fields) - def email_txt = txt_template.toString() - - // Render the HTML template - def hf = new File("$projectDir/assets/email_template.html") - def html_template = engine.createTemplate(hf).make(email_fields) - def email_html = html_template.toString() - - // Render the sendmail template - def max_multiqc_email_size = (params.containsKey('max_multiqc_email_size') ? params.max_multiqc_email_size : 0) as nextflow.util.MemoryUnit - def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "$projectDir", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes() ] - def sf = new File("$projectDir/assets/sendmail_template.txt") - def sendmail_template = engine.createTemplate(sf).make(smail_fields) - def sendmail_html = sendmail_template.toString() - - // Send the HTML e-mail - Map colors = logColours(params.monochrome_logs) - if (email_address) { - try { - if (params.plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') } - // Try to send HTML e-mail using sendmail - [ 'sendmail', '-t' ].execute() << sendmail_html - log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-" - } catch (all) { - // Catch failures and try with plaintext - def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ] - if ( mqc_report.size() <= max_multiqc_email_size.toBytes() ) { - mail_cmd += [ '-A', mqc_report ] - } - mail_cmd.execute() << email_html - log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (mail)-" - } - } - - // Write summary e-mail HTML to a file - def output_d = new File("${params.outdir}/pipeline_info/") - if (!output_d.exists()) { - output_d.mkdirs() - } - def output_hf = new File(output_d, "pipeline_report.html") - output_hf.withWriter { w -> w << email_html } - def output_tf = new File(output_d, "pipeline_report.txt") - output_tf.withWriter { w -> w << email_txt } - } - - // - // Construct and send a notification to a web server as JSON - // e.g. 
Microsoft Teams and Slack - // - public static void IM_notification(workflow, params, summary_params, projectDir, log) { - def hook_url = params.hook_url - - def summary = [:] - for (group in summary_params.keySet()) { - summary << summary_params[group] - } - - def misc_fields = [:] - misc_fields['start'] = workflow.start - misc_fields['complete'] = workflow.complete - misc_fields['scriptfile'] = workflow.scriptFile - misc_fields['scriptid'] = workflow.scriptId - if (workflow.repository) misc_fields['repository'] = workflow.repository - if (workflow.commitId) misc_fields['commitid'] = workflow.commitId - if (workflow.revision) misc_fields['revision'] = workflow.revision - misc_fields['nxf_version'] = workflow.nextflow.version - misc_fields['nxf_build'] = workflow.nextflow.build - misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp - - def msg_fields = [:] - msg_fields['version'] = NfcoreTemplate.version(workflow) - msg_fields['runName'] = workflow.runName - msg_fields['success'] = workflow.success - msg_fields['dateComplete'] = workflow.complete - msg_fields['duration'] = workflow.duration - msg_fields['exitStatus'] = workflow.exitStatus - msg_fields['errorMessage'] = (workflow.errorMessage ?: 'None') - msg_fields['errorReport'] = (workflow.errorReport ?: 'None') - msg_fields['commandLine'] = workflow.commandLine.replaceFirst(/ +--hook_url +[^ ]+/, "") - msg_fields['projectDir'] = workflow.projectDir - msg_fields['summary'] = summary << misc_fields - - // Render the JSON template - def engine = new groovy.text.GStringTemplateEngine() - // Different JSON depending on the service provider - // Defaults to "Adaptive Cards" (https://adaptivecards.io), except Slack which has its own format - def json_path = hook_url.contains("hooks.slack.com") ? "slackreport.json" : "adaptivecard.json" - def hf = new File("$projectDir/assets/${json_path}") - def json_template = engine.createTemplate(hf).make(msg_fields) - def json_message = json_template.toString() - - // POST - def post = new URL(hook_url).openConnection(); - post.setRequestMethod("POST") - post.setDoOutput(true) - post.setRequestProperty("Content-Type", "application/json") - post.getOutputStream().write(json_message.getBytes("UTF-8")); - def postRC = post.getResponseCode(); - if (! 
postRC.equals(200)) { - log.warn(post.getErrorStream().getText()); - } - } - - // - // Dump pipeline parameters in a json file - // - public static void dump_parameters(workflow, params) { - def output_d = new File("${params.outdir}/pipeline_info/") - if (!output_d.exists()) { - output_d.mkdirs() - } - - def timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') - def output_pf = new File(output_d, "params_${timestamp}.json") - def jsonStr = JsonOutput.toJson(params) - output_pf.text = JsonOutput.prettyPrint(jsonStr) - } - - // - // Print pipeline summary on completion - // - public static void summary(workflow, params, log) { - Map colors = logColours(params.monochrome_logs) - if (workflow.success) { - if (workflow.stats.ignoredCount == 0) { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Pipeline completed successfully${colors.reset}-" - } else { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.yellow} Pipeline completed successfully, but with errored process(es) ${colors.reset}-" - } - } else { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed with errors${colors.reset}-" - } - } - - // - // ANSII Colours used for terminal logging - // - public static Map logColours(Boolean monochrome_logs) { - Map colorcodes = [:] - - // Reset / Meta - colorcodes['reset'] = monochrome_logs ? '' : "\033[0m" - colorcodes['bold'] = monochrome_logs ? '' : "\033[1m" - colorcodes['dim'] = monochrome_logs ? '' : "\033[2m" - colorcodes['underlined'] = monochrome_logs ? '' : "\033[4m" - colorcodes['blink'] = monochrome_logs ? '' : "\033[5m" - colorcodes['reverse'] = monochrome_logs ? '' : "\033[7m" - colorcodes['hidden'] = monochrome_logs ? '' : "\033[8m" - - // Regular Colors - colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m" - colorcodes['red'] = monochrome_logs ? '' : "\033[0;31m" - colorcodes['green'] = monochrome_logs ? '' : "\033[0;32m" - colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m" - colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m" - colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m" - colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m" - colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m" - - // Bold - colorcodes['bblack'] = monochrome_logs ? '' : "\033[1;30m" - colorcodes['bred'] = monochrome_logs ? '' : "\033[1;31m" - colorcodes['bgreen'] = monochrome_logs ? '' : "\033[1;32m" - colorcodes['byellow'] = monochrome_logs ? '' : "\033[1;33m" - colorcodes['bblue'] = monochrome_logs ? '' : "\033[1;34m" - colorcodes['bpurple'] = monochrome_logs ? '' : "\033[1;35m" - colorcodes['bcyan'] = monochrome_logs ? '' : "\033[1;36m" - colorcodes['bwhite'] = monochrome_logs ? '' : "\033[1;37m" - - // Underline - colorcodes['ublack'] = monochrome_logs ? '' : "\033[4;30m" - colorcodes['ured'] = monochrome_logs ? '' : "\033[4;31m" - colorcodes['ugreen'] = monochrome_logs ? '' : "\033[4;32m" - colorcodes['uyellow'] = monochrome_logs ? '' : "\033[4;33m" - colorcodes['ublue'] = monochrome_logs ? '' : "\033[4;34m" - colorcodes['upurple'] = monochrome_logs ? '' : "\033[4;35m" - colorcodes['ucyan'] = monochrome_logs ? '' : "\033[4;36m" - colorcodes['uwhite'] = monochrome_logs ? '' : "\033[4;37m" - - // High Intensity - colorcodes['iblack'] = monochrome_logs ? '' : "\033[0;90m" - colorcodes['ired'] = monochrome_logs ? '' : "\033[0;91m" - colorcodes['igreen'] = monochrome_logs ? '' : "\033[0;92m" - colorcodes['iyellow'] = monochrome_logs ? '' : "\033[0;93m" - colorcodes['iblue'] = monochrome_logs ? 
'' : "\033[0;94m" - colorcodes['ipurple'] = monochrome_logs ? '' : "\033[0;95m" - colorcodes['icyan'] = monochrome_logs ? '' : "\033[0;96m" - colorcodes['iwhite'] = monochrome_logs ? '' : "\033[0;97m" - - // Bold High Intensity - colorcodes['biblack'] = monochrome_logs ? '' : "\033[1;90m" - colorcodes['bired'] = monochrome_logs ? '' : "\033[1;91m" - colorcodes['bigreen'] = monochrome_logs ? '' : "\033[1;92m" - colorcodes['biyellow'] = monochrome_logs ? '' : "\033[1;93m" - colorcodes['biblue'] = monochrome_logs ? '' : "\033[1;94m" - colorcodes['bipurple'] = monochrome_logs ? '' : "\033[1;95m" - colorcodes['bicyan'] = monochrome_logs ? '' : "\033[1;96m" - colorcodes['biwhite'] = monochrome_logs ? '' : "\033[1;97m" - - return colorcodes - } - - // - // Does what is says on the tin - // - public static String dashedLine(monochrome_logs) { - Map colors = logColours(monochrome_logs) - return "-${colors.dim}----------------------------------------------------${colors.reset}-" - } - - // - // nf-core logo - // - public static String logo(workflow, monochrome_logs) { - Map colors = logColours(monochrome_logs) - String workflow_version = NfcoreTemplate.version(workflow) - String.format( - """\n - ${dashedLine(monochrome_logs)} - ${colors.green},--.${colors.black}/${colors.green},-.${colors.reset} - ${colors.blue} ___ __ __ __ ___ ${colors.green}/,-._.--~\'${colors.reset} - ${colors.blue} |\\ | |__ __ / ` / \\ |__) |__ ${colors.yellow}} {${colors.reset} - ${colors.blue} | \\| | \\__, \\__/ | \\ |___ ${colors.green}\\`-._,-`-,${colors.reset} - ${colors.green}`._,._,\'${colors.reset} - ${colors.purple} ${workflow.manifest.name} ${workflow_version}${colors.reset} - ${dashedLine(monochrome_logs)} - """.stripIndent() - ) - } -} diff --git a/lib/Utils.groovy b/lib/Utils.groovy deleted file mode 100644 index 8d030f4e..00000000 --- a/lib/Utils.groovy +++ /dev/null @@ -1,47 +0,0 @@ -// -// This file holds several Groovy functions that could be useful for any Nextflow pipeline -// - -import org.yaml.snakeyaml.Yaml - -class Utils { - - // - // When running with -profile conda, warn if channels have not been set-up appropriately - // - public static void checkCondaChannels(log) { - Yaml parser = new Yaml() - def channels = [] - try { - def config = parser.load("conda config --show channels".execute().text) - channels = config.channels - } catch(NullPointerException | IOException e) { - log.warn "Could not verify conda channel configuration." - return - } - - // Check that all channels are present - // This channel list is ordered by required channel priority. 
- def required_channels_in_order = ['conda-forge', 'bioconda', 'defaults'] - def channels_missing = ((required_channels_in_order as Set) - (channels as Set)) as Boolean - - // Check that they are in the right order - def channel_priority_violation = false - def n = required_channels_in_order.size() - for (int i = 0; i < n - 1; i++) { - channel_priority_violation |= !(channels.indexOf(required_channels_in_order[i]) < channels.indexOf(required_channels_in_order[i+1])) - } - - if (channels_missing | channel_priority_violation) { - log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + - " There is a problem with your Conda configuration!\n\n" + - " You will need to set-up the conda-forge and bioconda channels correctly.\n" + - " Please refer to https://bioconda.github.io/\n" + - " The observed channel order is \n" + - " ${channels}\n" + - " but the following channel order is required:\n" + - " ${required_channels_in_order}\n" + - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" - } - } -} diff --git a/lib/WorkflowFuncscan.groovy b/lib/WorkflowFuncscan.groovy deleted file mode 100755 index 1b7427de..00000000 --- a/lib/WorkflowFuncscan.groovy +++ /dev/null @@ -1,200 +0,0 @@ -// -// This file holds several functions specific to the workflow/funcscan.nf in the nf-core/funcscan pipeline -// - -import nextflow.Nextflow -import groovy.text.SimpleTemplateEngine - -class WorkflowFuncscan { - - // - // Check and validate parameters - // - public static void initialise(params, log) { - - genomeExistsError(params, log) - - //if (!params.fasta) { - // Nextflow.error "Genome fasta file not specified with e.g. '--fasta genome.fa' or via a detectable config file." - //} - } - - // - // Get workflow summary for MultiQC - // - public static String paramsSummaryMultiqc(workflow, summary) { - String summary_section = '' - for (group in summary.keySet()) { - def group_params = summary.get(group) // This gets the parameters of that particular group - if (group_params) { - summary_section += "

<p style=\"font-size:110%\"><b>$group</b></p>\n" - summary_section += "    <dl class=\"dl-horizontal\">\n" - for (param in group_params.keySet()) { - summary_section += "        <dt>$param</dt><dd><samp>${group_params.get(param) ?: '<span style=\"color:#999999;\">N/A</span>'}</samp></dd>\n" - } - summary_section += "    </dl>
\n" - } - } - - String yaml_file_text = "id: '${workflow.manifest.name.replace('/','-')}-summary'\n" - yaml_file_text += "description: ' - this information is collected when the pipeline is started.'\n" - yaml_file_text += "section_name: '${workflow.manifest.name} Workflow Summary'\n" - yaml_file_text += "section_href: 'https://github.com/${workflow.manifest.name}'\n" - yaml_file_text += "plot_type: 'html'\n" - yaml_file_text += "data: |\n" - yaml_file_text += "${summary_section}" - return yaml_file_text - } - - // - // Generate methods description for MultiQC - // - - public static String toolCitationText(params) { - - // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 2023)" : "", - // Uncomment function in methodsDescriptionText to render in MultiQC report - def preprocessing_text = "The pipeline used the following tools: preprocessing included bioawk (Li 2023)." - - def annotation_text = [ - "Annotation was carried out with:", - params.annotation_tool == 'prodigal' ? "Prodigal (Hyatt et al. 2010)." : "", - params.annotation_tool == 'pyrodigal' ? "Pyrodigal (Larralde 2022)." : "", - params.annotation_tool == 'bakta' ? "BAKTA (Schwengers et al. 2021)." : "", - params.annotation_tool == 'prokka' ? "PROKKA (Seemann 2014)." : "", - ].join(' ').trim() - - def amp_text = [ - "The following antimicrobial peptide screening tools were used:", - !params.amp_skip_amplify ? "AMPlify (Li et al. 2022)," : "", - !params.amp_skip_macrel ? "Macrel (Santos-Júnior et al. 2020)," : "", - !params.amp_skip_ampir ? "ampir (Fingerhut et al. 2021)," : "", - !params.amp_skip_hmmsearch ? "HMMER (Eddy 2011)," : "", - ". The output from the antimicrobial peptide screening tools were standardised and summarised with AMPcombi (Ibrahim and Perelo 2023)." - ].join(' ').trim().replaceAll(", \\.", ".") - - def arg_text = [ - "The following antimicrobial resistance gene screening tools were used:", - !params.arg_skip_fargene ? "fARGene (Berglund et al. 2019)," : "", - !params.arg_skip_rgi ? "RGI (Alcock et al. 2020)," : "", - !params.arg_skip_amrfinderplus ? "AMRfinderplus (Feldgarden et al. 2021)," : "", - !params.arg_skip_deeparg ? "deepARG (Arango-Argoty 2018)," : "", - !params.arg_skip_abricate ? "ABRicate (Seemann 2020)," : "", - ". The output from the antimicrobial resistance gene screening tools were standardised and summarised with hAMRonization (Maguire et al. 2023)." - ].join(' ').trim().replaceAll(", +\\.", ".") - - def bgc_text = [ - "The following biosynthetic gene cluster screening tools were used:", - !params.bgc_skip_antismash ? "antiSMASH (Blin et al. 2021)," : "", - !params.bgc_skip_deepbgc ? "deepBGC (Hannigan et al. 2019)," : "", - !params.bgc_skip_gecco ? "GECCO (Carroll et al. 2021)," : "", - !params.bgc_skip_hmmsearch ? "HMMER (Eddy 2011)," : "", - ". The output from the biosynthetic gene cluster screening tools were standardised and summarised with comBGC (Frangenberg et al. 2023)." - ].join(' ').replaceAll(", +\\.", ".").trim() - - def postprocessing_text = "Run statistics were reported using MultiQC (Ewels et al. 2016)." - - def citation_text = [ - preprocessing_text, - annotation_text, - params.run_amp_screening ? amp_text : "", - params.run_arg_screening ? arg_text : "", - params.run_bgc_screening ? bgc_text : "", - postprocessing_text, - ].join(' ').trim() - - return citation_text - } - - public static String toolBibliographyText(params) { - - def preprocessing_text = "
<li>Li, H. (2023). bioawk: BWK awk modified for biological data. Github. Retrieved July 12, 2023, from https://github.com/lh3/bioawk</li>" - - def annotation_text = [ - params.annotation_tool == 'prodigal' ? "
<li>Hyatt, D., Chen, G. L., Locascio, P. F., Land, M. L., Larimer, F. W., & Hauser, L. J. (2010). Prodigal: prokaryotic gene recognition and translation initiation site identification. BMC bioinformatics, 11, 119. DOI: 10.1186/1471-2105-11-119</li>" : "", - params.annotation_tool == 'pyrodigal' ? "
<li>Larralde, M. (2022). Pyrodigal: Python bindings and interface to Prodigal, an efficient method for gene prediction in prokaryotes. Journal of Open Source Software, 7(72), 4296. DOI: 10.21105/joss.04296</li>" : "", - params.annotation_tool == 'bakta' ? "
<li>Schwengers, O., Jelonek, L., Dieckmann, M. A., Beyvers, S., Blom, J., & Goesmann, A. (2021). Bakta: rapid and standardized annotation of bacterial genomes via alignment-free sequence identification. Microbial Genomics, 7(11). DOI: 10.1099/mgen.0.000685</li>" : "", - params.annotation_tool == 'prokka' ? "
<li>Seemann, T. (2014). Prokka: rapid prokaryotic genome annotation. Bioinformatics (Oxford, England), 30(14), 2068–2069. DOI: 10.1093/bioinformatics/btu153</li>" : "", - ].join(' ').trim() - - def amp_text = [ - !params.amp_skip_amplify ? "
<li>Li, C., Sutherland, D., Hammond, S. A., Yang, C., Taho, F., Bergman, L., Houston, S., Warren, R. L., Wong, T., Hoang, L., Cameron, C. E., Helbing, C. C., & Birol, I. (2022). AMPlify: attentive deep learning model for discovery of novel antimicrobial peptides effective against WHO priority pathogens. BMC genomics, 23(1), 77. DOI: 10.1186/s12864-022-08310-4</li>" : "", - !params.amp_skip_macrel ? "
<li>Santos-Júnior, C. D., Pan, S., Zhao, X. M., & Coelho, L. P. (2020). Macrel: antimicrobial peptide screening in genomes and metagenomes. PeerJ, 8, e10555. DOI: 10.7717/peerj.10555</li>" : "", - !params.amp_skip_ampir ? "
<li>Fingerhut, L., Miller, D. J., Strugnell, J. M., Daly, N. L., & Cooke, I. R. (2021). ampir: an R package for fast genome-wide prediction of antimicrobial peptides. Bioinformatics (Oxford, England), 36(21), 5262–5263. DOI: 10.1093/bioinformatics/btaa653</li>" : "", - "
<li>Ibrahim, A. & Perelo, L. (2023). Darcy220606/AMPcombi. DOI: 10.5281/zenodo.7639121</li>" - ].join(' ').trim().replaceAll(", \\.", ".") - - def arg_text = [ - !params.arg_skip_fargene ? "
<li>Berglund, F., Österlund, T., Boulund, F., Marathe, N. P., Larsson, D., & Kristiansson, E. (2019). Identification and reconstruction of novel antibiotic resistance genes from metagenomes. Microbiome, 7(1), 52. DOI: 10.1186/s40168-019-0670-1</li>" : "", - !params.arg_skip_rgi ? "
<li>Alcock, B. P., Raphenya, A. R., Lau, T., Tsang, K. K., Bouchard, M., Edalatmand, A., Huynh, W., Nguyen, A. V., Cheng, A. A., Liu, S., Min, S. Y., Miroshnichenko, A., Tran, H. K., Werfalli, R. E., Nasir, J. A., Oloni, M., Speicher, D. J., Florescu, A., Singh, B., Faltyn, M., … McArthur, A. G. (2020). CARD 2020: antibiotic resistome surveillance with the comprehensive antibiotic resistance database. Nucleic acids research, 48(D1), D517–D525. DOI: 10.1093/nar/gkz935</li>" : "", - !params.arg_skip_amrfinderplus ? "
<li>Feldgarden, M., Brover, V., Gonzalez-Escalona, N., Frye, J. G., Haendiges, J., Haft, D. H., Hoffmann, M., Pettengill, J. B., Prasad, A. B., Tillman, G. E., Tyson, G. H., & Klimke, W. (2021). AMRFinderPlus and the Reference Gene Catalog facilitate examination of the genomic links among antimicrobial resistance, stress response, and virulence. Scientific reports, 11(1), 12728. DOI: 10.1038/s41598-021-91456-0</li>" : "", - !params.arg_skip_deeparg ? "
<li>Arango-Argoty, G., Garner, E., Pruden, A., Heath, L. S., Vikesland, P., & Zhang, L. (2018). DeepARG: a deep learning approach for predicting antibiotic resistance genes from metagenomic data. Microbiome, 6(1), 23. DOI: 10.1186/s40168-018-0401-z</li>" : "", - !params.arg_skip_abricate ? "
<li>Seemann, T. (2020). ABRicate. Github https://github.com/tseemann/abricate.</li>" : "", - "
<li>Public Health Alliance for Genomic Epidemiology (pha4ge). (2022). Parse multiple Antimicrobial Resistance Analysis Reports into a common data structure. Github. Retrieved October 5, 2022, from https://github.com/pha4ge/hAMRonization</li>" - ].join(' ').trim().replaceAll(", +\\.", ".") - - def bgc_text = [ - !params.bgc_skip_antismash ? "
<li>Blin, K., Shaw, S., Kloosterman, A. M., Charlop-Powers, Z., van Wezel, G. P., Medema, M. H., & Weber, T. (2021). antiSMASH 6.0: improving cluster detection and comparison capabilities. Nucleic acids research, 49(W1), W29–W35. DOI: 10.1093/nar/gkab335</li>" : "", - !params.bgc_skip_deepbgc ? "
<li>Hannigan, G. D., Prihoda, D., Palicka, A., Soukup, J., Klempir, O., Rampula, L., Durcak, J., Wurst, M., Kotowski, J., Chang, D., Wang, R., Piizzi, G., Temesi, G., Hazuda, D. J., Woelk, C. H., & Bitton, D. A. (2019). A deep learning genome-mining strategy for biosynthetic gene cluster prediction. Nucleic acids research, 47(18), e110. DOI: 10.1093/nar/gkz654</li>" : "", - !params.bgc_skip_gecco ? "
<li>Carroll, L. M., Larralde, M., Fleck, J. S., Ponnudurai, R., Milanese, A., Cappio Barazzone, E. & Zeller, G. (2021). Accurate de novo identification of biosynthetic gene clusters with GECCO. bioRxiv DOI: 10.1101/2021.05.03.442509</li>" : "", - "
<li>Frangenberg, J., Fellows Yates, J. A., Ibrahim, A., Perelo, L., & Beber, M. E. (2023). nf-core/funcscan: 1.0.0 - German Rollmops - 2023-02-15. https://doi.org/10.5281/zenodo.7643100</li>" - ].join(' ').replaceAll(", +\\.", ".").trim() - - def postprocessing_text = "
<li>Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics, 32(19), 3047–3048. https://doi.org/10.1093/bioinformatics/btw354</li>" - - // Special as reused in multiple subworkflows, and we don't want to cause duplicates - def hmmsearch_text = ( params.run_amp_screening && !params.amp_skip_hmmsearch ) || (params.run_bgc_screening && !params.bgc_skip_hmmsearch) ? "
<li>Eddy S. R. (2011). Accelerated Profile HMM Searches. PLoS computational biology, 7(10), e1002195. DOI: 10.1371/journal.pcbi.1002195</li>" : "" - - def reference_text = [ - preprocessing_text, - annotation_text, - params.run_amp_screening ? amp_text : "", - params.run_arg_screening ? arg_text : "", - params.run_bgc_screening ? bgc_text : "", - hmmsearch_text, - postprocessing_text, - ].join(' ').trim() - - return reference_text - - } - - public static String methodsDescriptionText(run_workflow, mqc_methods_yaml, params) { - // Convert to a named map so can be used as with familiar NXF ${workflow} variable syntax in the MultiQC YML file - def meta = [:] - meta.workflow = run_workflow.toMap() - meta["manifest_map"] = run_workflow.manifest.toMap() - - // Pipeline DOI - meta["doi_text"] = meta.manifest_map.doi ? "(doi: ${meta.manifest_map.doi})" : "" - meta["nodoi_text"] = meta.manifest_map.doi ? "": "
<li>If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used.</li>
  • " - - // Tool references - meta["tool_citations"] = "" - meta["tool_bibliography"] = "" - - meta["tool_citations"] = toolCitationText(params).replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".") - meta["tool_bibliography"] = toolBibliographyText(params) - - def methods_text = mqc_methods_yaml.text - - def engine = new SimpleTemplateEngine() - def description_html = engine.createTemplate(methods_text).make(meta) - - return description_html - } - - // - // Exit pipeline if incorrect --genome key provided - // - private static void genomeExistsError(params, log) { - if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) { - def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + - " Genome '${params.genome}' not found in any config files provided to the pipeline.\n" + - " Currently, the available genome keys are:\n" + - " ${params.genomes.keySet().join(", ")}\n" + - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" - Nextflow.error(error_string) - } - } -} diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy deleted file mode 100644 index 1b9b0766..00000000 --- a/lib/WorkflowMain.groovy +++ /dev/null @@ -1,61 +0,0 @@ -// -// This file holds several functions specific to the main.nf workflow in the nf-core/funcscan pipeline -// - -import nextflow.Nextflow - -class WorkflowMain { - - // - // Citation string for pipeline - // - public static String citation(workflow) { - return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + - "* The pipeline\n" + - " https://doi.org/10.5281/zenodo.7643099\n\n" + - "* The nf-core framework\n" + - " https://doi.org/10.1038/s41587-020-0439-x\n\n" + - "* Software dependencies\n" + - " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md" - } - - // - // Validate parameters and print summary to screen - // - public static void initialise(workflow, params, log) { - - // Print workflow version and exit on --version - if (params.version) { - String workflow_version = NfcoreTemplate.version(workflow) - log.info "${workflow.manifest.name} ${workflow_version}" - System.exit(0) - } - - // Check that a -profile or Nextflow config has been provided to run the pipeline - NfcoreTemplate.checkConfigProvided(workflow, log) - - // Check that conda channels are set-up correctly - if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { - Utils.checkCondaChannels(log) - } - - // Check AWS batch settings - NfcoreTemplate.awsBatch(workflow, params) - - // Check input has been provided - if (!params.input) { - Nextflow.error("Please provide an input samplesheet to the pipeline e.g. '--input samplesheet.csv'") - } - } - // - // Get attribute from genome config file e.g. 
fasta - // - public static Object getGenomeAttribute(params, attribute) { - if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { - if (params.genomes[ params.genome ].containsKey(attribute)) { - return params.genomes[ params.genome ][ attribute ] - } - } - return null - } -} diff --git a/lib/nfcore_external_java_deps.jar b/lib/nfcore_external_java_deps.jar deleted file mode 100644 index 805c8bb5..00000000 Binary files a/lib/nfcore_external_java_deps.jar and /dev/null differ diff --git a/main.nf b/main.nf index 5d87fca2..529aa3ee 100644 --- a/main.nf +++ b/main.nf @@ -13,63 +13,91 @@ nextflow.enable.dsl = 2 /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - GENOME PARAMETER VALUES + IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS / WORKFLOWS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -params.fasta = WorkflowMain.getGenomeAttribute(params, 'fasta') +include { FUNCSCAN } from './workflows/funcscan' +include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_funcscan_pipeline' +include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_funcscan_pipeline' + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - VALIDATE & PRINT PARAMETER SUMMARY + GENOME PARAMETER VALUES ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { validateParameters; paramsHelp } from 'plugin/nf-validation' - -// Print help message if needed -if (params.help) { - def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs) - def citation = '\n' + WorkflowMain.citation(workflow) + '\n' - def String command = "nextflow run ${workflow.manifest.name} --input samplesheet.csv --genome GRCh37 -profile docker" - log.info logo + paramsHelp(command) + citation + NfcoreTemplate.dashedLine(params.monochrome_logs) - System.exit(0) -} - -// Validate input parameters -if (params.validate_params) { - validateParameters() -} - -WorkflowMain.initialise(workflow, params, log) /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - NAMED WORKFLOW FOR PIPELINE + NAMED WORKFLOWS FOR PIPELINE ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { FUNCSCAN } from './workflows/funcscan' - // -// WORKFLOW: Run main nf-core/funcscan analysis pipeline +// WORKFLOW: Run main analysis pipeline // workflow NFCORE_FUNCSCAN { - FUNCSCAN () -} + take: + samplesheet // channel: samplesheet read in from --input + + main: + + // + // WORKFLOW: Run pipeline + // + FUNCSCAN ( + samplesheet + ) + + emit: + multiqc_report = FUNCSCAN.out.multiqc_report // channel: /path/to/multiqc_report.html + +} /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - RUN ALL WORKFLOWS + RUN MAIN WORKFLOW ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// -// WORKFLOW: Execute a single named workflow for the pipeline -// See: https://github.com/nf-core/rnaseq/issues/619 -// workflow { - NFCORE_FUNCSCAN () + + main: + + // + // SUBWORKFLOW: Run initialisation tasks + // + PIPELINE_INITIALISATION ( + params.version, + params.help, + params.validate_params, + params.monochrome_logs, + args, + params.outdir, + params.input + ) + + // + // WORKFLOW: Run main workflow + // + NFCORE_FUNCSCAN ( + PIPELINE_INITIALISATION.out.samplesheet + ) + + // + // SUBWORKFLOW: Run completion 
tasks + // + PIPELINE_COMPLETION ( + params.email, + params.email_on_fail, + params.plaintext_email, + params.outdir, + params.monochrome_logs, + params.hook_url, + NFCORE_FUNCSCAN.out.multiqc_report + ) } /* diff --git a/modules.json b/modules.json index f0101971..c8fdad1a 100644 --- a/modules.json +++ b/modules.json @@ -7,174 +7,228 @@ "nf-core": { "abricate/run": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "9837ac7d7bb2e2362c021e8dc08efa96190b49a4", "installed_by": ["modules"] }, - "ampcombi": { + "ampcombi2/cluster": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "900f6c970712e41b783e21e5dfc30f052174b5cd", + "installed_by": ["modules"] + }, + "ampcombi2/complete": { + "branch": "master", + "git_sha": "900f6c970712e41b783e21e5dfc30f052174b5cd", + "installed_by": ["modules"] + }, + "ampcombi2/parsetables": { + "branch": "master", + "git_sha": "900f6c970712e41b783e21e5dfc30f052174b5cd", "installed_by": ["modules"] }, "ampir": { "branch": "master", - "git_sha": "6ac776f62fad7360685a87680c5f57f74c3682dc", + "git_sha": "9bfc81874554e87740bcb3e5e07acf0a153c9ecb", "installed_by": ["modules"] }, "amplify/predict": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "730f3aee80d5f8d0b5fc532202ac59361414d006", "installed_by": ["modules"] }, "amrfinderplus/run": { "branch": "master", - "git_sha": "016547e536216e0e839eb8ec678d13b25211c174", + "git_sha": "c0514dfc403fa97c96f549de6abe99f03c78fe8d", "installed_by": ["modules"] }, "amrfinderplus/update": { "branch": "master", - "git_sha": "868d4c3dc7a3db39d36184173e4fe3484499396e", + "git_sha": "8f4a5d5ad55715f6c905ab73ce49f677cf6092fc", "installed_by": ["modules"] }, "antismash/antismashlite": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "b20be35facfc5acdc1259f132ed79339d79e989f", "installed_by": ["modules"] }, "antismash/antismashlitedownloaddatabases": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "4e5f4687318f24ba944a13609d3ea6ebd890737d", "installed_by": ["modules"] }, - "bakta/bakta": { + "argnorm": { "branch": "master", - "git_sha": "9d0f89b445e1f5b2fb30476f4be9a8b519c07846", + "git_sha": "e4fc46af5ec30070e6aef780aba14f89a28caa88", "installed_by": ["modules"] }, - "bakta/baktadbdownload": { + "bakta/bakta": { "branch": "master", - "git_sha": "7c06e6820fa3918bc28a040e794f8a2b39fabadb", + "git_sha": "52507581f62929f98dd6e6c5c5824583fa6ef94d", "installed_by": ["modules"] }, - "bioawk": { - "branch": "master", - "git_sha": "dee3479f3b4a828df6052d318403d2b6a87b2d2e", - "installed_by": ["modules"], - "patch": "modules/nf-core/bioawk/bioawk.diff" - }, - "custom/dumpsoftwareversions": { + "bakta/baktadbdownload": { "branch": "master", - "git_sha": "1b372269755a5c4a13c23bc130ebada8cb9d4cd0", + "git_sha": "7c06e6820fa3918bc28a040e794f8a2b39fabadb", "installed_by": ["modules"] }, "deeparg/downloaddata": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "0af92e0fe6a34f31ee41eae66f04d71850fb4beb", "installed_by": ["modules"] }, "deeparg/predict": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "90b63cde0f838ca4da3a88a37a5309888cae97b9", "installed_by": ["modules"] }, "deepbgc/download": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "f315f85d9ac6c321f6e3596493fd61019340df2a", "installed_by": ["modules"] }, 
"deepbgc/pipeline": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "34ac993e081b32d2170ab790d0386b74122f9d36", "installed_by": ["modules"] }, "fargene": { "branch": "master", - "git_sha": "a7231cbccb86535529e33859e05d19ac93f3ea04", + "git_sha": "5e8481d994963871e3faf061d6fbf02fe33d8cad", "installed_by": ["modules"] }, "gecco/run": { "branch": "master", - "git_sha": "8c029dd8e67754d937fb6b6814e568f1decb2fea", + "git_sha": "f9707f9499a90a46208873d23440e22ac8ad5ebc", "installed_by": ["modules"] }, "gunzip": { "branch": "master", - "git_sha": "e06548bfa36ee31869b81041879dd6b3a83b1d57", + "git_sha": "4e5f4687318f24ba944a13609d3ea6ebd890737d", "installed_by": ["modules"] }, "hamronization/abricate": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "9837ac7d7bb2e2362c021e8dc08efa96190b49a4", "installed_by": ["modules"] }, "hamronization/amrfinderplus": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "52ddbb3ad754d870e485bcfcb680fe6a49d83567", "installed_by": ["modules"] }, "hamronization/deeparg": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "9837ac7d7bb2e2362c021e8dc08efa96190b49a4", "installed_by": ["modules"] }, "hamronization/fargene": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "9cf6f5e4ad9cc11a670a94d56021f1c4f9a91ec1", "installed_by": ["modules"] }, "hamronization/rgi": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "483e4838a2a009e826ea14da0dfc6bcaccef5ad1", "installed_by": ["modules"] }, "hamronization/summarize": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "9837ac7d7bb2e2362c021e8dc08efa96190b49a4", "installed_by": ["modules"] }, "hmmer/hmmsearch": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "b046a286c8240ebe3412ddf8ae901d47008d1ca7", "installed_by": ["modules"] }, "macrel/contigs": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "installed_by": ["modules"] + }, + "mmseqs/createdb": { + "branch": "master", + "git_sha": "89fe39b745da3dca14ad1a361784812ea3aa3a43", + "installed_by": ["modules"] + }, + "mmseqs/createtsv": { + "branch": "master", + "git_sha": "89fe39b745da3dca14ad1a361784812ea3aa3a43", + "installed_by": ["modules"] + }, + "mmseqs/databases": { + "branch": "master", + "git_sha": "151460db852d636979d9ff3ee631e2268060d4c3", + "installed_by": ["modules"] + }, + "mmseqs/taxonomy": { + "branch": "master", + "git_sha": "89fe39b745da3dca14ad1a361784812ea3aa3a43", "installed_by": ["modules"] }, "multiqc": { "branch": "master", - "git_sha": "a6e11ac655e744f7ebc724be669dd568ffdc0e80", + "git_sha": "878d2adbb911aa6e15c06a4d1e93d01bd6f26c74", "installed_by": ["modules"] }, "prodigal": { "branch": "master", - "git_sha": "603ecbd9f45300c9788f197d2a15a005685b4220", + "git_sha": "5e8481d994963871e3faf061d6fbf02fe33d8cad", "installed_by": ["modules"] }, "prokka": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "697d97d46d56b12ff46a1a848a36849527cea0b8", "installed_by": ["modules"] }, "pyrodigal": { "branch": "master", - "git_sha": "dd3ed02ddb21363b1892e4705c164aa4cf945435", + "git_sha": "c00055a0b13d622b4f1f51a8e5be31deaf99ded7", + "installed_by": ["modules"] + }, + "rgi/cardannotation": { + 
"branch": "master", + "git_sha": "dbbb0c509e044d2680b429ba622049d4a23426dc", "installed_by": ["modules"] }, "rgi/main": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "4e5f4687318f24ba944a13609d3ea6ebd890737d", + "installed_by": ["modules"] + }, + "seqkit/seq": { + "branch": "master", + "git_sha": "03fbf6c89e551bd8d77f3b751fb5c955f75b34c5", "installed_by": ["modules"] }, "tabix/bgzip": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "b20be35facfc5acdc1259f132ed79339d79e989f", "installed_by": ["modules"] }, "untar": { "branch": "master", - "git_sha": "d0b4fc03af52a1cc8c6fb4493b921b57352b1dd8", - "installed_by": ["modules"] + "git_sha": "4e5f4687318f24ba944a13609d3ea6ebd890737d", + "installed_by": ["modules"], + "patch": "modules/nf-core/untar/untar.diff" + } + } + }, + "subworkflows": { + "nf-core": { + "utils_nextflow_pipeline": { + "branch": "master", + "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", + "installed_by": ["subworkflows"] + }, + "utils_nfcore_pipeline": { + "branch": "master", + "git_sha": "92de218a329bfc9a9033116eb5f65fd270e72ba3", + "installed_by": ["subworkflows"] + }, + "utils_nfvalidation_plugin": { + "branch": "master", + "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", + "installed_by": ["subworkflows"] } } } diff --git a/modules/local/dramp_download.nf b/modules/local/dramp_download.nf index 2fed5682..8b7eb2d1 100644 --- a/modules/local/dramp_download.nf +++ b/modules/local/dramp_download.nf @@ -1,10 +1,10 @@ process DRAMP_DOWNLOAD { label 'process_single' - conda "bioconda::ampcombi=0.1.7" + conda "bioconda::ampcombi=0.2.2" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/ampcombi:0.1.7--pyhdfd78af_0': - 'biocontainers/ampcombi:0.1.7--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/ampcombi:0.2.2--pyhdfd78af_0': + 'biocontainers/ampcombi:0.2.2--pyhdfd78af_0' }" output: path "amp_ref_database/" , emit: db diff --git a/modules/local/merge_taxonomy_ampcombi.nf b/modules/local/merge_taxonomy_ampcombi.nf new file mode 100644 index 00000000..26e38343 --- /dev/null +++ b/modules/local/merge_taxonomy_ampcombi.nf @@ -0,0 +1,32 @@ +process MERGE_TAXONOMY_AMPCOMBI { + label 'process_medium' + + conda "conda-forge::python=3.11.0 conda-forge::biopython=1.80 conda-forge::pandas=1.5.2" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-27978155697a3671f3ef9aead4b5c823a02cc0b7:548df772fe13c0232a7eab1bc1deb98b495a05ab-0' : + 'biocontainers/mulled-v2-27978155697a3671f3ef9aead4b5c823a02cc0b7:548df772fe13c0232a7eab1bc1deb98b495a05ab-0' }" + + input: + path(ampcombi_df) + path(taxa_list) + + output: + path "ampcombi_complete_summary_taxonomy.tsv" , emit: tsv + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: // This script is bundled with the pipeline, in nf-core/funcscan/bin/ + """ + merge_taxonomy.py \\ + ampcombi_taxa \\ + --ampcombi $ampcombi_df \\ + --taxonomy $taxa_list + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + merge_taxonomy: \$(merge_taxonomy.py --version | sed 's/merge_taxonomy //g') + END_VERSIONS + """ +} diff --git a/modules/local/merge_taxonomy_combgc.nf b/modules/local/merge_taxonomy_combgc.nf new file mode 100644 index 00000000..075668f2 --- /dev/null +++ b/modules/local/merge_taxonomy_combgc.nf @@ -0,0 +1,32 @@ +process MERGE_TAXONOMY_COMBGC { + label 'process_medium' + + conda "conda-forge::python=3.11.0 conda-forge::biopython=1.80 conda-forge::pandas=1.5.2" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-27978155697a3671f3ef9aead4b5c823a02cc0b7:548df772fe13c0232a7eab1bc1deb98b495a05ab-0' : + 'biocontainers/mulled-v2-27978155697a3671f3ef9aead4b5c823a02cc0b7:548df772fe13c0232a7eab1bc1deb98b495a05ab-0' }" + + input: + path(combgc_df) + path(taxa_list) + + output: + path "combgc_complete_summary_taxonomy.tsv" , emit: tsv + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: // This script is bundled with the pipeline, in nf-core/funcscan/bin/ + """ + merge_taxonomy.py \\ + combgc_taxa \\ + --combgc $combgc_df \\ + --taxonomy $taxa_list + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + merge_taxonomy: \$(merge_taxonomy.py --version | sed 's/merge_taxonomy //g') + END_VERSIONS + """ +} diff --git a/modules/local/merge_taxonomy_hamronization.nf b/modules/local/merge_taxonomy_hamronization.nf new file mode 100644 index 00000000..14b85ff2 --- /dev/null +++ b/modules/local/merge_taxonomy_hamronization.nf @@ -0,0 +1,32 @@ +process MERGE_TAXONOMY_HAMRONIZATION { + label 'process_medium' + + conda "conda-forge::python=3.11.0 conda-forge::biopython=1.80 conda-forge::pandas=1.5.2" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-27978155697a3671f3ef9aead4b5c823a02cc0b7:548df772fe13c0232a7eab1bc1deb98b495a05ab-0' : + 'biocontainers/mulled-v2-27978155697a3671f3ef9aead4b5c823a02cc0b7:548df772fe13c0232a7eab1bc1deb98b495a05ab-0' }" + + input: + path(hamronization_df) + path(taxa_list) + + output: + path "hamronization_complete_summary_taxonomy.tsv" , emit: tsv + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: // This script is bundled with the pipeline, in nf-core/funcscan/bin/ + """ + merge_taxonomy.py \\ + hamronization_taxa \\ + --hamronization $hamronization_df \\ + --taxonomy $taxa_list + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + merge_taxonomy: \$(merge_taxonomy.py --version | sed 's/merge_taxonomy //g') + END_VERSIONS + """ +} diff --git a/modules/nf-core/abricate/run/environment.yml b/modules/nf-core/abricate/run/environment.yml new file mode 100644 index 00000000..4b2a1d2a --- /dev/null +++ b/modules/nf-core/abricate/run/environment.yml @@ -0,0 +1,7 @@ +name: abricate_run +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::abricate=1.0.1 diff --git a/modules/nf-core/abricate/run/main.nf b/modules/nf-core/abricate/run/main.nf index 87ab0df9..b0d8a68a 100644 --- a/modules/nf-core/abricate/run/main.nf +++ b/modules/nf-core/abricate/run/main.nf @@ -2,13 +2,14 @@ process ABRICATE_RUN { tag "$meta.id" label 'process_medium' - conda "bioconda::abricate=1.0.1" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/abricate%3A1.0.1--ha8f3691_1': 'biocontainers/abricate:1.0.1--ha8f3691_1' }" input: tuple val(meta), path(assembly) + path databasedir output: tuple val(meta), path("*.txt"), emit: report @@ -20,11 +21,27 @@ process ABRICATE_RUN { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def datadir = databasedir ? "--datadir ${databasedir}" : '' """ abricate \\ $assembly \\ $args \\ - --threads $task.cpus > ${prefix}.txt + $datadir \\ + --threads $task.cpus \\ + > ${prefix}.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + abricate: \$(echo \$(abricate --version 2>&1) | sed 's/^.*abricate //' ) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def datadir = databasedir ? 
'--datadir ${databasedir}' : '' + """ + touch ${prefix}.txt cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/abricate/run/meta.yml b/modules/nf-core/abricate/run/meta.yml index 1189ee90..927c21f6 100644 --- a/modules/nf-core/abricate/run/meta.yml +++ b/modules/nf-core/abricate/run/meta.yml @@ -3,16 +3,14 @@ description: Screen assemblies for antimicrobial resistance against multiple dat keywords: - bacteria - assembly - - antimicrobial reistance + - antimicrobial resistance tools: - abricate: description: Mass screening of contigs for antibiotic resistance genes homepage: https://github.com/tseemann/abricate documentation: https://github.com/tseemann/abricate tool_dev_url: https://github.com/tseemann/abricate - licence: ["GPL v2"] - input: - meta: type: map @@ -23,7 +21,10 @@ input: type: file description: FASTA, GenBank or EMBL formatted file pattern: "*.{fa,fasta,fna,fa.gz,fasta.gz,fna.gz,gbk,gbk.gz,embl,embl.gz}" - + - databasedir: + type: directory + description: Optional location of local copy of database files, possibly with custom databases set up with `abricate --setupdb` + pattern: "*/" output: - meta: type: map @@ -38,6 +39,7 @@ output: type: file description: Tab-delimited report of results pattern: "*.{txt}" - authors: - "@rpetit3" +maintainers: + - "@rpetit3" diff --git a/modules/nf-core/abricate/run/tests/main.nf.test b/modules/nf-core/abricate/run/tests/main.nf.test new file mode 100644 index 00000000..f31a67e7 --- /dev/null +++ b/modules/nf-core/abricate/run/tests/main.nf.test @@ -0,0 +1,64 @@ +nextflow_process { + + name "Test Process ABRICATE_RUN" + script "../main.nf" + process "ABRICATE_RUN" + tag "modules" + tag "modules_nfcore" + tag "abricate" + tag "abricate/run" + + test("bacteroides_fragilis - genome.fa.gz") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/prokaryotes/bacteroides_fragilis/genome/genome.fna.gz', checkIfExists: true) + ] + input[1] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("bacteroides_fragilis - genome - stub") { + + options "-stub" + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/prokaryotes/bacteroides_fragilis/genome/genome.fna.gz', checkIfExists: true) + ] + input[1] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/abricate/run/tests/main.nf.test.snap b/modules/nf-core/abricate/run/tests/main.nf.test.snap new file mode 100644 index 00000000..9f598c4a --- /dev/null +++ b/modules/nf-core/abricate/run/tests/main.nf.test.snap @@ -0,0 +1,72 @@ +{ + "bacteroides_fragilis - genome - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,2204fb00277c287f5f3d82e28964aa03" + ], + "report": [ + [ + { + "id": "test", + "single_end": false + }, + "test.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,2204fb00277c287f5f3d82e28964aa03" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-19T21:06:27.483697023" + }, + "bacteroides_fragilis - genome": { + 
"content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.txt:md5,cd07e2953b127aed8d09bf1b2b903a1f" + ] + ], + "1": [ + "versions.yml:md5,2204fb00277c287f5f3d82e28964aa03" + ], + "report": [ + [ + { + "id": "test", + "single_end": false + }, + "test.txt:md5,cd07e2953b127aed8d09bf1b2b903a1f" + ] + ], + "versions": [ + "versions.yml:md5,2204fb00277c287f5f3d82e28964aa03" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-19T21:06:20.036490495" + } +} \ No newline at end of file diff --git a/modules/nf-core/abricate/run/tests/tags.yml b/modules/nf-core/abricate/run/tests/tags.yml new file mode 100644 index 00000000..0a304886 --- /dev/null +++ b/modules/nf-core/abricate/run/tests/tags.yml @@ -0,0 +1,2 @@ +abricate/run: + - modules/nf-core/abricate/run/** diff --git a/modules/nf-core/ampcombi/main.nf b/modules/nf-core/ampcombi/main.nf deleted file mode 100644 index 9cad25cb..00000000 --- a/modules/nf-core/ampcombi/main.nf +++ /dev/null @@ -1,52 +0,0 @@ -process AMPCOMBI { - tag "$meta.id" - label 'process_medium' - - conda "bioconda::ampcombi=0.1.7" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/ampcombi:0.1.7--pyhdfd78af_0': - 'biocontainers/ampcombi:0.1.7--pyhdfd78af_0' }" - - input: - tuple val(meta), path(amp_input) - path(faa_input) - path( opt_amp_db ) - - output: - tuple val(meta), path("${meta.id}*") , emit: sample_dir - tuple val(meta), path("${meta.id}/*diamond_matches.txt") , emit: txt - tuple val(meta), path("${meta.id}/*ampcombi.csv") , emit: csv - tuple val(meta), path("${meta.id}/*amp.faa") , emit: faa - tuple val(meta), path("AMPcombi_summary.csv") , optional:true, emit: summary_csv - tuple val(meta), path("AMPcombi_summary.html") , optional:true, emit: summary_html - tuple val(meta), path("*.log") , optional:true, emit: log - tuple val(meta), path("*/amp_ref_database") , optional:true, emit: results_db - tuple val(meta), path("*/amp_ref_database/*.dmnd") , optional:true, emit: results_db_dmnd - tuple val(meta), path("*/amp_ref_database/*.clean.fasta") , optional:true, emit: results_db_fasta - tuple val(meta), path("*/amp_ref_database/*.tsv") , optional:true, emit: results_db_tsv - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def db = opt_amp_db? "--amp_database $opt_amp_db": "" - def faa = faa_input.isDirectory() ? 
"--faa ${faa_input}/" : "--faa ${faa_input}" - """ - ampcombi \\ - $args \\ - --path_list '${amp_input.collect{"$it"}.join("' '")}' \\ - --sample_list ${prefix} \\ - --log True \\ - --threads ${task.cpus} \\ - ${db} \\ - ${faa} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - ampcombi: \$(ampcombi --version | sed 's/ampcombi //') - END_VERSIONS - """ -} diff --git a/modules/nf-core/ampcombi2/cluster/environment.yml b/modules/nf-core/ampcombi2/cluster/environment.yml new file mode 100644 index 00000000..aa5e5fe4 --- /dev/null +++ b/modules/nf-core/ampcombi2/cluster/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +name: "ampcombi2_cluster" +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - "bioconda::ampcombi=0.2.2" diff --git a/modules/nf-core/ampcombi2/cluster/main.nf b/modules/nf-core/ampcombi2/cluster/main.nf new file mode 100644 index 00000000..90495dba --- /dev/null +++ b/modules/nf-core/ampcombi2/cluster/main.nf @@ -0,0 +1,48 @@ +process AMPCOMBI2_CLUSTER { + tag 'ampcombi2' + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ampcombi:0.2.2--pyhdfd78af_0': + 'biocontainers/ampcombi:0.2.2--pyhdfd78af_0' }" + + input: + path(summary_file) + + output: + path("Ampcombi_summary_cluster.tsv") , emit: cluster_tsv + path("Ampcombi_summary_cluster_representative_seq.tsv"), emit: rep_cluster_tsv + path("Ampcombi_cluster.log") , emit: log, optional:true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + ampcombi cluster \\ + --ampcombi_summary ${summary_file} \\ + $args \\ + --threads ${task.cpus} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ampcombi: \$(ampcombi --version | sed 's/ampcombi //') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + """ + touch Ampcombi_summary_cluster.tsv + touch Ampcombi_summary_cluster_representative_seq.tsv + touch Ampcombi_cluster.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ampcombi: \$(ampcombi --version | sed 's/ampcombi //') + END_VERSIONS + """ +} diff --git a/modules/nf-core/ampcombi2/cluster/meta.yml b/modules/nf-core/ampcombi2/cluster/meta.yml new file mode 100644 index 00000000..60949dc3 --- /dev/null +++ b/modules/nf-core/ampcombi2/cluster/meta.yml @@ -0,0 +1,48 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "ampcombi2_cluster" +description: A submodule that clusters the merged AMP hits generated from ampcombi2/parsetables and ampcombi2/complete using MMseqs2 cluster. +keywords: + - antimicrobial peptides + - amps + - parsing + - reporting + - align + - clustering + - mmseqs2 +tools: + - ampcombi2/cluster: + description: "A tool for clustering all AMP hits found across many samples and supporting many AMP prediction tools." + homepage: "https://github.com/Darcy220606/AMPcombi" + documentation: "https://github.com/Darcy220606/AMPcombi" + tool_dev_url: "https://github.com/Darcy220606/AMPcombi/tree/dev" + licence: ["MIT"] + +input: + - summary_file: + type: file + description: A file corresponding to the Ampcombi_summary.tsv that is generated by running 'ampcombi complete'. 
It is a file containing all the merged AMP results from all samples and all tools. + pattern: "*.tsv" + +output: + - cluster_tsv: + type: file + description: A file containing all the results from the merged input table 'Ampcombi_summary.tsv', but also including the cluster id number. The clustering is done using MMseqs2 cluster. + pattern: "*.tsv" + - rep_cluster_tsv: + type: file + description: A file containing the representative sequences of the clusters estimated by the tool. The clustering is done using MMseqs2 cluster. + pattern: "*.tsv" + - log: + type: file + description: A log file that captures the standard output for the entire process in a log file. Can be activated by `--log`. + pattern: "*.log" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@darcy220606" +maintainers: + - "@darcy220606" diff --git a/modules/nf-core/ampcombi2/cluster/tests/main.nf.test b/modules/nf-core/ampcombi2/cluster/tests/main.nf.test new file mode 100644 index 00000000..49bee6cf --- /dev/null +++ b/modules/nf-core/ampcombi2/cluster/tests/main.nf.test @@ -0,0 +1,65 @@ +nextflow_process { + + name "Test Process AMPCOMBI2_CLUSTER" + script "../main.nf" + process "AMPCOMBI2_CLUSTER" + + tag "modules" + tag "modules_nfcore" + tag "ampcombi2" + tag "ampcombi2/cluster" + tag "ampcombi2/complete" + + setup { + run("AMPCOMBI2_COMPLETE") { + script "../../../ampcombi2/complete/main.nf" + process { + """ + input[0] = + [ + file('https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/ampcombi/ampcombi2/sample_1_ampcombi.tsv', checkIfExists: true), + file('https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/ampcombi/ampcombi2/sample_2_ampcombi.tsv', checkIfExists: true) + ] + """ + } + } + } + + test("ampcombi2_cluster - metagenome") { + when { + process { + """ + input[0] = AMPCOMBI2_COMPLETE.out.tsv + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.cluster_tsv[0]).readLines()[0].contains("Linear/Cyclic/Branched"), + file(process.out.rep_cluster_tsv[0]).readLines()[0].contains("total_cluster_members"), + process.out.versions).match() } + ) + } + } + + test("ampcombi2_cluster - metagenome - stub") { + options "-stub" + when { + process { + """ + input[0] = AMPCOMBI2_COMPLETE.out.tsv + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/ampcombi2/cluster/tests/main.nf.test.snap b/modules/nf-core/ampcombi2/cluster/tests/main.nf.test.snap new file mode 100644 index 00000000..f4123c76 --- /dev/null +++ b/modules/nf-core/ampcombi2/cluster/tests/main.nf.test.snap @@ -0,0 +1,51 @@ +{ + "ampcombi2_cluster - metagenome": { + "content": [ + true, + true, + [ + "versions.yml:md5,4e9aa3812bfee6ec22a1b6ccb62de2ca" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-24T12:11:40.928513749" + }, + "ampcombi2_cluster - metagenome - stub": { + "content": [ + { + "0": [ + "Ampcombi_summary_cluster.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "1": [ + "Ampcombi_summary_cluster_representative_seq.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "2": [ + "Ampcombi_cluster.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "3": [ + "versions.yml:md5,4e9aa3812bfee6ec22a1b6ccb62de2ca" + ], + "cluster_tsv": [ + "Ampcombi_summary_cluster.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "log": [ + 
"Ampcombi_cluster.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "rep_cluster_tsv": [ + "Ampcombi_summary_cluster_representative_seq.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "versions": [ + "versions.yml:md5,4e9aa3812bfee6ec22a1b6ccb62de2ca" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-24T12:12:08.780718892" + } +} \ No newline at end of file diff --git a/modules/nf-core/ampcombi2/cluster/tests/tags.yml b/modules/nf-core/ampcombi2/cluster/tests/tags.yml new file mode 100644 index 00000000..783f4d52 --- /dev/null +++ b/modules/nf-core/ampcombi2/cluster/tests/tags.yml @@ -0,0 +1,2 @@ +ampcombi2/cluster: + - "modules/nf-core/ampcombi2/cluster/**" diff --git a/modules/nf-core/ampcombi2/complete/environment.yml b/modules/nf-core/ampcombi2/complete/environment.yml new file mode 100644 index 00000000..fa640b77 --- /dev/null +++ b/modules/nf-core/ampcombi2/complete/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +name: "ampcombi2_complete" +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - "bioconda::ampcombi=0.2.2" diff --git a/modules/nf-core/ampcombi2/complete/main.nf b/modules/nf-core/ampcombi2/complete/main.nf new file mode 100644 index 00000000..0e4d5d53 --- /dev/null +++ b/modules/nf-core/ampcombi2/complete/main.nf @@ -0,0 +1,44 @@ +process AMPCOMBI2_COMPLETE { + tag "ampcombi2" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ampcombi:0.2.2--pyhdfd78af_0': + 'biocontainers/ampcombi:0.2.2--pyhdfd78af_0' }" + + input: + path(summaries) + + output: + path("Ampcombi_summary.tsv") , emit: tsv + path("Ampcombi_complete.log"), emit: log, optional:true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + ampcombi complete \\ + --summaries_files '${summaries.collect{"$it"}.join("' '")}' \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ampcombi: \$(ampcombi --version | sed 's/ampcombi //') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + """ + touch Ampcombi_summary.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ampcombi: \$(ampcombi --version | sed 's/ampcombi //') + END_VERSIONS + """ +} diff --git a/modules/nf-core/ampcombi2/complete/meta.yml b/modules/nf-core/ampcombi2/complete/meta.yml new file mode 100644 index 00000000..e9ae632c --- /dev/null +++ b/modules/nf-core/ampcombi2/complete/meta.yml @@ -0,0 +1,50 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "ampcombi2_complete" +description: A submodule that merges all output summary tables from ampcombi/parsetables in one summary file. +keywords: + - antimicrobial peptides + - amps + - parsing + - reporting + - align + - macrel + - amplify + - hmmsearch + - neubi + - ampir + - ampgram + - amptransformer + - DRAMP +tools: + - ampcombi2/complete: + description: "This merges the per sample AMPcombi summaries generated by running 'ampcombi2/parsetables'." 
+ homepage: "https://github.com/Darcy220606/AMPcombi" + documentation: "https://github.com/Darcy220606/AMPcombi" + tool_dev_url: "https://github.com/Darcy220606/AMPcombi/tree/dev" + licence: ["MIT"] + +input: + - summaries: + type: list + description: The path to the list of files corresponding to each sample as generated by ampcombi2/parsetables. + pattern: "[*_ampcombi.tsv, *_ampcombi.tsv]" + +output: + - tsv: + type: file + description: A file containing the complete AMPcombi summaries from all processed samples. + pattern: "*.tsv" + - log: + type: file + description: A log file that captures the standard output for the entire process in a log file. Can be activated by `--log`. + pattern: "*.log" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@darcy220606" +maintainers: + - "@darcy220606" diff --git a/modules/nf-core/ampcombi2/complete/tests/main.nf.test b/modules/nf-core/ampcombi2/complete/tests/main.nf.test new file mode 100644 index 00000000..176d975f --- /dev/null +++ b/modules/nf-core/ampcombi2/complete/tests/main.nf.test @@ -0,0 +1,56 @@ +nextflow_process { + + name "Test Process AMPCOMBI2_COMPLETE" + script "../main.nf" + process "AMPCOMBI2_COMPLETE" + + tag "modules" + tag "modules_nfcore" + tag "ampcombi2" + tag "ampcombi2/complete" + + test("ampcombi2_complete - contigs") { + when { + process { + """ + input[0] = + [ + file('https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/ampcombi/ampcombi2/sample_1_ampcombi.tsv', checkIfExists: true), + file('https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/ampcombi/ampcombi2/sample_2_ampcombi.tsv', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.tsv[0]).readLines()[0].contains("ampir"), + process.out.versions).match() } + ) + } + } + + test("ampcombi2_complete - contigs - stub") { + options "-stub" + when { + process { + """ + input[0] = + [ + file('https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/ampcombi/ampcombi2/sample_1_ampcombi.tsv', checkIfExists: true), + file('https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/ampcombi/ampcombi2/sample_2_ampcombi.tsv', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/ampcombi2/complete/tests/main.nf.test.snap b/modules/nf-core/ampcombi2/complete/tests/main.nf.test.snap new file mode 100644 index 00000000..cd8fa18f --- /dev/null +++ b/modules/nf-core/ampcombi2/complete/tests/main.nf.test.snap @@ -0,0 +1,44 @@ +{ + "ampcombi2_complete - contigs - stub": { + "content": [ + { + "0": [ + "Ampcombi_summary.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,0aa35e86761a6c160482b8b8dbfc5440" + ], + "log": [ + + ], + "tsv": [ + "Ampcombi_summary.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "versions": [ + "versions.yml:md5,0aa35e86761a6c160482b8b8dbfc5440" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-29T11:55:16.030399422" + }, + "ampcombi2_complete - contigs": { + "content": [ + true, + [ + "versions.yml:md5,0aa35e86761a6c160482b8b8dbfc5440" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-29T11:54:54.334224301" + } +} \ No newline at end of file diff --git 
a/modules/nf-core/ampcombi2/complete/tests/tags.yml b/modules/nf-core/ampcombi2/complete/tests/tags.yml new file mode 100644 index 00000000..f8ac5fee --- /dev/null +++ b/modules/nf-core/ampcombi2/complete/tests/tags.yml @@ -0,0 +1,2 @@ +ampcombi2/complete: + - "modules/nf-core/ampcombi2/complete/**" diff --git a/modules/nf-core/ampcombi2/parsetables/environment.yml b/modules/nf-core/ampcombi2/parsetables/environment.yml new file mode 100644 index 00000000..7a4b37ab --- /dev/null +++ b/modules/nf-core/ampcombi2/parsetables/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +name: "ampcombi2_parsetables" +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - "bioconda::ampcombi=0.2.2" diff --git a/modules/nf-core/ampcombi2/parsetables/main.nf b/modules/nf-core/ampcombi2/parsetables/main.nf new file mode 100644 index 00000000..d779440b --- /dev/null +++ b/modules/nf-core/ampcombi2/parsetables/main.nf @@ -0,0 +1,76 @@ +process AMPCOMBI2_PARSETABLES { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ampcombi:0.2.2--pyhdfd78af_0': + 'biocontainers/ampcombi:0.2.2--pyhdfd78af_0' }" + + input: + tuple val(meta), path(amp_input) + path(faa_input) + path(gbk_input) + path(opt_amp_db) + + output: + tuple val(meta), path("${meta.id}/") , emit: sample_dir + tuple val(meta), path("${meta.id}/contig_gbks/") , emit: contig_gbks + tuple val(meta), path("${meta.id}/${meta.id}_diamond_matches.txt"), emit: txt + tuple val(meta), path("${meta.id}/${meta.id}_ampcombi.tsv") , emit: tsv + tuple val(meta), path("${meta.id}/${meta.id}_amp.faa") , emit: faa + tuple val(meta), path("${meta.id}/${meta.id}_ampcombi.log") , emit: sample_log, optional:true + tuple val(meta), path("Ampcombi_parse_tables.log") , emit: full_log, optional:true + tuple val(meta), path("amp_ref_database/") , emit: results_db, optional:true + tuple val(meta), path("amp_ref_database/*.dmnd") , emit: results_db_dmnd, optional:true + tuple val(meta), path("amp_ref_database/*.clean.fasta") , emit: results_db_fasta, optional:true + tuple val(meta), path("amp_ref_database/*.tsv") , emit: results_db_tsv, optional:true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def db = opt_amp_db? "--amp_database $opt_amp_db": "" + """ + ampcombi parse_tables \\ + --path_list '${amp_input.collect{"$it"}.join("' '")}' \\ + --faa ${faa_input} \\ + --gbk ${gbk_input} \\ + --sample_list ${prefix} \\ + ${db} \\ + $args \\ + --threads ${task.cpus} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ampcombi: \$(ampcombi --version | sed 's/ampcombi //') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def db = opt_amp_db? 
"--amp_database $opt_amp_db": "" + """ + mkdir -p ${prefix} + mkdir -p ${prefix}/contig_gbks + touch ${prefix}/${meta.id}_diamond_matches.txt + touch ${prefix}/${meta.id}_ampcombi.tsv + touch ${prefix}/${meta.id}_amp.faa + touch ${prefix}/${meta.id}_ampcombi.log + touch Ampcombi_parse_tables.log + + mkdir -p amp_ref_database + touch amp_ref_database/*.dmnd + touch amp_ref_database/*.clean.fasta + touch amp_ref_database/*.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ampcombi: \$(ampcombi --version | sed 's/ampcombi //') + END_VERSIONS + """ +} diff --git a/modules/nf-core/ampcombi/meta.yml b/modules/nf-core/ampcombi2/parsetables/meta.yml similarity index 55% rename from modules/nf-core/ampcombi/meta.yml rename to modules/nf-core/ampcombi2/parsetables/meta.yml index 83e7b83f..eeea5586 100644 --- a/modules/nf-core/ampcombi/meta.yml +++ b/modules/nf-core/ampcombi2/parsetables/meta.yml @@ -1,5 +1,7 @@ -name: ampcombi -description: A tool to parse and summarise results from antimicrobial peptides tools and present functional classification. +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "ampcombi2_parsetables" +description: A submodule that parses and standardizes the results from various antimicrobial peptide identification tools. keywords: - antimicrobial peptides - amps @@ -11,89 +13,95 @@ keywords: - hmmsearch - neubi - ampir + - ampgram + - amptransformer - DRAMP tools: - - ampcombi: - description: "This tool parses the results of amp prediction tools into a single table and aligns the hits against a reference database of antimicrobial peptides for functional classifications." + - ampcombi2/parsetables: + description: "A parsing tool to convert and summarise the outputs from multiple AMP detection tools in a standardized format." homepage: "https://github.com/Darcy220606/AMPcombi" documentation: "https://github.com/Darcy220606/AMPcombi" tool_dev_url: "https://github.com/Darcy220606/AMPcombi/tree/dev" - - licence: "['MIT']" + licence: ["MIT"] input: - meta: type: map description: | Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] + e.g. `[ id:'sample1', single_end:false ]` - amp_input: - type: folder or filelist - description: The path to the directory containing the results for the AMP tools for each sample processed or a list of files corresponding to each file generated by AMP tools. + type: list + description: The path to the directory containing the results for the AMP tools for each processed sample or a list of files corresponding to each file generated by AMP tools. + pattern: "[*amptool.tsv, *amptool.tsv]" - faa_input: - type: folder or file - description: The path to the folder or file corresponding to the respective protein fasta files with '.faa' extension. Filenames have to contain the corresponding sample-name, i.e. sample_1.faa - pattern: "*/" - - amp_database: + type: file + description: The path to the file corresponding to the respective protein fasta files with '.faa' extension. File names have to contain the corresponding sample name, i.e. sample_1.faa + pattern: "*.faa" + - gbk_input: + type: file + description: The path to the file corresponding to the respective annotated files with either '.gbk' or '.gbff' extensions. File names must contain the corresponding sample name, i.e. sample_1.faa where "sample_1" is the sample name. 
+ pattern: "*.gbk" + - opt_amp_db: type: directory description: The path to the folder containing the fasta and tsv database files. pattern: "*/" - output: - meta: type: map description: | Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + e.g. `[ id:'sample1', single_end:false ]` + - sample_dir: + type: directory + description: The output directory that contains the summary output and related alignment files for one sample. + pattern: "/*" + - contig_gbks: + type: directory + description: The output subdirectory that contains the gbk files containing the AMP hits for each sample. + pattern: "/*/contig_gbks" - txt: type: file description: An alignment file containing the results from the DIAMOND alignment step done on all AMP hits. pattern: "/*/*_diamond_matches.txt" - - csv: + - tsv: type: file - description: A file containing the summary report of all predicted AMP hits from all AMP tools given as input and the corresponding taxonomic and functional classification from the alignment step. - pattern: "/*/*_ampcombi.csv" + description: A file containing the summary report of all predicted AMP hits from all AMP tools given as input, the corresponding taxonomic and functional classification from the alignment step and the estimated physiochemical properties. + pattern: "/*/*_ampcombi.tsv" - faa: type: file description: A fasta file containing the amino acid sequences of all predicted AMP hits. pattern: "/*/*_amp.faa" - - log: + - sample_log: type: file - description: A log file that captures the standard output ina log file. Can be activated by `--log`. - pattern: "*.log" - - sample_dir: - type: directory - description: The output directory that contains the summary output and related alignment files for one sample. - pattern: "/*" + description: A log file that captures the standard output per sample in a log file. Can be activated by `--log`. + pattern: "/*/*.log" + - full_log: + type: file + description: A log file that captures the standard output for the entire process in a log file. Can be activated by `--log`. + pattern: "Ampcombi_parse_tables.log" - results_db: type: directory description: If the AMP reference database is not provided by the user using the flag `--amp_database', by default the DRAMP database will be downloaded, filtered and stored in this folder. pattern: "/amp_ref_database" + - results_db_dmnd: + type: file + description: AMP reference database converted to DIAMOND database format. + pattern: "/amp_ref_database/*.dmnd" - results_db_fasta: type: file description: AMP reference database fasta file, cleaned of diamond-uncompatible characters. pattern: "/amp_ref_database/*.clean.fasta" - - results_db_dmd: - type: file - description: AMP reference database converted to DIAMOND database format. - pattern: "/amp_ref_database/*.dmnd" - results_db_tsv: type: file description: AMP reference database in tsv-format with two columns containing header and sequence. pattern: "/amp_ref_database/*.tsv" - - summary_csv: - type: file - description: A file that concatenates all samples ampcombi summaries. This is activated with `--complete_summary true`. - pattern: "AMPcombi_summary.html" - - summary_html: + - versions: type: file - description: A file that concatenates all samples ampcombi summaries. This is activated with `--complete_summary true`. 
- pattern: "AMPcombi_summary.html" + description: File containing software versions + pattern: "versions.yml" authors: - "@darcy220606" - - "@louperelo" +maintainers: + - "@darcy220606" diff --git a/modules/nf-core/ampcombi2/parsetables/tests/main.nf.test b/modules/nf-core/ampcombi2/parsetables/tests/main.nf.test new file mode 100644 index 00000000..2d775179 --- /dev/null +++ b/modules/nf-core/ampcombi2/parsetables/tests/main.nf.test @@ -0,0 +1,82 @@ +nextflow_process { + + name "Test Process AMPCOMBI2_PARSETABLES" + script "../main.nf" + process "AMPCOMBI2_PARSETABLES" + tag "modules" + tag "modules_nfcore" + tag "antimicrobial peptides" + tag "ampcombi2" + tag "ampcombi2/parsetables" + + config "./nextflow.config" + + test("ampcombi2_parsetables - metagenome") { + when { + process { + """ + amp_input = [ + [id:'sample_1'], + [ + file('https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/ampcombi/test_files_0.2/ampir/sample_1/sample_1.ampir.tsv', checkIfExists: true), + file('https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/ampcombi/test_files_0.2/amplify/sample_1/sample_1.amplify.tsv', checkIfExists: true) + ] + ] + faa_input = file('https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/ampcombi/test_faa_0.2/sample_1.faa', checkIfExists: true) + gbk_input = file('https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/ampcombi/test_gbk_0.2/sample_1.gbff', checkIfExists: true) + + input[0] = amp_input + input[1] = faa_input + input[2] = gbk_input + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.sample_dir.collect { file(it[1]).getName() } + + process.out.results_db.collect { file(it[1]).getName() } + + process.out.contig_gbks.collect { file(it[1]).getName() } + + process.out.full_log.collect { file(it[1]).readLines().contains("<--AMP_database>") } + + process.out.sample_log.collect { file(it[1]).readLines().contains("found ampir file") } + + process.out.txt.collect { file(it[1]).readLines()[0] } + + process.out.tsv.collect { file(it[1]).readLines()[0] } + + process.out.faa.collect { file(it[1]).readLines()[0] } + + process.out.summary_csv.collect { file(it[1]).readLines().contains("Structure_Description") } + + process.out.versions ).match() } + ) + } + } + test("ampcombi2_parsetables - metagenome - stub") { + options "-stub" + when { + process { + """ + amp_input = [ + [id:'sample_1'], + [ + file('https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/ampcombi/test_files_0.2/ampir/sample_1/sample_1.ampir.tsv', checkIfExists: true), + file('https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/ampcombi/test_files_0.2/amplify/sample_1/sample_1.amplify.tsv', checkIfExists: true) + ] + ] + faa_input = file('https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/ampcombi/test_faa_0.2/sample_1.faa', checkIfExists: true) + gbk_input = file('https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/ampcombi/test_gbk_0.2/sample_1.gbff', checkIfExists: true) + + input[0] = amp_input + input[1] = faa_input + input[2] = gbk_input + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/ampcombi2/parsetables/tests/main.nf.test.snap b/modules/nf-core/ampcombi2/parsetables/tests/main.nf.test.snap new file mode 100644 index 00000000..54faf69f --- /dev/null +++ 
b/modules/nf-core/ampcombi2/parsetables/tests/main.nf.test.snap @@ -0,0 +1,243 @@ +{ + "ampcombi2_parsetables - metagenome": { + "content": [ + [ + "sample_1", + "amp_ref_database", + "contig_gbks", + false, + true, + "contig_id\ttarget_id\tpident\tevalue\tnident\tfull_qseq\tfull_sseq\tqseq\tsseq\tqcovhsp\tscovhsp", + "sample_id\tCDS_id\tprob_ampir\tprob_amplify\taa_sequence\ttarget_id\tpident\tevalue\tSequence\tFamily\tSource\tPDB_ID\tLinear/Cyclic/Branched\tOther_Modifications\tPubmed_ID\tReference\tmolecular_weight\thelix_fraction\tturn_fraction\tsheet_fraction\tisoelectric_point\thydrophobicity\ttransporter_protein\tcontig_id\tCDS_start\tCDS_end\tCDS_dir\tCDS_stop_codon_found", + ">BAONEE_00005", + "versions.yml:md5,f32ab4ba79e66feba755b78d7d7a1f36" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-24T12:05:11.848363584" + }, + "ampcombi2_parsetables - metagenome - stub": { + "content": [ + { + "0": [ + [ + { + "id": "sample_1" + }, + [ + [ + + ], + "sample_1_amp.faa:md5,d41d8cd98f00b204e9800998ecf8427e", + "sample_1_ampcombi.log:md5,d41d8cd98f00b204e9800998ecf8427e", + "sample_1_ampcombi.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", + "sample_1_diamond_matches.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + [ + { + "id": "sample_1" + }, + [ + + ] + ] + ], + "10": [ + [ + { + "id": "sample_1" + }, + "*.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "11": [ + "versions.yml:md5,f32ab4ba79e66feba755b78d7d7a1f36" + ], + "2": [ + [ + { + "id": "sample_1" + }, + "sample_1_diamond_matches.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "sample_1" + }, + "sample_1_ampcombi.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + { + "id": "sample_1" + }, + "sample_1_amp.faa:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "5": [ + [ + { + "id": "sample_1" + }, + "sample_1_ampcombi.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "6": [ + [ + { + "id": "sample_1" + }, + "Ampcombi_parse_tables.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "7": [ + [ + { + "id": "sample_1" + }, + [ + "*.clean.fasta:md5,d41d8cd98f00b204e9800998ecf8427e", + "*.dmnd:md5,d41d8cd98f00b204e9800998ecf8427e", + "*.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "8": [ + [ + { + "id": "sample_1" + }, + "*.dmnd:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "9": [ + [ + { + "id": "sample_1" + }, + "*.clean.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "contig_gbks": [ + [ + { + "id": "sample_1" + }, + [ + + ] + ] + ], + "faa": [ + [ + { + "id": "sample_1" + }, + "sample_1_amp.faa:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "full_log": [ + [ + { + "id": "sample_1" + }, + "Ampcombi_parse_tables.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "results_db": [ + [ + { + "id": "sample_1" + }, + [ + "*.clean.fasta:md5,d41d8cd98f00b204e9800998ecf8427e", + "*.dmnd:md5,d41d8cd98f00b204e9800998ecf8427e", + "*.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "results_db_dmnd": [ + [ + { + "id": "sample_1" + }, + "*.dmnd:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "results_db_fasta": [ + [ + { + "id": "sample_1" + }, + "*.clean.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "results_db_tsv": [ + [ + { + "id": "sample_1" + }, + "*.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "sample_dir": [ + [ + { + "id": "sample_1" + }, + [ + [ + + ], + "sample_1_amp.faa:md5,d41d8cd98f00b204e9800998ecf8427e", + "sample_1_ampcombi.log:md5,d41d8cd98f00b204e9800998ecf8427e", + 
"sample_1_ampcombi.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", + "sample_1_diamond_matches.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "sample_log": [ + [ + { + "id": "sample_1" + }, + "sample_1_ampcombi.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tsv": [ + [ + { + "id": "sample_1" + }, + "sample_1_ampcombi.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "txt": [ + [ + { + "id": "sample_1" + }, + "sample_1_diamond_matches.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,f32ab4ba79e66feba755b78d7d7a1f36" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-24T12:05:34.675308615" + } +} \ No newline at end of file diff --git a/modules/nf-core/ampcombi2/parsetables/tests/nextflow.config b/modules/nf-core/ampcombi2/parsetables/tests/nextflow.config new file mode 100644 index 00000000..d39b0509 --- /dev/null +++ b/modules/nf-core/ampcombi2/parsetables/tests/nextflow.config @@ -0,0 +1,21 @@ +process { + + withName: AMPCOMBI2_PARSETABLES { + + ext.args = [ + "--aminoacid_length 2000", + "--db_evalue 2000", + "--ampir_file 'ampir.tsv'", + "--amplify_file 'amplify.tsv'", + "--macrel_file '.prediction'", + "--neubi_file '.fasta'", + "--hmmsearch_file 'candidates.txt'", + "--ampgram_file '.tsv'", + "--amptransformer_file '.txt'", + "--log true" + ].join(' ') + + ext.prefix = "sample_1" + + } +} diff --git a/modules/nf-core/ampcombi2/parsetables/tests/tags.yml b/modules/nf-core/ampcombi2/parsetables/tests/tags.yml new file mode 100644 index 00000000..b56b0468 --- /dev/null +++ b/modules/nf-core/ampcombi2/parsetables/tests/tags.yml @@ -0,0 +1,2 @@ +ampcombi2/parsetables: + - "modules/nf-core/ampcombi2/parsetables/**" diff --git a/modules/nf-core/ampir/environment.yml b/modules/nf-core/ampir/environment.yml new file mode 100644 index 00000000..8cb475d1 --- /dev/null +++ b/modules/nf-core/ampir/environment.yml @@ -0,0 +1,7 @@ +name: ampir +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - conda-forge::r-ampir=1.1.0 diff --git a/modules/nf-core/ampir/main.nf b/modules/nf-core/ampir/main.nf index 50ef1e2e..4a899fdd 100644 --- a/modules/nf-core/ampir/main.nf +++ b/modules/nf-core/ampir/main.nf @@ -2,7 +2,7 @@ process AMPIR { tag "$meta.id" label 'process_single' - conda "conda-forge::r-ampir=1.1.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/r-ampir:1.1.0': 'biocontainers/r-ampir:1.1.0' }" @@ -44,4 +44,27 @@ process AMPIR { writeLines(version_ampir, f) close(f) """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + if ("$faa" == "${prefix}.faa") error "Input and output names are the same, set prefix in module configuration to disambiguate!" 
+ """ + #!/usr/bin/env Rscript + library(ampir) + + t <- file("${prefix}.tsv", "w") + close(t) + + a <- file("${prefix}.faa", "w") + close(a) + + version_file_path <- "versions.yml" + version_ampir <- paste(unlist(packageVersion("ampir")), collapse = ".") + f <- file(version_file_path, "w") + writeLines('"${task.process}":', f) + writeLines(" ampir: ", f, sep = "") + writeLines(version_ampir, f) + close(f) + """ } diff --git a/modules/nf-core/ampir/meta.yml b/modules/nf-core/ampir/meta.yml index 9e854448..231cec54 100644 --- a/modules/nf-core/ampir/meta.yml +++ b/modules/nf-core/ampir/meta.yml @@ -12,7 +12,6 @@ tools: tool_dev_url: "https://github.com/Legana/ampir" doi: "10.1093/bioinformatics/btaa653" licence: ["GPL v2"] - input: - meta: type: map @@ -32,10 +31,9 @@ input: description: Minimum protein length for which predictions will be generated pattern: "[0-9]+" - min_probability: - type: number + type: float description: Cut-off for AMP prediction pattern: "[0-9].[0-9]+" - output: - meta: type: map @@ -54,6 +52,7 @@ output: type: file description: File containing AMP predictions in TSV format pattern: "*.tsv" - authors: - "@jasmezz" +maintainers: + - "@jasmezz" diff --git a/modules/nf-core/ampir/tests/main.nf.test b/modules/nf-core/ampir/tests/main.nf.test new file mode 100644 index 00000000..0ed40ef5 --- /dev/null +++ b/modules/nf-core/ampir/tests/main.nf.test @@ -0,0 +1,65 @@ +nextflow_process { + + name "Test Process AMPIR" + script "../main.nf" + process "AMPIR" + + tag "modules" + tag "modules_nfcore" + tag "ampir" + + test("candidatus_portiera_aleyrodidarum proteome [fasta]") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/prokaryotes/candidatus_portiera_aleyrodidarum/genome/proteome.fasta', checkIfExists: true), + ] + input[1] = "precursor" // model + input[2] = 10 // min_length + input[3] = "0.7" // min_probability + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("candidatus_portiera_aleyrodidarum proteome [fasta] - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/prokaryotes/candidatus_portiera_aleyrodidarum/genome/proteome.fasta', checkIfExists: true), + ] + input[1] = "precursor" // model + input[2] = 10 // min_length + input[3] = "0.7" // min_probability + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.amps_faa.collect { file(it[1]).getName() } + + process.out.amps_tsv.collect { file(it[1]).getName() } + + process.out.versions).match() } + ) + } + + } + +} diff --git a/modules/nf-core/ampir/tests/main.nf.test.snap b/modules/nf-core/ampir/tests/main.nf.test.snap new file mode 100644 index 00000000..77f1b9ec --- /dev/null +++ b/modules/nf-core/ampir/tests/main.nf.test.snap @@ -0,0 +1,61 @@ +{ + "sarscov2 - bam - stub": { + "content": [ + [ + "test.faa", + "test.tsv", + "versions.yml:md5,f8d5026ccdd8f72c7ac1b5e4670aab49" + ] + ], + "timestamp": "2023-12-26T18:19:18.308141504" + }, + "sarscov2 - bam": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.faa:md5,0435609144022c55ac196db053f0df89" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.tsv:md5,70a70e8698e8d367707f4b1833e3168c" + ] + ], + "2": [ + "versions.yml:md5,f8d5026ccdd8f72c7ac1b5e4670aab49" + ], + 
"amps_faa": [ + [ + { + "id": "test", + "single_end": false + }, + "test.faa:md5,0435609144022c55ac196db053f0df89" + ] + ], + "amps_tsv": [ + [ + { + "id": "test", + "single_end": false + }, + "test.tsv:md5,70a70e8698e8d367707f4b1833e3168c" + ] + ], + "versions": [ + "versions.yml:md5,f8d5026ccdd8f72c7ac1b5e4670aab49" + ] + } + ], + "timestamp": "2023-12-26T18:18:57.151185866" + } +} \ No newline at end of file diff --git a/modules/nf-core/ampir/tests/tags.yml b/modules/nf-core/ampir/tests/tags.yml new file mode 100644 index 00000000..5ceace2d --- /dev/null +++ b/modules/nf-core/ampir/tests/tags.yml @@ -0,0 +1,2 @@ +ampir: + - "modules/nf-core/ampir/**" diff --git a/modules/nf-core/amplify/predict/environment.yml b/modules/nf-core/amplify/predict/environment.yml new file mode 100644 index 00000000..c980cf5e --- /dev/null +++ b/modules/nf-core/amplify/predict/environment.yml @@ -0,0 +1,7 @@ +name: amplify_predict +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::amplify=2.0.0 diff --git a/modules/nf-core/amplify/predict/main.nf b/modules/nf-core/amplify/predict/main.nf index be5863f1..26108da7 100644 --- a/modules/nf-core/amplify/predict/main.nf +++ b/modules/nf-core/amplify/predict/main.nf @@ -3,10 +3,10 @@ process AMPLIFY_PREDICT { label 'process_single' // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. - conda "bioconda::amplify=1.1.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/amplify:1.1.0--hdfd78af_0': - 'biocontainers/amplify:1.1.0--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/amplify:2.0.0--py36hdfd78af_1': + 'biocontainers/amplify:2.0.0--py36hdfd78af_1' }" input: tuple val(meta), path(faa) @@ -37,4 +37,15 @@ process AMPLIFY_PREDICT { AMPlify: \$(AMPlify --help | grep 'AMPlify v' | sed -e "s/^.*AMPlify v//") END_VERSIONS """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + AMPlify: \$(AMPlify --help | grep 'AMPlify v' | sed -e "s/^.*AMPlify v//") + END_VERSIONS + """ } diff --git a/modules/nf-core/amplify/predict/meta.yml b/modules/nf-core/amplify/predict/meta.yml index e0112a24..5ef93c83 100644 --- a/modules/nf-core/amplify/predict/meta.yml +++ b/modules/nf-core/amplify/predict/meta.yml @@ -12,8 +12,7 @@ tools: documentation: "https://github.com/bcgsc/AMPlify" tool_dev_url: "https://github.com/bcgsc/AMPlify" doi: "10.1186/s12864-022-08310-4" - licence: "['GPL v3']" - + licence: ["GPL v3"] input: - meta: type: map @@ -27,7 +26,6 @@ input: - model_dir: type: directory description: Directory of where models are stored (optional) - output: - meta: type: map @@ -42,6 +40,7 @@ output: type: file description: amino acid sequences with prediction (AMP, non-AMP) and probability scores pattern: "*.{tsv}" - authors: - "@louperelo" +maintainers: + - "@louperelo" diff --git a/modules/nf-core/amplify/predict/tests/main.nf.test b/modules/nf-core/amplify/predict/tests/main.nf.test new file mode 100644 index 00000000..835c409c --- /dev/null +++ b/modules/nf-core/amplify/predict/tests/main.nf.test @@ -0,0 +1,82 @@ +nextflow_process { + + name "Test Process AMPLIFY_PREDICT" + script "../main.nf" + process "AMPLIFY_PREDICT" + + tag "modules" + tag "modules_nfcore" + tag "amplify" + tag "amplify/predict" + tag "prodigal" + tag "gunzip" + 
+ test("AMPlify predict (with Prodigal) - sarscov2 - contigs.fasta") { + + setup { + run("PRODIGAL") { + script "../../../prodigal/main.nf" + process { + """ + input[0] = Channel.fromList([ + tuple([ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fasta/contigs.fasta', checkIfExists: true)) + ]) + input[1] = "gbk" + """ + } + } + run("GUNZIP") { + script "../../../gunzip/main.nf" + process { + """ + input[0] = PRODIGAL.out.amino_acid_fasta + + """ + } + } + } + + when { + process { + """ + input[0] = GUNZIP.out.gunzip + input[1] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("AMPlify predict - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.fromList([ + tuple([ id:'test', single_end:false ], // meta map + file("test")) + ]) + input[1] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + +} diff --git a/modules/nf-core/amplify/predict/tests/main.nf.test.snap b/modules/nf-core/amplify/predict/tests/main.nf.test.snap new file mode 100644 index 00000000..d70e80eb --- /dev/null +++ b/modules/nf-core/amplify/predict/tests/main.nf.test.snap @@ -0,0 +1,72 @@ +{ + "AMPlify predict (with Prodigal) - sarscov2 - contigs.fasta - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,16927b54e09b999e96e4cbecb522d17c" + ], + "tsv": [ + [ + { + "id": "test", + "single_end": false + }, + "test.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,16927b54e09b999e96e4cbecb522d17c" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-05T12:58:56.67316521" + }, + "AMPlify predict (with Prodigal) - sarscov2 - contigs.fasta": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.tsv:md5,1951084ce1d410028be86754997e5852" + ] + ], + "1": [ + "versions.yml:md5,16927b54e09b999e96e4cbecb522d17c" + ], + "tsv": [ + [ + { + "id": "test", + "single_end": false + }, + "test.tsv:md5,1951084ce1d410028be86754997e5852" + ] + ], + "versions": [ + "versions.yml:md5,16927b54e09b999e96e4cbecb522d17c" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-05T12:58:49.894554665" + } +} \ No newline at end of file diff --git a/modules/nf-core/amplify/predict/tests/tags.yml b/modules/nf-core/amplify/predict/tests/tags.yml new file mode 100644 index 00000000..592eb7bc --- /dev/null +++ b/modules/nf-core/amplify/predict/tests/tags.yml @@ -0,0 +1,2 @@ +amplify/predict: + - "modules/nf-core/amplify/predict/**" diff --git a/modules/nf-core/amrfinderplus/run/environment.yml b/modules/nf-core/amrfinderplus/run/environment.yml new file mode 100644 index 00000000..214f44f4 --- /dev/null +++ b/modules/nf-core/amrfinderplus/run/environment.yml @@ -0,0 +1,7 @@ +name: amrfinderplus_run +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::ncbi-amrfinderplus=3.12.8 diff --git a/modules/nf-core/amrfinderplus/run/main.nf b/modules/nf-core/amrfinderplus/run/main.nf index f178b68f..046ba262 100644 --- a/modules/nf-core/amrfinderplus/run/main.nf +++ b/modules/nf-core/amrfinderplus/run/main.nf @@ -2,10 +2,10 @@ process AMRFINDERPLUS_RUN { tag "$meta.id" label 'process_medium' - conda 
"bioconda::ncbi-amrfinderplus=3.11.18" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/ncbi-amrfinderplus:3.11.18--h283d18e_0': - 'biocontainers/ncbi-amrfinderplus:3.11.18--h283d18e_0' }" + 'https://depot.galaxyproject.org/singularity/ncbi-amrfinderplus:3.12.8--h283d18e_0': + 'biocontainers/ncbi-amrfinderplus:3.12.8--h283d18e_0' }" input: tuple val(meta), path(fasta) @@ -62,4 +62,19 @@ process AMRFINDERPLUS_RUN { amrfinderplus-database: \$(echo \$(echo \$(amrfinder --database amrfinderdb --database_version 2> stdout) | rev | cut -f 1 -d ' ' | rev)) END_VERSIONS """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.tsv + + VER=\$(amrfinder --version) + DBVER=stub_version + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + amrfinderplus: \$(amrfinder --version) + amrfinderplus-database: stub_version + END_VERSIONS + """ } diff --git a/modules/nf-core/amrfinderplus/run/meta.yml b/modules/nf-core/amrfinderplus/run/meta.yml index 40359784..465927df 100644 --- a/modules/nf-core/amrfinderplus/run/meta.yml +++ b/modules/nf-core/amrfinderplus/run/meta.yml @@ -12,7 +12,6 @@ tools: tool_dev_url: https://github.com/ncbi/amr doi: "10.1038/s41598-021-91456-0" licence: ["Public Domain"] - input: - meta: type: map @@ -27,7 +26,6 @@ input: type: file description: A compressed tarball of the AMRFinderPlus database to query pattern: "*.tar.gz" - output: - meta: type: map @@ -52,8 +50,11 @@ output: - db_version: type: string description: The version of the used database in string format (useful for downstream tools such as hAMRronization) - authors: - "@rpetit3" - "@louperelo" - "@jfy133" +maintainers: + - "@rpetit3" + - "@louperelo" + - "@jfy133" diff --git a/modules/nf-core/amrfinderplus/run/tests/main.nf.test b/modules/nf-core/amrfinderplus/run/tests/main.nf.test new file mode 100644 index 00000000..8103bb0f --- /dev/null +++ b/modules/nf-core/amrfinderplus/run/tests/main.nf.test @@ -0,0 +1,69 @@ +nextflow_process { + + name "Test Process AMRFINDERPLUS_RUN" + script "../main.nf" + process "AMRFINDERPLUS_RUN" + + tag "modules" + tag "modules_nfcore" + tag "amrfinderplus" + tag "amrfinderplus/run" + tag "amrfinderplus/update" + + setup { + + run("AMRFINDERPLUS_UPDATE") { + script "modules/nf-core/amrfinderplus/update/main.nf" + process { + """ + """ + } + } + } + + test("amrfinderplus/run - haemophilus_influenzae - genome_fna_gz") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/prokaryotes/haemophilus_influenzae/genome/genome.fna.gz', checkIfExists: true) + ] + input[1] = AMRFINDERPLUS_UPDATE.out.db + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("amrfinderplus/run - haemophilus_influenzae - genome_fna_gz - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/prokaryotes/haemophilus_influenzae/genome/genome.fna.gz', checkIfExists: true) + ] + input[1] = AMRFINDERPLUS_UPDATE.out.db + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/amrfinderplus/run/tests/main.nf.test.snap b/modules/nf-core/amrfinderplus/run/tests/main.nf.test.snap 
new file mode 100644 index 00000000..f1e37cd3 --- /dev/null +++ b/modules/nf-core/amrfinderplus/run/tests/main.nf.test.snap @@ -0,0 +1,108 @@ +{ + "amrfinderplus/run - haemophilus_influenzae - genome_fna_gz - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,38c4420b00b74ca78268341754d6d26f" + ], + "3": [ + "3.12.8" + ], + "4": [ + "stub_version" + ], + "db_version": [ + "stub_version" + ], + "mutation_report": [ + + ], + "report": [ + [ + { + "id": "test", + "single_end": false + }, + "test.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tool_version": [ + "3.12.8" + ], + "versions": [ + "versions.yml:md5,38c4420b00b74ca78268341754d6d26f" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-27T19:17:59.662186954" + }, + "amrfinderplus/run - haemophilus_influenzae - genome_fna_gz": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.tsv:md5,b4d261ace9be7d013c19d1f5c0005bfe" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,1cdc90746febb496e06e63dd936aca9b" + ], + "3": [ + "3.12.8" + ], + "4": [ + "2024-01-31.1" + ], + "db_version": [ + "2024-01-31.1" + ], + "mutation_report": [ + + ], + "report": [ + [ + { + "id": "test", + "single_end": false + }, + "test.tsv:md5,b4d261ace9be7d013c19d1f5c0005bfe" + ] + ], + "tool_version": [ + "3.12.8" + ], + "versions": [ + "versions.yml:md5,1cdc90746febb496e06e63dd936aca9b" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-27T19:17:49.927168241" + } +} \ No newline at end of file diff --git a/modules/nf-core/amrfinderplus/run/tests/tags.yml b/modules/nf-core/amrfinderplus/run/tests/tags.yml new file mode 100644 index 00000000..3a5a84a4 --- /dev/null +++ b/modules/nf-core/amrfinderplus/run/tests/tags.yml @@ -0,0 +1,2 @@ +amrfinderplus/run: + - "modules/nf-core/amrfinderplus/run/**" diff --git a/modules/nf-core/amrfinderplus/update/environment.yml b/modules/nf-core/amrfinderplus/update/environment.yml new file mode 100644 index 00000000..d08f0725 --- /dev/null +++ b/modules/nf-core/amrfinderplus/update/environment.yml @@ -0,0 +1,7 @@ +name: amrfinderplus_update +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::ncbi-amrfinderplus=3.12.8 diff --git a/modules/nf-core/amrfinderplus/update/main.nf b/modules/nf-core/amrfinderplus/update/main.nf index bf3c5ef6..619a2a34 100644 --- a/modules/nf-core/amrfinderplus/update/main.nf +++ b/modules/nf-core/amrfinderplus/update/main.nf @@ -2,10 +2,10 @@ process AMRFINDERPLUS_UPDATE { tag "update" label 'process_single' - conda "bioconda::ncbi-amrfinderplus=3.11.18" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/ncbi-amrfinderplus:3.11.18--h283d18e_0': - 'biocontainers/ncbi-amrfinderplus:3.11.18--h283d18e_0' }" + 'https://depot.galaxyproject.org/singularity/ncbi-amrfinderplus:3.12.8--h283d18e_0': + 'biocontainers/ncbi-amrfinderplus:3.12.8--h283d18e_0' }" output: path "amrfinderdb.tar.gz", emit: db @@ -15,7 +15,6 @@ process AMRFINDERPLUS_UPDATE { task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' """ amrfinder_update -d amrfinderdb tar czvf amrfinderdb.tar.gz -C amrfinderdb/\$(readlink amrfinderdb/latest) ./ @@ -25,4 +24,15 @@ process AMRFINDERPLUS_UPDATE { amrfinderplus: \$(amrfinder --version) END_VERSIONS """ + + stub: + """ + touch amrfinderdb.tar + gzip amrfinderdb.tar + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + amrfinderplus: \$(amrfinder --version) + END_VERSIONS + """ } diff --git a/modules/nf-core/amrfinderplus/update/meta.yml b/modules/nf-core/amrfinderplus/update/meta.yml index 30fb7fcd..7a9345d6 100644 --- a/modules/nf-core/amrfinderplus/update/meta.yml +++ b/modules/nf-core/amrfinderplus/update/meta.yml @@ -12,9 +12,7 @@ tools: tool_dev_url: https://github.com/ncbi/amr doi: "10.1038/s41598-021-91456-0" licence: ["Public Domain"] - # this module does have any input. - output: - meta: type: map @@ -29,6 +27,7 @@ output: type: file description: The latest AMRFinder+ database in a compressed tarball pattern: "*.tar.gz" - authors: - "@rpetit3" +maintainers: + - "@rpetit3" diff --git a/modules/nf-core/amrfinderplus/update/tests/main.nf.test b/modules/nf-core/amrfinderplus/update/tests/main.nf.test new file mode 100644 index 00000000..72ff29e6 --- /dev/null +++ b/modules/nf-core/amrfinderplus/update/tests/main.nf.test @@ -0,0 +1,51 @@ +nextflow_process { + + name "Test Process AMRFINDERPLUS_UPDATE" + script "../main.nf" + process "AMRFINDERPLUS_UPDATE" + + tag "modules" + tag "modules_nfcore" + tag "amrfinderplus" + tag "amrfinderplus/update" + + test("amrfinderplus/update") { + + when { + process { + """ + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.db.collect { file(it).getName() } + + process.out.versions + ).match() + } + ) + } + } + + test("amrfinderplus/update - stub") { + + options "-stub" + + when { + process { + """ + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/amrfinderplus/update/tests/main.nf.test.snap b/modules/nf-core/amrfinderplus/update/tests/main.nf.test.snap new file mode 100644 index 00000000..646e134c --- /dev/null +++ b/modules/nf-core/amrfinderplus/update/tests/main.nf.test.snap @@ -0,0 +1,38 @@ +{ + "amrfinderplus/update - stub": { + "content": [ + { + "0": [ + "amrfinderdb.tar.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "1": [ + "versions.yml:md5,785d6824f78d04a40f96ec9c1e02c3a8" + ], + "db": [ + "amrfinderdb.tar.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "versions": [ + "versions.yml:md5,785d6824f78d04a40f96ec9c1e02c3a8" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-27T20:33:49.682876802" + }, + "amrfinderplus/update": { + "content": [ + [ + "amrfinderdb.tar.gz", + "versions.yml:md5,785d6824f78d04a40f96ec9c1e02c3a8" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-27T20:33:40.320979057" + } +} \ No newline at end of file diff --git a/modules/nf-core/amrfinderplus/update/tests/tags.yml 
b/modules/nf-core/amrfinderplus/update/tests/tags.yml new file mode 100644 index 00000000..bbe0358f --- /dev/null +++ b/modules/nf-core/amrfinderplus/update/tests/tags.yml @@ -0,0 +1,2 @@ +amrfinderplus/update: + - "modules/nf-core/amrfinderplus/update/**" diff --git a/modules/nf-core/antismash/antismashlite/environment.yml b/modules/nf-core/antismash/antismashlite/environment.yml new file mode 100644 index 00000000..227b5264 --- /dev/null +++ b/modules/nf-core/antismash/antismashlite/environment.yml @@ -0,0 +1,7 @@ +name: antismash_antismashlite +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::antismash-lite=7.1.0 diff --git a/modules/nf-core/antismash/antismashlite/main.nf b/modules/nf-core/antismash/antismashlite/main.nf index 1b551e6e..422e7be0 100644 --- a/modules/nf-core/antismash/antismashlite/main.nf +++ b/modules/nf-core/antismash/antismashlite/main.nf @@ -2,16 +2,16 @@ process ANTISMASH_ANTISMASHLITE { tag "$meta.id" label 'process_medium' - conda "bioconda::antismash-lite=6.1.1" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/antismash-lite:6.1.1--pyhdfd78af_0' : - 'biocontainers/antismash-lite:6.1.1--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/antismash-lite:7.1.0--pyhdfd78af_0' : + 'biocontainers/antismash-lite:7.1.0--pyhdfd78af_0' }" containerOptions { workflow.containerEngine == 'singularity' ? - "-B $antismash_dir:/usr/local/lib/python3.8/site-packages/antismash" : + "-B $antismash_dir:/usr/local/lib/python3.10/site-packages/antismash" : workflow.containerEngine == 'docker' ? - "-v \$PWD/$antismash_dir:/usr/local/lib/python3.8/site-packages/antismash" : + "-v \$PWD/$antismash_dir:/usr/local/lib/python3.10/site-packages/antismash" : '' } @@ -57,6 +57,7 @@ process ANTISMASH_ANTISMASHLITE { $gff_flag \\ -c $task.cpus \\ --output-dir $prefix \\ + --output-basename $prefix \\ --genefinding-tool none \\ --logfile $prefix/${prefix}.log \\ --databases $databases \\ @@ -64,7 +65,33 @@ process ANTISMASH_ANTISMASHLITE { cat <<-END_VERSIONS > versions.yml "${task.process}": - antismash-lite: \$(antismash --version | sed 's/antiSMASH //') + antismash-lite: \$(echo \$(antismash --version) | sed 's/antiSMASH //') + END_VERSIONS + """ + + stub: + prefix = task.ext.suffix ? "${meta.id}${task.ext.suffix}" : "${meta.id}" + def VERSION = '7.1.0' // WARN: Version information not provided by tool during stub run. Please update this string when bumping container versions. 
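+    // Review note (not part of the upstream module): VERSION is hardcoded
+    // because this stub never invokes the antismash CLI; as the WARN above
+    // states, the string must be bumped together with the 7.1.0 container
+    // references at the top of this module.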
+ """ + mkdir -p ${prefix}/css + mkdir ${prefix}/images + mkdir ${prefix}/js + touch ${prefix}/NZ_CP069563.1.region001.gbk + touch ${prefix}/NZ_CP069563.1.region002.gbk + touch ${prefix}/css/bacteria.css + touch ${prefix}/genome.gbk + touch ${prefix}/genome.json + touch ${prefix}/genome.zip + touch ${prefix}/images/about.svg + touch ${prefix}/index.html + touch ${prefix}/js/antismash.js + touch ${prefix}/js/jquery.js + touch ${prefix}/regions.js + touch ${prefix}/test.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + antismash-lite: $VERSION END_VERSIONS """ } diff --git a/modules/nf-core/antismash/antismashlite/meta.yml b/modules/nf-core/antismash/antismashlite/meta.yml index 0ff3fa0a..21f506bd 100644 --- a/modules/nf-core/antismash/antismashlite/meta.yml +++ b/modules/nf-core/antismash/antismashlite/meta.yml @@ -15,7 +15,6 @@ keywords: - eukaryotes - fungi - antismash - tools: - antismashlite: description: "antiSMASH - the antibiotics and Secondary Metabolite Analysis SHell" @@ -23,8 +22,7 @@ tools: documentation: "https://docs.antismash.secondarymetabolites.org" tool_dev_url: "https://github.com/antismash/antismash" doi: "10.1093/nar/gkab335" - licence: "['AGPL v3']" - + licence: ["AGPL v3"] input: - meta: type: map @@ -55,7 +53,6 @@ input: type: file description: Annotations in GFF3 format (only if sequence_input is in FASTA format) pattern: "*.gff" - output: - meta: type: map @@ -130,6 +127,7 @@ output: type: file description: Sideloaded annotations of protoclusters and/or subregions (see antiSMASH documentation "Annotation sideloading") pattern: "regions.js" - authors: - "@jasmezz" +maintainers: + - "@jasmezz" diff --git a/modules/nf-core/antismash/antismashlite/tests/main.nf.test b/modules/nf-core/antismash/antismashlite/tests/main.nf.test new file mode 100644 index 00000000..5ee21d6d --- /dev/null +++ b/modules/nf-core/antismash/antismashlite/tests/main.nf.test @@ -0,0 +1,126 @@ +nextflow_process { + + name "Test Process ANTISMASH_ANTISMASHLITE" + script "../main.nf" + process "ANTISMASH_ANTISMASHLITE" + + tag "modules" + tag "modules_nfcore" + tag "antismash" + tag "antismash/antismashlite" + tag "antismash/antismashlitedownloaddatabases" + tag "gunzip" + tag "untar" + + setup { + run("UNTAR", alias: "UNTAR_CSS") { + script "modules/nf-core/untar/main.nf" + process { + """ + input[0] = [ + [ ], + file('https://github.com/nf-core/test-datasets/raw/59ddeb5929f89ddddaff292d67f9025812762b87/data/delete_me/antismash/css.tar.gz', checkIfExists: true) + ] + """ + } + } + + run("UNTAR", alias: "UNTAR_DETECTION") { + script "modules/nf-core/untar/main.nf" + process { + """ + input[0] = [ + [ ], + file('https://github.com/nf-core/test-datasets/raw/59ddeb5929f89ddddaff292d67f9025812762b87/data/delete_me/antismash/detection.tar.gz', checkIfExists: true) + ] + """ + } + } + + run("UNTAR", alias: "UNTAR_MODULES") { + script "modules/nf-core/untar/main.nf" + process { + """ + input[0] = [ + [ ], + file('https://github.com/nf-core/test-datasets/raw/59ddeb5929f89ddddaff292d67f9025812762b87/data/delete_me/antismash/modules.tar.gz', checkIfExists: true) + ] + """ + } + } + + run("ANTISMASH_ANTISMASHLITEDOWNLOADDATABASES") { + script "modules/nf-core/antismash/antismashlitedownloaddatabases" + process { + """ + input[0] = UNTAR_CSS.out.untar.map{ it[1] } + input[1] = UNTAR_DETECTION.out.untar.map{ it[1] } + input[2] = UNTAR_MODULES.out.untar.map{ it[1] } + """ + } + } + + run("GUNZIP") { + script "modules/nf-core/gunzip/main.nf" + process { + """ + input[0] = [ + [ id:'test' ], + 
file(params.modules_testdata_base_path + 'genomics/prokaryotes/bacteroides_fragilis/genome/genome.gbff.gz', checkIfExists: true) + ] + """ + } + } + } + + test("antismashlite - bacteroides_fragilis - genome") { + + when { + process { + """ + input[0] = GUNZIP.out.gunzip + input[1] = ANTISMASH_ANTISMASHLITEDOWNLOADDATABASES.out.database + input[2] = ANTISMASH_ANTISMASHLITEDOWNLOADDATABASES.out.antismash_dir + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.gbk_results.get(0).get(1).get(0)).text.contains("##antiSMASH-Data-START##") }, + { assert snapshot(process.out.html_accessory_files).match("html_accessory_files") }, + { assert path(process.out.gbk_input.get(0).get(1).get(0)).text.contains("##antiSMASH-Data-END##") }, + { assert path(process.out.zip.get(0).get(1)).exists() }, + { assert path(process.out.html.get(0).get(1)).text.contains("https://antismash.secondarymetabolites.org/") }, + { assert path(process.out.json_sideloading.get(0).get(1)).text.contains("\"seq_id\": \"NZ_CP069563.1\"") }, + { assert path(process.out.log.get(0).get(1)).text.contains("antiSMASH status: SUCCESS") }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("antismashlite - bacteroides_fragilis - genome - stub") { + + options "-stub" + + when { + process { + """ + input[0] = GUNZIP.out.gunzip + input[1] = ANTISMASH_ANTISMASHLITEDOWNLOADDATABASES.out.database + input[2] = ANTISMASH_ANTISMASHLITEDOWNLOADDATABASES.out.antismash_dir + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/antismash/antismashlite/tests/main.nf.test.snap b/modules/nf-core/antismash/antismashlite/tests/main.nf.test.snap new file mode 100644 index 00000000..618b06f9 --- /dev/null +++ b/modules/nf-core/antismash/antismashlite/tests/main.nf.test.snap @@ -0,0 +1,301 @@ +{ + "versions": { + "content": [ + [ + "versions.yml:md5,2a1c54c017741b59c057a05453fc067d" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-09T17:06:08.439031477" + }, + "html_accessory_files": { + "content": [ + [ + [ + { + "id": "test" + }, + [ + [ + "bacteria.css:md5,39c0ca9cbc64cb824dc958b26b5b4ab8" + ], + [ + "about.svg:md5,2573f954dd506e2d0878daed04f5420a", + "bacteria_about.png:md5,99cdc2aa09aee37553b10ca86b172170", + "bacteria_antismash_icon.svg:md5,23a265b0e1cf293a4743fe13030b636f", + "bacteria_antismash_logo.svg:md5,f80f639969ee6506571ffda2e197df93", + "bacteria_antismash_white.svg:md5,2c9da15cc168d8f796269d037b5e7f60", + "bacteria_download.png:md5,c3428df1cf17cb97e2897ca6daa93d48", + "bacteria_help.png:md5,359b68f90c73208eb389759c0f5c1091", + "bacteria_home.png:md5,6595d97ee49d251fe038207f82012eff", + "bacteria_logo.png:md5,013f84d6dd93cde96f07084ff63d855c", + "contact.svg:md5,53b878c2af4f8a80a647ac30f61e6bf6", + "download.svg:md5,722038156f4ece46747cbf6908501974", + "expand-arrows-alt-solid.svg:md5,21b37749f54320135a455ed266a7fc3a", + "external-link-alt-solid.svg:md5,ca337694c74e57f73d15ca9db30081ba", + "fungi_about.png:md5,4d55bf14df0340dca01a286487fa8448", + "fungi_antismash_icon.svg:md5,2acc19cc91d5d7285a72f0b3912e108a", + "fungi_antismash_icon_white.svg:md5,961f1c41e25036a625f115f209a961c7", + "fungi_antismash_logo.svg:md5,36560983a36f46786c98a05125b15724", + "fungi_download.png:md5,782580852674aab0b69b2b94a94c7615", + "fungi_help.png:md5,0ac06748f3177d150ab90997117c4f64", + 
"fungi_home.png:md5,880071898062d6dafe989ac73bb7bbea", + "fungi_logo.png:md5,29294392a3953fd1ba12d1a39cebaeeb", + "help.svg:md5,e7565a3cd74893422f2886a0af748df2", + "mail.png:md5,049f51233b29663e4e4e4c8097c2d096", + "minus-circle.svg:md5,b523305570d06b6e34cd7099bed22015", + "nostructure_icon.png:md5,fc982a5b84a1a99db607731625a87f88", + "plant_antismash_icon.svg:md5,e031de9570ef2809e52502481a5e77ea", + "plant_antismash_icon_white.svg:md5,10d25996b023dbdaed4a382471ab4877", + "plus-circle.svg:md5,cba2cdd9ef893274f572228b354718cf", + "question-circle-solid.svg:md5,6dbc83547e29ecedc7f2a5b81354353b", + "search-solid.svg:md5,aeab848c26357f3d120f3e58f1efa8f5" + ], + [ + "antismash.js:md5,c90571fe2580fd4feff9a37314f1fe6b", + "jquery.js:md5,397754ba49e9e0cf4e7c190da78dda05", + "jquery.tablesorter.min.js:md5,5e9e08cef4d1be0eaa538e6eb28809a7" + ] + ] + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-09T17:06:08.392236617" + }, + "antismashlite - bacteroides_fragilis - genome - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test" + }, + [ + [ + "bacteria.css:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + "about.svg:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + "antismash.js:md5,d41d8cd98f00b204e9800998ecf8427e", + "jquery.js:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ] + ], + "10": [ + [ + { + "id": "test" + }, + "genome.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "11": [ + [ + { + "id": "test" + }, + [ + "NZ_CP069563.1.region001.gbk:md5,d41d8cd98f00b204e9800998ecf8427e", + "NZ_CP069563.1.region002.gbk:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "12": [ + + ], + "13": [ + [ + { + "id": "test" + }, + "index.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "14": [ + + ], + "15": [ + [ + { + "id": "test" + }, + "regions.js:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "16": [ + "versions.yml:md5,2a1c54c017741b59c057a05453fc067d" + ], + "2": [ + + ], + "3": [ + + ], + "4": [ + + ], + "5": [ + + ], + "6": [ + + ], + "7": [ + [ + { + "id": "test" + }, + [ + "NZ_CP069563.1.region001.gbk:md5,d41d8cd98f00b204e9800998ecf8427e", + "NZ_CP069563.1.region002.gbk:md5,d41d8cd98f00b204e9800998ecf8427e", + "genome.gbk:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "8": [ + [ + { + "id": "test" + }, + "genome.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "9": [ + [ + { + "id": "test" + }, + "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "clusterblast_file": [ + + ], + "clusterblastoutput": [ + + ], + "gbk_input": [ + [ + { + "id": "test" + }, + [ + "NZ_CP069563.1.region001.gbk:md5,d41d8cd98f00b204e9800998ecf8427e", + "NZ_CP069563.1.region002.gbk:md5,d41d8cd98f00b204e9800998ecf8427e", + "genome.gbk:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "gbk_results": [ + [ + { + "id": "test" + }, + [ + "NZ_CP069563.1.region001.gbk:md5,d41d8cd98f00b204e9800998ecf8427e", + "NZ_CP069563.1.region002.gbk:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "html": [ + [ + { + "id": "test" + }, + "index.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "html_accessory_files": [ + [ + { + "id": "test" + }, + [ + [ + "bacteria.css:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + "about.svg:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + "antismash.js:md5,d41d8cd98f00b204e9800998ecf8427e", + "jquery.js:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ] + ], + "json_results": [ + [ + { + "id": "test" + }, + "genome.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "json_sideloading": [ + [ + { 
+ "id": "test" + }, + "regions.js:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "knownclusterblast_dir": [ + + ], + "knownclusterblast_html": [ + + ], + "knownclusterblast_txt": [ + + ], + "knownclusterblastoutput": [ + + ], + "log": [ + [ + { + "id": "test" + }, + "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "svg_files_clusterblast": [ + + ], + "svg_files_knownclusterblast": [ + + ], + "versions": [ + "versions.yml:md5,2a1c54c017741b59c057a05453fc067d" + ], + "zip": [ + [ + { + "id": "test" + }, + "genome.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-11T16:35:51.079804" + } +} \ No newline at end of file diff --git a/modules/nf-core/antismash/antismashlite/tests/tags.yml b/modules/nf-core/antismash/antismashlite/tests/tags.yml new file mode 100644 index 00000000..020b39d6 --- /dev/null +++ b/modules/nf-core/antismash/antismashlite/tests/tags.yml @@ -0,0 +1,2 @@ +antismash/antismashlite: + - "modules/nf-core/antismash/antismashlite/**" diff --git a/modules/nf-core/antismash/antismashlitedownloaddatabases/environment.yml b/modules/nf-core/antismash/antismashlitedownloaddatabases/environment.yml new file mode 100644 index 00000000..b9323a93 --- /dev/null +++ b/modules/nf-core/antismash/antismashlitedownloaddatabases/environment.yml @@ -0,0 +1,7 @@ +name: antismash_antismashlitedownloaddatabases +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::antismash-lite=7.1.0 diff --git a/modules/nf-core/antismash/antismashlitedownloaddatabases/main.nf b/modules/nf-core/antismash/antismashlitedownloaddatabases/main.nf index 817db0c2..e63f20d2 100644 --- a/modules/nf-core/antismash/antismashlitedownloaddatabases/main.nf +++ b/modules/nf-core/antismash/antismashlitedownloaddatabases/main.nf @@ -1,10 +1,10 @@ process ANTISMASH_ANTISMASHLITEDOWNLOADDATABASES { label 'process_single' - conda "bioconda::antismash-lite=6.1.1" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/antismash-lite:6.1.1--pyhdfd78af_0' : - 'biocontainers/antismash-lite:6.1.1--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/antismash-lite:7.1.0--pyhdfd78af_0' : + 'biocontainers/antismash-lite:7.1.0--pyhdfd78af_0' }" /* These files are normally downloaded/created by download-antismash-databases itself, and must be retrieved for input by manually running the command with conda or a standalone installation of antiSMASH. Therefore we do not recommend using this module for production pipelines, but rather require users to specify their own local copy of the antiSMASH database in pipelines. This is solely for use for CI tests of the nf-core/module version of antiSMASH. @@ -14,9 +14,9 @@ process ANTISMASH_ANTISMASHLITEDOWNLOADDATABASES { containerOptions { workflow.containerEngine == 'singularity' ? - "-B $database_css:/usr/local/lib/python3.8/site-packages/antismash/outputs/html/css,$database_detection:/usr/local/lib/python3.8/site-packages/antismash/detection,$database_modules:/usr/local/lib/python3.8/site-packages/antismash/modules" : + "-B $database_css:/usr/local/lib/python3.10/site-packages/antismash/outputs/html/css,$database_detection:/usr/local/lib/python3.10/site-packages/antismash/detection,$database_modules:/usr/local/lib/python3.10/site-packages/antismash/modules" : workflow.containerEngine == 'docker' ? 
- "-v \$PWD/$database_css:/usr/local/lib/python3.8/site-packages/antismash/outputs/html/css -v \$PWD/$database_detection:/usr/local/lib/python3.8/site-packages/antismash/detection -v \$PWD/$database_modules:/usr/local/lib/python3.8/site-packages/antismash/modules" : + "-v \$PWD/$database_css:/usr/local/lib/python3.10/site-packages/antismash/outputs/html/css -v \$PWD/$database_detection:/usr/local/lib/python3.10/site-packages/antismash/detection -v \$PWD/$database_modules:/usr/local/lib/python3.10/site-packages/antismash/modules" : '' } @@ -35,7 +35,7 @@ process ANTISMASH_ANTISMASHLITEDOWNLOADDATABASES { script: def args = task.ext.args ?: '' - cp_cmd = ( session.config.conda && session.config.conda.enabled ) ? "cp -r \$(python -c 'import antismash;print(antismash.__file__.split(\"/__\")[0])') antismash_dir;" : "cp -r /usr/local/lib/python3.8/site-packages/antismash antismash_dir;" + cp_cmd = ( session.config.conda && session.config.conda.enabled ) ? "cp -r \$(python -c 'import antismash;print(antismash.__file__.split(\"/__\")[0])') antismash_dir;" : "cp -r /usr/local/lib/python3.10/site-packages/antismash antismash_dir;" """ download-antismash-databases \\ --database-dir antismash_db \\ @@ -51,7 +51,8 @@ process ANTISMASH_ANTISMASHLITEDOWNLOADDATABASES { stub: def args = task.ext.args ?: '' - cp_cmd = (session.config.conda && session.config.conda.enabled ) ? "cp -r \$(python -c 'import antismash;print(antismash.__file__.split(\"/__\")[0])') antismash_dir;" : "cp -r /usr/local/lib/python3.8/site-packages/antismash antismash_dir;" + cp_cmd = (session.config.conda && session.config.conda.enabled ) ? "cp -r \$(python -c 'import antismash;print(antismash.__file__.split(\"/__\")[0])') antismash_dir;" : "cp -r /usr/local/lib/python3.10/site-packages/antismash antismash_dir;" + def VERSION = '7.1.0' // WARN: Version information not provided by tool during stub run. Please update this string when bumping container versions. """ echo "download-antismash-databases --database-dir antismash_db $args" @@ -62,7 +63,7 @@ process ANTISMASH_ANTISMASHLITEDOWNLOADDATABASES { cat <<-END_VERSIONS > versions.yml "${task.process}": - antismash-lite: \$(antismash --version | sed 's/antiSMASH //') + antismash-lite: $VERSION END_VERSIONS """ } diff --git a/modules/nf-core/antismash/antismashlitedownloaddatabases/meta.yml b/modules/nf-core/antismash/antismashlitedownloaddatabases/meta.yml index 69e7261e..010c6267 100644 --- a/modules/nf-core/antismash/antismashlitedownloaddatabases/meta.yml +++ b/modules/nf-core/antismash/antismashlitedownloaddatabases/meta.yml @@ -22,7 +22,6 @@ tools: tool_dev_url: https://github.com/antismash/antismash doi: "10.1093/nar/gkab335" licence: ["AGPL v3"] - input: - database_css: type: directory @@ -39,13 +38,11 @@ input: description: | antismash/modules folder which is being created during the antiSMASH database downloading step. These files are normally downloaded by download-antismash-databases itself, and must be retrieved by the user by manually running the command with conda or a standalone installation of antiSMASH. Therefore we do not recommend using this module for production pipelines, but rather require users to specify their own local copy of the antiSMASH database in pipelines. 
pattern: "modules" - output: - versions: type: file description: File containing software versions pattern: "versions.yml" - - database: type: directory description: Download directory for antiSMASH databases @@ -55,6 +52,7 @@ output: description: | antismash installation folder which is being modified during the antiSMASH database downloading step. The modified files are normally downloaded by download-antismash-databases itself, and must be retrieved by the user by manually running the command with conda or a standalone installation of antiSMASH. Therefore we do not recommend using this module for production pipelines, but rather require users to specify their own local copy of the antiSMASH database and installation folder in pipelines. pattern: "antismash_dir" - authors: - "@jasmezz" +maintainers: + - "@jasmezz" diff --git a/modules/nf-core/antismash/antismashlitedownloaddatabases/tests/main.nf.test b/modules/nf-core/antismash/antismashlitedownloaddatabases/tests/main.nf.test new file mode 100644 index 00000000..55f5f2f5 --- /dev/null +++ b/modules/nf-core/antismash/antismashlitedownloaddatabases/tests/main.nf.test @@ -0,0 +1,135 @@ +nextflow_process { + + name "Test Process ANTISMASH_ANTISMASHLITEDOWNLOADDATABASES" + script "../main.nf" + process "ANTISMASH_ANTISMASHLITEDOWNLOADDATABASES" + + tag "modules" + tag "modules_nfcore" + tag "antismash" + tag "antismash/antismashlitedownloaddatabases" + tag "untar" + + test("antiSMASH-lite downloaddatabases") { + + setup { + + run("UNTAR", alias: "UNTAR_CSS") { + script "modules/nf-core/untar/main.nf" + process { + """ + input[0] = [ + [ ], + file('https://github.com/nf-core/test-datasets/raw/59ddeb5929f89ddddaff292d67f9025812762b87/data/delete_me/antismash/css.tar.gz', checkIfExists: true) + ] + """ + } + } + + run("UNTAR", alias: "UNTAR_DETECTION") { + script "modules/nf-core/untar/main.nf" + process { + """ + input[0] = [ + [ ], + file('https://github.com/nf-core/test-datasets/raw/59ddeb5929f89ddddaff292d67f9025812762b87/data/delete_me/antismash/detection.tar.gz', checkIfExists: true) + ] + """ + } + } + + run("UNTAR", alias: "UNTAR_MODULES") { + script "modules/nf-core/untar/main.nf" + process { + """ + input[0] = [ + [ ], + file('https://github.com/nf-core/test-datasets/raw/59ddeb5929f89ddddaff292d67f9025812762b87/data/delete_me/antismash/modules.tar.gz', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = UNTAR_CSS.out.untar.map{ it[1] } + input[1] = UNTAR_DETECTION.out.untar.map{ it[1] } + input[2] = UNTAR_MODULES.out.untar.map{ it[1] } + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot ( + file(process.out.database.get(0)).list().sort(), + process.out.versions, + ).match() } + ) + } + } + + test("antiSMASH-lite downloaddatabases - stub") { + + options "-stub" + + setup { + + run("UNTAR", alias: "UNTAR_CSS") { + script "modules/nf-core/untar/main.nf" + process { + """ + input[0] = [ + [ ], + file('https://github.com/nf-core/test-datasets/raw/59ddeb5929f89ddddaff292d67f9025812762b87/data/delete_me/antismash/css.tar.gz', checkIfExists: true) + ] + """ + } + } + + run("UNTAR", alias: "UNTAR_DETECTION") { + script "modules/nf-core/untar/main.nf" + process { + """ + input[0] = [ + [ ], + file('https://github.com/nf-core/test-datasets/raw/59ddeb5929f89ddddaff292d67f9025812762b87/data/delete_me/antismash/detection.tar.gz', checkIfExists: true) + ] + """ + } + } + + run("UNTAR", alias: "UNTAR_MODULES") { + script "modules/nf-core/untar/main.nf" + process { + """ + input[0] 
= [ + [ ], + file('https://github.com/nf-core/test-datasets/raw/59ddeb5929f89ddddaff292d67f9025812762b87/data/delete_me/antismash/modules.tar.gz', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = UNTAR_CSS.out.untar.map{ it[1] } + input[1] = UNTAR_DETECTION.out.untar.map{ it[1] } + input[2] = UNTAR_MODULES.out.untar.map{ it[1] } + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/antismash/antismashlitedownloaddatabases/tests/main.nf.test.snap b/modules/nf-core/antismash/antismashlitedownloaddatabases/tests/main.nf.test.snap new file mode 100644 index 00000000..21ee9d41 --- /dev/null +++ b/modules/nf-core/antismash/antismashlitedownloaddatabases/tests/main.nf.test.snap @@ -0,0 +1,62 @@ +{ + "antiSMASH-lite downloaddatabases - stub": { + "content": [ + { + "0": [ + [ + + ] + ], + "1": [ + [ + + ] + ], + "2": [ + "versions.yml:md5,9eccc775a12d25ca5dfe334e8874f12a" + ], + "antismash_dir": [ + [ + + ] + ], + "database": [ + [ + + ] + ], + "versions": [ + "versions.yml:md5,9eccc775a12d25ca5dfe334e8874f12a" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-12T13:41:29.456143" + }, + "antiSMASH-lite downloaddatabases": { + "content": [ + [ + "as-js", + "clusterblast", + "clustercompare", + "comparippson", + "knownclusterblast", + "nrps_pks", + "pfam", + "resfam", + "tigrfam" + ], + [ + "versions.yml:md5,9eccc775a12d25ca5dfe334e8874f12a" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-12T13:41:08.116244" + } +} \ No newline at end of file diff --git a/modules/nf-core/antismash/antismashlitedownloaddatabases/tests/tags.yml b/modules/nf-core/antismash/antismashlitedownloaddatabases/tests/tags.yml new file mode 100644 index 00000000..1b01466e --- /dev/null +++ b/modules/nf-core/antismash/antismashlitedownloaddatabases/tests/tags.yml @@ -0,0 +1,2 @@ +antismash/antismashlitedownloaddatabases: + - "modules/nf-core/antismash/antismashlitedownloaddatabases/**" diff --git a/modules/nf-core/argnorm/environment.yml b/modules/nf-core/argnorm/environment.yml new file mode 100644 index 00000000..771b87c9 --- /dev/null +++ b/modules/nf-core/argnorm/environment.yml @@ -0,0 +1,7 @@ +name: "argnorm" +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - "bioconda::argnorm=0.5.0" diff --git a/modules/nf-core/argnorm/main.nf b/modules/nf-core/argnorm/main.nf new file mode 100644 index 00000000..5ff5e8a5 --- /dev/null +++ b/modules/nf-core/argnorm/main.nf @@ -0,0 +1,68 @@ +process ARGNORM { + tag "$meta.id" + label 'process_low' + // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/argnorm:0.5.0--pyhdfd78af_0': + 'biocontainers/argnorm:0.5.0--pyhdfd78af_0' }" + + input: + tuple val(meta), path(input_tsv) + val tool + val db + + output: + tuple val(meta), path("*.tsv"), emit: tsv + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '0.5.0' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. 
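+    // Guard clauses below: a tool name is always required, and tools in the
+    // ["abricate"] list additionally need a database name. As an illustration
+    // (values taken from the tests in this PR), tool = 'amrfinderplus' with
+    // db = 'ncbi' makes the command template below resolve to roughly:
+    //   argnorm amrfinderplus -i <input.tsv> -o <prefix> --db ncbi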
+    def db_args = db ? "--db ${db}" : ""
+    if (!tool) {
+        error 'Tool not provided.'
+    }
+    if ((tool in ["abricate"]) && !db) {
+        error "$tool requires a database but none was provided."
+    }
+
+    """
+    argnorm \\
+        $tool \\
+        -i $input_tsv \\
+        -o $prefix \\
+        $db_args \\
+        $args
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        argnorm: $VERSION
+    END_VERSIONS
+    """
+
+    stub:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def VERSION = '0.5.0' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
+    if (!tool) {
+        error 'Tool not provided.'
+    }
+    if ((tool in ["abricate"]) && !db) {
+        error "$tool requires a database but none was provided."
+    }
+
+    """
+    touch ${prefix}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        argnorm: $VERSION
+    END_VERSIONS
+    """
+
+}
diff --git a/modules/nf-core/argnorm/meta.yml b/modules/nf-core/argnorm/meta.yml
new file mode 100644
index 00000000..a977e863
--- /dev/null
+++ b/modules/nf-core/argnorm/meta.yml
@@ -0,0 +1,60 @@
+name: "argnorm"
+description: Normalize antibiotic resistance genes (ARGs) using the ARO ontology (developed by CARD).
+keywords:
+  - amr
+  - antimicrobial resistance
+  - arg
+  - antimicrobial resistance genes
+  - genomics
+  - metagenomics
+  - normalization
+  - drug categorization
+tools:
+  - "argnorm":
+      description: "Normalize antibiotic resistance genes (ARGs) using the ARO ontology (developed by CARD)."
+      homepage: "https://argnorm.readthedocs.io/en/latest/"
+      documentation: "https://argnorm.readthedocs.io/en/latest/"
+      tool_dev_url: "https://github.com/BigDataBiology/argNorm"
+      licence: ["MIT"]
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. `[ id:'sample1', single_end:false ]`
+
+  - input_tsv:
+      type: file
+      description: ARG annotation output
+      pattern: "*.tsv"
+
+  - tool:
+      type: string
+      description: ARG annotation tool used
+      pattern: "argsoap|abricate|deeparg|resfinder|amrfinderplus"
+
+  - db:
+      type: string
+      description: Database used for ARG annotation
+      pattern: "sarg|ncbi|resfinder|deeparg|megares|argannot|resfinderfg"
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. `[ id:'sample1', single_end:false ]`
+  - tsv:
+      type: file
+      description: Normalized argNorm output
+      pattern: "*.tsv"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+
+authors:
+  - "@Vedanth-Ramji"
+maintainers:
+  - "@Vedanth-Ramji"
diff --git a/modules/nf-core/argnorm/tests/argnorm_hamronized.config b/modules/nf-core/argnorm/tests/argnorm_hamronized.config
new file mode 100644
index 00000000..68748018
--- /dev/null
+++ b/modules/nf-core/argnorm/tests/argnorm_hamronized.config
@@ -0,0 +1,5 @@
+process {
+    withName: ARGNORM {
+        ext.args = '--hamronized'
+    }
+}
diff --git a/modules/nf-core/argnorm/tests/argnorm_raw.config b/modules/nf-core/argnorm/tests/argnorm_raw.config
new file mode 100644
index 00000000..dffa3c48
--- /dev/null
+++ b/modules/nf-core/argnorm/tests/argnorm_raw.config
@@ -0,0 +1,5 @@
+process {
+    withName: ARGNORM {
+        ext.args = ''
+    }
+}
diff --git a/modules/nf-core/argnorm/tests/main.nf.test b/modules/nf-core/argnorm/tests/main.nf.test
new file mode 100644
index 00000000..e68c2151
--- /dev/null
+++ b/modules/nf-core/argnorm/tests/main.nf.test
@@ -0,0 +1,155 @@
+nextflow_process {
+    name "Test Process ARGNORM"
+    script "../main.nf"
+    process "ARGNORM"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "argnorm"
+
+    test("argnorm - amrfinderplus_ncbi_raw - tsv") {
+        config './argnorm_raw.config'
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'argnorm_raw.tsv' ], // meta map
+                    file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/amrfinderplus/test_output.tsv", checkIfExists: true)
+                ]
+                input[1] = 'amrfinderplus'
+                input[2] = 'ncbi'
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+    }
+
+    test("argnorm - amrfinderplus_ncbi_hamronized - tsv") {
+        config './argnorm_hamronized.config'
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'argnorm_hamronized.tsv' ], // meta map
+                    file("https://raw.githubusercontent.com/BigDataBiology/argNorm/main/examples/hamronized/amrfinderplus.ncbi.orfs.tsv", checkIfExists: true)
+                ]
+                input[1] = 'amrfinderplus'
+                input[2] = 'ncbi'
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+    }
+
+    test("argnorm - missing tool") {
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'argnorm_raw.tsv' ], // meta map
+                    file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/amrfinderplus/test_output.tsv", checkIfExists: true)
+                ]
+                input[1] = []
+                input[2] = []
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert !process.success },
+                { assert process.stdout.toString().contains("Tool not provided") }
+            )
+        }
+    }
+
+    test("argnorm - missing db") {
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'argnorm_raw.tsv' ], // meta map
+                    file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/amrfinderplus/test_output.tsv", checkIfExists: true)
+                ]
+                input[1] = "abricate"
+                input[2] = []
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert !process.success },
+                { assert process.stdout.toString().contains("abricate requires a database but none was provided.") }
+            )
+        }
+    }
+
+    test("argnorm - amrfinderplus_ncbi_hamronized - tsv - stub") {
+        options "-stub"
+        config './argnorm_hamronized.config'
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'argnorm_hamronized_stub.tsv' ], // meta map
file("https://raw.githubusercontent.com/BigDataBiology/argNorm/main/examples/hamronized/amrfinderplus.ncbi.orfs.tsv", checkIfExists: true) + ] + input[1] = 'amrfinderplus' + input[2] = 'ncbi' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("argnorm - amrfinderplus_ncbi - tsv - stub") { + + options "-stub" + config './argnorm_raw.config' + + when { + process { + """ + input[0] = [ + [ id:'argnorm_raw_stub.tsv' ], // meta map + file("https://raw.githubusercontent.com/BigDataBiology/argNorm/main/examples/raw/amrfinderplus.ncbi.orfs.tsv", checkIfExists: true) + ] + input[1] = 'amrfinderplus' + input[2] = 'ncbi' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + +} diff --git a/modules/nf-core/argnorm/tests/main.nf.test.snap b/modules/nf-core/argnorm/tests/main.nf.test.snap new file mode 100644 index 00000000..4bed36ba --- /dev/null +++ b/modules/nf-core/argnorm/tests/main.nf.test.snap @@ -0,0 +1,134 @@ +{ + "argnorm - amrfinderplus_ncbi_raw - tsv": { + "content": [ + { + "0": [ + [ + { + "id": "argnorm_raw.tsv" + }, + "argnorm_raw.tsv:md5,f870c239182592a065d9f80732b39bba" + ] + ], + "1": [ + "versions.yml:md5,e200075d98a6f59137f105efceea0426" + ], + "tsv": [ + [ + { + "id": "argnorm_raw.tsv" + }, + "argnorm_raw.tsv:md5,f870c239182592a065d9f80732b39bba" + ] + ], + "versions": [ + "versions.yml:md5,e200075d98a6f59137f105efceea0426" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T17:46:00.195868976" + }, + "argnorm - amrfinderplus_ncbi_hamronized - tsv": { + "content": [ + { + "0": [ + [ + { + "id": "argnorm_hamronized.tsv" + }, + "argnorm_hamronized.tsv:md5,1f9a3820f09fd6a818af372dfe5cf322" + ] + ], + "1": [ + "versions.yml:md5,e200075d98a6f59137f105efceea0426" + ], + "tsv": [ + [ + { + "id": "argnorm_hamronized.tsv" + }, + "argnorm_hamronized.tsv:md5,1f9a3820f09fd6a818af372dfe5cf322" + ] + ], + "versions": [ + "versions.yml:md5,e200075d98a6f59137f105efceea0426" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T17:46:31.856263885" + }, + "argnorm - amrfinderplus_ncbi_hamronized - tsv - stub": { + "content": [ + { + "0": [ + [ + { + "id": "argnorm_hamronized_stub.tsv" + }, + "argnorm_hamronized_stub.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,e200075d98a6f59137f105efceea0426" + ], + "tsv": [ + [ + { + "id": "argnorm_hamronized_stub.tsv" + }, + "argnorm_hamronized_stub.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,e200075d98a6f59137f105efceea0426" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T17:47:03.088627445" + }, + "argnorm - amrfinderplus_ncbi - tsv - stub": { + "content": [ + { + "0": [ + [ + { + "id": "argnorm_raw_stub.tsv" + }, + "argnorm_raw_stub.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,e200075d98a6f59137f105efceea0426" + ], + "tsv": [ + [ + { + "id": "argnorm_raw_stub.tsv" + }, + "argnorm_raw_stub.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,e200075d98a6f59137f105efceea0426" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T17:47:34.346622776" + } +} diff --git a/modules/nf-core/argnorm/tests/tags.yml b/modules/nf-core/argnorm/tests/tags.yml new file mode 100644 
index 00000000..a2b6e8d0 --- /dev/null +++ b/modules/nf-core/argnorm/tests/tags.yml @@ -0,0 +1,2 @@ +argnorm: + - "modules/nf-core/argnorm/**" diff --git a/modules/nf-core/bakta/bakta/tests/main.nf.test b/modules/nf-core/bakta/bakta/tests/main.nf.test new file mode 100644 index 00000000..3c1f8f82 --- /dev/null +++ b/modules/nf-core/bakta/bakta/tests/main.nf.test @@ -0,0 +1,83 @@ +nextflow_process { + + name "Test Process BAKTA_BAKTA" + script "../main.nf" + config "./nextflow.config" + process "BAKTA_BAKTA" + + tag "modules" + tag "modules_nfcore" + tag "bakta" + tag "bakta/bakta" + tag "bakta/baktadbdownload" + + test("Bakta - bacteroides_fragilis - genome.fasta") { + + setup { + run("BAKTA_BAKTADBDOWNLOAD") { + script "../../baktadbdownload/main.nf" + process { + """ + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/prokaryotes/bacteroides_fragilis/illumina/fasta/test1.contigs.fa.gz', checkIfExists: true) + ] + input[1] = BAKTA_BAKTADBDOWNLOAD.out.db + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.embl.get(0).get(1)).text.contains("/translation=\"MKNTLKIAILLIAIISMGHWMPVKQVCDLNSLSLQNVEALANGET") }, + { assert path(process.out.faa.get(0).get(1)).text.contains("MKNTLKIAILLIAIISMGHWMPVKQVCDLNSLSLQNVEALANGETPNYTFCIGAGSVDCPIQHDKVKYVSQGFSLDY") }, + { assert path(process.out.ffn.get(0).get(1)).text.contains("ATGAAAAACACTTTAAAAATAGCTATTCTTCTTATTGCTATTATTTCTATGGGGCATTGGATGCCTGTAAAACAAGT") }, + { assert path(process.out.fna.get(0).get(1)).text.contains("TCTTTTTACTCATAATCTACTTTTATGATGTTAATTATTTTTTCCGTGTCTCTCTTTCGG") }, + { assert path(process.out.gbff.get(0).get(1)).text.contains("/translation=\"MKNTLKIAILLIAIISMGHWMPVKQVCDLNSLSLQNVEALANGET") }, + { assert path(process.out.gff.get(0).get(1)).text.contains("##sequence-region contig_1 1 2926") }, + { assert path(process.out.hypotheticals_tsv.get(0).get(1)).text.contains("#Annotated with Bakta") }, + { assert path(process.out.hypotheticals_faa.get(0).get(1)).text.contains("MKNLILVLGCFFFLISCQQTEKEKLEELVKNWNGKEVLL") }, + { assert path(process.out.tsv.get(0).get(1)).text.contains("SO:0001217, UniRef:UniRef50_A0A0I9S7A3") }, + { assert path(process.out.txt.get(0).get(1)).text.contains("Length: 1739120") }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } + + test("Bakta - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [[id: 'stub'],file('stub')] + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/bakta/bakta/tests/main.nf.test.snap b/modules/nf-core/bakta/bakta/tests/main.nf.test.snap new file mode 100644 index 00000000..40e30c36 --- /dev/null +++ b/modules/nf-core/bakta/bakta/tests/main.nf.test.snap @@ -0,0 +1,191 @@ +{ + "versions": { + "content": [ + [ + "versions.yml:md5,f8b70ceb2a328c25a190699384e6152d" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-14T09:11:06.657602394" + }, + "Bakta - stub": { + "content": [ + { + "0": [ + [ + { + "id": "stub" + }, + "stub.embl:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "stub" + }, + "stub.faa:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "10": [ + "versions.yml:md5,f8b70ceb2a328c25a190699384e6152d" + ], + "2": [ + [ + { + "id": "stub" + }, + 
"stub.ffn:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "stub" + }, + "stub.fna:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + { + "id": "stub" + }, + "stub.gbff:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "5": [ + [ + { + "id": "stub" + }, + "stub.gff3:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "6": [ + [ + { + "id": "stub" + }, + "stub.hypotheticals.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "7": [ + [ + { + "id": "stub" + }, + "stub.hypotheticals.faa:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "8": [ + [ + { + "id": "stub" + }, + "stub.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "9": [ + [ + { + "id": "stub" + }, + "stub.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "embl": [ + [ + { + "id": "stub" + }, + "stub.embl:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "faa": [ + [ + { + "id": "stub" + }, + "stub.faa:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "ffn": [ + [ + { + "id": "stub" + }, + "stub.ffn:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "fna": [ + [ + { + "id": "stub" + }, + "stub.fna:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "gbff": [ + [ + { + "id": "stub" + }, + "stub.gbff:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "gff": [ + [ + { + "id": "stub" + }, + "stub.gff3:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "hypotheticals_faa": [ + [ + { + "id": "stub" + }, + "stub.hypotheticals.faa:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "hypotheticals_tsv": [ + [ + { + "id": "stub" + }, + "stub.hypotheticals.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tsv": [ + [ + { + "id": "stub" + }, + "stub.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "txt": [ + [ + { + "id": "stub" + }, + "stub.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,f8b70ceb2a328c25a190699384e6152d" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-14T09:11:15.532858932" + } +} \ No newline at end of file diff --git a/modules/nf-core/bakta/bakta/tests/nextflow.config b/modules/nf-core/bakta/bakta/tests/nextflow.config new file mode 100644 index 00000000..9af0dde1 --- /dev/null +++ b/modules/nf-core/bakta/bakta/tests/nextflow.config @@ -0,0 +1,11 @@ +process { + + withName: 'BAKTA_BAKTADBDOWNLOAD' { + ext.args = "--type light" + } + + withName: 'BAKTA_BAKTA' { + memory = 7.GB + } + +} diff --git a/modules/nf-core/bakta/bakta/tests/tags.yml b/modules/nf-core/bakta/bakta/tests/tags.yml new file mode 100644 index 00000000..ecb08c45 --- /dev/null +++ b/modules/nf-core/bakta/bakta/tests/tags.yml @@ -0,0 +1,2 @@ +bakta/bakta: + - "modules/nf-core/bakta/bakta/**" diff --git a/modules/nf-core/bakta/baktadbdownload/tests/main.nf.test b/modules/nf-core/bakta/baktadbdownload/tests/main.nf.test new file mode 100644 index 00000000..a5f827f9 --- /dev/null +++ b/modules/nf-core/bakta/baktadbdownload/tests/main.nf.test @@ -0,0 +1,55 @@ +nextflow_process { + + name "Test Process BAKTA_BAKTADBDOWNLOAD" + script "../main.nf" + process "BAKTA_BAKTADBDOWNLOAD" + config "./nextflow.config" + + tag "modules" + tag "modules_nfcore" + tag "bakta" + tag "bakta/baktadbdownload" + + test("Bakta database download") { + + when { + process { + """ + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.db.get(0)).exists() }, + { assert snapshot(process.out.versions).match() } + ) + } + + } + + test("Bakta database download - stub") { + + options "-stub" + + when { + process { + """ + """ + } + 
} + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.db + + process.out.versions + ).match() } + ) + } + + } + +} diff --git a/modules/nf-core/bakta/baktadbdownload/tests/main.nf.test.snap b/modules/nf-core/bakta/baktadbdownload/tests/main.nf.test.snap new file mode 100644 index 00000000..b1c82267 --- /dev/null +++ b/modules/nf-core/bakta/baktadbdownload/tests/main.nf.test.snap @@ -0,0 +1,29 @@ +{ + "Bakta database download": { + "content": [ + [ + "versions.yml:md5,df9b091b08a41b7d5eef95727b7eac29" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-19T11:34:41.812416438" + }, + "Bakta database download - stub": { + "content": [ + [ + [ + + ], + "versions.yml:md5,df9b091b08a41b7d5eef95727b7eac29" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-19T11:35:01.082923401" + } +} \ No newline at end of file diff --git a/modules/nf-core/bakta/baktadbdownload/tests/nextflow.config b/modules/nf-core/bakta/baktadbdownload/tests/nextflow.config new file mode 100644 index 00000000..8b99646a --- /dev/null +++ b/modules/nf-core/bakta/baktadbdownload/tests/nextflow.config @@ -0,0 +1,7 @@ +process { + + withName: 'BAKTA_BAKTADBDOWNLOAD' { + ext.args = "--type light" + } + +} diff --git a/modules/nf-core/bakta/baktadbdownload/tests/tags.yml b/modules/nf-core/bakta/baktadbdownload/tests/tags.yml new file mode 100644 index 00000000..c469fa48 --- /dev/null +++ b/modules/nf-core/bakta/baktadbdownload/tests/tags.yml @@ -0,0 +1,2 @@ +bakta/baktadbdownload: + - "modules/nf-core/bakta/baktadbdownload/**" diff --git a/modules/nf-core/bioawk/bioawk.diff b/modules/nf-core/bioawk/bioawk.diff deleted file mode 100644 index 54255a5c..00000000 --- a/modules/nf-core/bioawk/bioawk.diff +++ /dev/null @@ -1,24 +0,0 @@ -Changes in module 'nf-core/bioawk' ---- modules/nf-core/bioawk/main.nf -+++ modules/nf-core/bioawk/main.nf -@@ -12,7 +12,8 @@ - - output: - tuple val(meta), path("*.gz"), emit: output -- path "versions.yml" , emit: versions -+ tuple val(meta), env(LONGEST), emit: longest -+ path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when -@@ -30,6 +31,8 @@ - - gzip ${prefix} - -+ LONGEST=\$(zcat ${prefix}.gz | grep -v '>' | sort -n | tail -n 1) -+ - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bioawk: $VERSION - -************************************************************ diff --git a/modules/nf-core/bioawk/main.nf b/modules/nf-core/bioawk/main.nf deleted file mode 100644 index 02f0bbdd..00000000 --- a/modules/nf-core/bioawk/main.nf +++ /dev/null @@ -1,41 +0,0 @@ -process BIOAWK { - tag "$meta.id" - label 'process_single' - - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/bioawk:1.0--h5bf99c6_6': - 'biocontainers/bioawk:1.0--h5bf99c6_6' }" - - input: - tuple val(meta), path(input) - - output: - tuple val(meta), path("*.gz"), emit: output - tuple val(meta), env(LONGEST), emit: longest - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' // args is used for the main arguments of the tool - prefix = task.ext.prefix ?: "${meta.id}" - if ("${input}" == "${prefix}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" - def VERSION = '1.0' // WARN: Version information not provided by tool on CLI. 
Please update this string when bumping container versions. - """ - bioawk \\ - $args \\ - $input \\ - > ${prefix} - - gzip ${prefix} - - LONGEST=\$(zcat ${prefix}.gz | grep -v '>' | sort -n | tail -n 1) - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bioawk: $VERSION - END_VERSIONS - """ -} diff --git a/modules/nf-core/bioawk/meta.yml b/modules/nf-core/bioawk/meta.yml deleted file mode 100644 index c9d00111..00000000 --- a/modules/nf-core/bioawk/meta.yml +++ /dev/null @@ -1,46 +0,0 @@ -name: "bioawk" -description: Bioawk is an extension to Brian Kernighan's awk, adding the support of several common biological data formats. -keywords: - - bioawk - - fastq - - fasta - - sam - - file manipulation - - awk -tools: - - "bioawk": - description: "BWK awk modified for biological data" - homepage: "https://github.com/lh3/bioawk" - documentation: "https://github.com/lh3/bioawk" - tool_dev_url: "https://github.com/lh3/bioawk" - licence: "['Free software license (https://github.com/lh3/bioawk/blob/master/README.awk#L1)']" -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - input: - type: file - description: Input sequence biological sequence file (optionally gzipped) to be manipulated via program specified in `$args`. - pattern: "*.{bed,gff,sam,vcf,fastq,fasta,tab,bed.gz,gff.gz,sam.gz,vcf.gz,fastq.gz,fasta.gz,tab.gz}" -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - output: - type: file - description: | - Manipulated and gzipped version of input sequence file following program specified in `args`. - File name will be what is specified in `$prefix`. Do not include `.gz` suffix in `$prefix`! Output files` will be gzipped for you! - pattern: "*.gz" -authors: - - "@jfy133" -maintainers: - - "@jfy133" diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf deleted file mode 100644 index c9d014b1..00000000 --- a/modules/nf-core/custom/dumpsoftwareversions/main.nf +++ /dev/null @@ -1,24 +0,0 @@ -process CUSTOM_DUMPSOFTWAREVERSIONS { - label 'process_single' - - // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container - conda "bioconda::multiqc=1.15" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
-        'https://depot.galaxyproject.org/singularity/multiqc:1.15--pyhdfd78af_0' :
-        'biocontainers/multiqc:1.15--pyhdfd78af_0' }"
-
-    input:
-    path versions
-
-    output:
-    path "software_versions.yml" , emit: yml
-    path "software_versions_mqc.yml", emit: mqc_yml
-    path "versions.yml" , emit: versions
-
-    when:
-    task.ext.when == null || task.ext.when
-
-    script:
-    def args = task.ext.args ?: ''
-    template 'dumpsoftwareversions.py'
-}
diff --git a/modules/nf-core/custom/dumpsoftwareversions/meta.yml b/modules/nf-core/custom/dumpsoftwareversions/meta.yml
deleted file mode 100644
index c32657de..00000000
--- a/modules/nf-core/custom/dumpsoftwareversions/meta.yml
+++ /dev/null
@@ -1,36 +0,0 @@
-# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json
-name: custom_dumpsoftwareversions
-description: Custom module used to dump software versions within the nf-core pipeline template
-keywords:
-  - custom
-  - dump
-  - version
-tools:
-  - custom:
-      description: Custom module used to dump software versions within the nf-core pipeline template
-      homepage: https://github.com/nf-core/tools
-      documentation: https://github.com/nf-core/tools
-      licence: ["MIT"]
-input:
-  - versions:
-      type: file
-      description: YML file containing software versions
-      pattern: "*.yml"
-
-output:
-  - yml:
-      type: file
-      description: Standard YML file containing software versions
-      pattern: "software_versions.yml"
-  - mqc_yml:
-      type: file
-      description: MultiQC custom content YML file containing software versions
-      pattern: "software_versions_mqc.yml"
-  - versions:
-      type: file
-      description: File containing software versions
-      pattern: "versions.yml"
-
-authors:
-  - "@drpatelh"
-  - "@grst"
diff --git a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py
deleted file mode 100755
index da033408..00000000
--- a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py
+++ /dev/null
@@ -1,101 +0,0 @@
-#!/usr/bin/env python
-
-
-"""Provide functions to merge multiple versions.yml files."""
-
-
-import yaml
-import platform
-from textwrap import dedent
-
-
-def _make_versions_html(versions):
-    """Generate a tabular HTML output of all versions for MultiQC."""
-    html = [
-        dedent(
-            """\\
-            <style>
-            #nf-core-versions tbody:nth-child(even) {
-                background-color: #f2f2f2;
-            }
-            </style>
-            <table class="table" style="width:100%" id="nf-core-versions">
-                <thead>
-                    <tr>
-                        <th> Process Name </th>
-                        <th> Software </th>
-                        <th> Version  </th>
-                    </tr>
-                </thead>
-            """
-        )
-    ]
-    for process, tmp_versions in sorted(versions.items()):
-        html.append("<tbody>")
-        for i, (tool, version) in enumerate(sorted(tmp_versions.items())):
-            html.append(
-                dedent(
-                    f"""\\
-                    <tr>
-                        <td><samp>{process if (i == 0) else ''}</samp></td>
-                        <td><samp>{tool}</samp></td>
-                        <td><samp>{version}</samp></td>
-                    </tr>
-                    """
-                )
-            )
-        html.append("</tbody>")
-    html.append("</table>")
-    return "\\n".join(html)
-
-
-def main():
-    """Load all version files and generate merged output."""
-    versions_this_module = {}
-    versions_this_module["${task.process}"] = {
-        "python": platform.python_version(),
-        "yaml": yaml.__version__,
-    }
-
-    with open("$versions") as f:
-        versions_by_process = yaml.load(f, Loader=yaml.BaseLoader) | versions_this_module
-
-    # aggregate versions by the module name (derived from fully-qualified process name)
-    versions_by_module = {}
-    for process, process_versions in versions_by_process.items():
-        module = process.split(":")[-1]
-        try:
-            if versions_by_module[module] != process_versions:
-                raise AssertionError(
-                    "We assume that software versions are the same between all modules. "
-                    "If you see this error-message it means you discovered an edge-case "
-                    "and should open an issue in nf-core/tools. "
-                )
-        except KeyError:
-            versions_by_module[module] = process_versions
-
-    versions_by_module["Workflow"] = {
-        "Nextflow": "$workflow.nextflow.version",
-        "$workflow.manifest.name": "$workflow.manifest.version",
-    }
-
-    versions_mqc = {
-        "id": "software_versions",
-        "section_name": "${workflow.manifest.name} Software Versions",
-        "section_href": "https://github.com/${workflow.manifest.name}",
-        "plot_type": "html",
-        "description": "are collected at run time from the software output.",
-        "data": _make_versions_html(versions_by_module),
-    }
-
-    with open("software_versions.yml", "w") as f:
-        yaml.dump(versions_by_module, f, default_flow_style=False)
-    with open("software_versions_mqc.yml", "w") as f:
-        yaml.dump(versions_mqc, f, default_flow_style=False)
-
-    with open("versions.yml", "w") as f:
-        yaml.dump(versions_this_module, f, default_flow_style=False)
-
-
-if __name__ == "__main__":
-    main()
diff --git a/modules/nf-core/deeparg/downloaddata/environment.yml b/modules/nf-core/deeparg/downloaddata/environment.yml
new file mode 100644
index 00000000..87435be5
--- /dev/null
+++ b/modules/nf-core/deeparg/downloaddata/environment.yml
@@ -0,0 +1,7 @@
+name: deeparg_downloaddata
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - bioconda::deeparg=1.0.4
diff --git a/modules/nf-core/deeparg/downloaddata/main.nf b/modules/nf-core/deeparg/downloaddata/main.nf
index 724a002b..787c0027 100644
--- a/modules/nf-core/deeparg/downloaddata/main.nf
+++ b/modules/nf-core/deeparg/downloaddata/main.nf
@@ -1,16 +1,20 @@
 process DEEPARG_DOWNLOADDATA {
     label 'process_single'

-    conda "bioconda::deeparg=1.0.2"
+    conda "${moduleDir}/environment.yml"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/deeparg:1.0.2--pyhdfd78af_1' :
-        'biocontainers/deeparg:1.0.2--pyhdfd78af_1' }"
+        'https://depot.galaxyproject.org/singularity/deeparg:1.0.4--pyhdfd78af_0' :
+        'biocontainers/deeparg:1.0.4--pyhdfd78af_0' }"
+
     /*
-    We have to force singularity to run with -B to allow reading of a problematic file with borked read-write permissions in an upstream dependency (theanos).
+    We have to force docker/singularity to mount a fake file to allow reading of a problematic file with borked read-write permissions in an upstream dependency (Theano).
     Original report: https://github.com/nf-core/funcscan/issues/23
     */
-    containerOptions { "${workflow.containerEngine}" == 'singularity' ? '-B $(which bash):/usr/local/lib/python2.7/site-packages/Theano-0.8.2-py2.7.egg-info/PKG-INFO' : '' }
-
+    containerOptions {
+        "${workflow.containerEngine}" == 'singularity' ?
'-B $(which bash):/usr/local/lib/python2.7/site-packages/Theano-0.8.2-py2.7.egg-info/PKG-INFO' : + "${workflow.containerEngine}" == 'docker' ? '-v $(which bash):/usr/local/lib/python2.7/site-packages/Theano-0.8.2-py2.7.egg-info/PKG-INFO' : + '' + } input: @@ -23,8 +27,15 @@ process DEEPARG_DOWNLOADDATA { script: def args = task.ext.args ?: '' - def VERSION='1.0.2' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + def VERSION='1.0.4' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. """ + + # Theano needs a writable space and uses the home directory by default, + # but the latter is not always writable, for instance when Singularity + # is run in --no-home mode + mkdir -p theano + export THEANO_FLAGS="base_compiledir=\$PWD/theano" + deeparg \\ download_data \\ $args \\ @@ -35,4 +46,16 @@ process DEEPARG_DOWNLOADDATA { deeparg: $VERSION END_VERSIONS """ + + stub: + def args = task.ext.args ?: '' + def VERSION='1.0.4' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + """ + mkdir db/ + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + deeparg: $VERSION + END_VERSIONS + """ } diff --git a/modules/nf-core/deeparg/downloaddata/meta.yml b/modules/nf-core/deeparg/downloaddata/meta.yml index 624da297..65fb3903 100644 --- a/modules/nf-core/deeparg/downloaddata/meta.yml +++ b/modules/nf-core/deeparg/downloaddata/meta.yml @@ -10,14 +10,12 @@ keywords: tools: - deeparg: description: A deep learning based approach to predict Antibiotic Resistance Genes (ARGs) from metagenomes - homepage: https://bench.cs.vt.edu/deeparg - documentation: https://bitbucket.org/gusphdproj/deeparg-ss/src/master/ - tool_dev_url: https://bitbucket.org/gusphdproj/deeparg-ss/src/master/ + homepage: https://github.com/gaarangoa/deeparg + documentation: https://github.com/gaarangoa/deeparg + tool_dev_url: https://github.com/gaarangoa/deeparg doi: "10.1186/s40168-018-0401-z" licence: ["MIT"] - # No input required for download module. - output: - versions: type: file @@ -27,6 +25,7 @@ output: type: directory description: Directory containing database required for deepARG. 
pattern: "db/" - authors: - "@jfy133" +maintainers: + - "@jfy133" diff --git a/modules/nf-core/deeparg/downloaddata/tests/main.nf.test b/modules/nf-core/deeparg/downloaddata/tests/main.nf.test new file mode 100644 index 00000000..8e8c7647 --- /dev/null +++ b/modules/nf-core/deeparg/downloaddata/tests/main.nf.test @@ -0,0 +1,56 @@ +nextflow_process { + + name "Test Process DEEPARG_DOWNLOADDATA" + script "../main.nf" + process "DEEPARG_DOWNLOADDATA" + + tag "modules" + tag "modules_nfcore" + tag "deeparg" + tag "deeparg/downloaddata" + + test("downloaddata") { + + + when { + process { + """ + // No input required + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot ( + file(process.out.db.get(0)).list().sort(), + process.out.versions, + ).match() } + ) + } + + } + + test("downloaddata - stub") { + + options "-stub" + + when { + process { + """ + // No input required + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/deeparg/downloaddata/tests/main.nf.test.snap b/modules/nf-core/deeparg/downloaddata/tests/main.nf.test.snap new file mode 100644 index 00000000..fca46527 --- /dev/null +++ b/modules/nf-core/deeparg/downloaddata/tests/main.nf.test.snap @@ -0,0 +1,53 @@ +{ + "downloaddata": { + "content": [ + [ + "LICENSE:md5,f244898ceed024da6d64a1b97746edb1", + "README.md:md5,6c0450350c2d52c0f9b5d81c3d22ea7b", + "__MACOSX", + "bin", + "database", + "deeparg", + "deeparg.gz", + "gg13", + "model", + "scripts" + ], + [ + "versions.yml:md5,30e73617295a9f10ac7781bfe8ba617f" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-07T16:40:43.022804921" + }, + "downloaddata - stub": { + "content": [ + { + "0": [ + [ + + ] + ], + "1": [ + "versions.yml:md5,30e73617295a9f10ac7781bfe8ba617f" + ], + "db": [ + [ + + ] + ], + "versions": [ + "versions.yml:md5,30e73617295a9f10ac7781bfe8ba617f" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-07T16:40:47.261220647" + } +} \ No newline at end of file diff --git a/modules/nf-core/deeparg/downloaddata/tests/tags.yml b/modules/nf-core/deeparg/downloaddata/tests/tags.yml new file mode 100644 index 00000000..b909db89 --- /dev/null +++ b/modules/nf-core/deeparg/downloaddata/tests/tags.yml @@ -0,0 +1,2 @@ +deeparg/downloaddata: + - "modules/nf-core/deeparg/downloaddata/**" diff --git a/modules/nf-core/deeparg/predict/environment.yml b/modules/nf-core/deeparg/predict/environment.yml new file mode 100644 index 00000000..aa686701 --- /dev/null +++ b/modules/nf-core/deeparg/predict/environment.yml @@ -0,0 +1,7 @@ +name: deeparg_predict +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::deeparg=1.0.4 diff --git a/modules/nf-core/deeparg/predict/main.nf b/modules/nf-core/deeparg/predict/main.nf index 11318476..20fd0a93 100644 --- a/modules/nf-core/deeparg/predict/main.nf +++ b/modules/nf-core/deeparg/predict/main.nf @@ -2,15 +2,20 @@ process DEEPARG_PREDICT { tag "$meta.id" label 'process_single' - conda "bioconda::deeparg=1.0.2" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/deeparg:1.0.2--pyhdfd78af_1' : - 'biocontainers/deeparg:1.0.2--pyhdfd78af_1' }" + 'https://depot.galaxyproject.org/singularity/deeparg:1.0.4--pyhdfd78af_0' : + 'biocontainers/deeparg:1.0.4--pyhdfd78af_0' }" + /* - We have to force singularity to run with -B to allow reading of a problematic file with borked read-write permissions in an upstream dependency (theanos). - Original report: https://github.com/nf-core/funcscan/issues/23 + We have to force docker/singularity to mount a fake file to allow reading of a problematic file with borked read-write permissions in an upstream dependency (theanos). + Original report: https://github.com/nf-core/funcscan/issues/23 */ - containerOptions { "${workflow.containerEngine}" == 'singularity' ? '-B $(which bash):/usr/local/lib/python2.7/site-packages/Theano-0.8.2-py2.7.egg-info/PKG-INFO' : '' } + containerOptions { + "${workflow.containerEngine}" == 'singularity' ? '-B $(which bash):/usr/local/lib/python2.7/site-packages/Theano-0.8.2-py2.7.egg-info/PKG-INFO' : + "${workflow.containerEngine}" == 'docker' ? '-v $(which bash):/usr/local/lib/python2.7/site-packages/Theano-0.8.2-py2.7.egg-info/PKG-INFO' : + '' + } input: tuple val(meta), path(fasta), val(model) @@ -29,14 +34,22 @@ process DEEPARG_PREDICT { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def VERSION='1.0.2' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + def VERSION='1.0.4' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. """ + DATABASE=`find -L $db -type d -name "database" | sed 's/database//'` + + # Theano needs a writable space and uses the home directory by default, + # but the latter is not always writable, for instance when Singularity + # is run in --no-home mode + mkdir -p theano + export THEANO_FLAGS="base_compiledir=\$PWD/theano" + deeparg \\ predict \\ $args \\ -i $fasta \\ -o ${prefix} \\ - -d $db \\ + -d \$DATABASE \\ --model $model cat <<-END_VERSIONS > versions.yml @@ -44,4 +57,20 @@ process DEEPARG_PREDICT { deeparg: $VERSION END_VERSIONS """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION='1.0.4' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. 
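The new `DATABASE=` line matters because `deeparg predict -d` expects the directory that *contains* the `database/` folder, while the staged input may be the download root itself or a wrapper directory. A standalone sketch of what that find/sed pipeline evaluates to; the directory names are illustrative:

```bash
# Illustrative layout: the staged db directory wraps a "database" folder
mkdir -p staged_db/database

# find prints "staged_db/database"; sed strips the trailing "database"
# component, leaving the parent path that deeparg expects after -d
DATABASE=$(find -L staged_db -type d -name "database" | sed 's/database//')
echo "$DATABASE"    # -> staged_db/

# deeparg predict -d "$DATABASE" ...   # as invoked in the script block above
```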
+ """ + touch ${prefix}.align.daa + touch ${prefix}.align.daa.tsv + touch ${prefix}.mapping.ARG + touch ${prefix}.mapping.potential.ARG + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + deeparg: $VERSION + END_VERSIONS + """ } diff --git a/modules/nf-core/deeparg/predict/meta.yml b/modules/nf-core/deeparg/predict/meta.yml index fa50c70e..d62c2c5f 100644 --- a/modules/nf-core/deeparg/predict/meta.yml +++ b/modules/nf-core/deeparg/predict/meta.yml @@ -12,12 +12,11 @@ keywords: tools: - deeparg: description: A deep learning based approach to predict Antibiotic Resistance Genes (ARGs) from metagenomes - homepage: https://bench.cs.vt.edu/deeparg - documentation: https://bitbucket.org/gusphdproj/deeparg-ss/src/master/ - tool_dev_url: https://bitbucket.org/gusphdproj/deeparg-ss/src/master/ + homepage: https://github.com/gaarangoa/deeparg + documentation: https://github.com/gaarangoa/deeparg + tool_dev_url: https://github.com/gaarangoa/deeparg doi: "10.1186/s40168-018-0401-z" licence: ["MIT"] - input: - meta: type: map @@ -36,7 +35,6 @@ input: type: directory description: Path to a directory containing the deepARG pre-built models pattern: "*/" - output: - meta: type: map @@ -63,6 +61,7 @@ output: type: file description: Table containing sequences with an ARG-like probability of less than specified thresholds, and requires manual inspection pattern: "*.mapping.potential.ARG" - authors: - "@jfy133" +maintainers: + - "@jfy133" diff --git a/modules/nf-core/deeparg/predict/tests/main.nf.test b/modules/nf-core/deeparg/predict/tests/main.nf.test new file mode 100644 index 00000000..4841c6eb --- /dev/null +++ b/modules/nf-core/deeparg/predict/tests/main.nf.test @@ -0,0 +1,81 @@ +nextflow_process { + + name "Test Process DEEPARG_PREDICT" + script "../main.nf" + process "DEEPARG_PREDICT" + + tag "modules" + tag "modules_nfcore" + tag "deeparg" + tag "deeparg/predict" + tag "deeparg/downloaddata" + + setup { + run("DEEPARG_DOWNLOADDATA") { + script "../../../deeparg/downloaddata/main.nf" + process { + """ + // No input necessary + """ + } + } + + } + + test("bacteroides_fragilis - fasta") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/prokaryotes/bacteroides_fragilis/genome/genome.fna.gz', checkIfExists: true), + 'LS' + ] + input[1] = DEEPARG_DOWNLOADDATA.out.db + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.daa_tsv, + process.out.arg, + file(process.out.daa[0][1]).name, + path(process.out.potential_arg[0][1]).readLines().first().contains("#ARG") + ).match() + } + ) + } + + } + + test("bacteroides_fragilis - fasta - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/prokaryotes/bacteroides_fragilis/genome/genome.fna.gz', checkIfExists: true), + 'LS' + ] + input[1] = DEEPARG_DOWNLOADDATA.out.db + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/deeparg/predict/tests/main.nf.test.snap b/modules/nf-core/deeparg/predict/tests/main.nf.test.snap new file mode 100644 index 00000000..fa5df047 --- /dev/null +++ b/modules/nf-core/deeparg/predict/tests/main.nf.test.snap @@ -0,0 +1,120 @@ +{ + "bacteroides_fragilis - fasta": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + 
"test.align.daa.tsv:md5,46b6eba345742365fc1dbd5b4bacd3a2" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.mapping.ARG:md5,0e049e99eab4c55666062df21707d5b9" + ] + ], + "test.align.daa", + true + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-07T20:55:06.511718259" + }, + "bacteroides_fragilis - fasta - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.align.daa:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.align.daa.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.mapping.ARG:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.mapping.potential.ARG:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + "versions.yml:md5,1668194fbcb82f7cce4699baa00c02a1" + ], + "arg": [ + [ + { + "id": "test", + "single_end": false + }, + "test.mapping.ARG:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "daa": [ + [ + { + "id": "test", + "single_end": false + }, + "test.align.daa:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "daa_tsv": [ + [ + { + "id": "test", + "single_end": false + }, + "test.align.daa.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "potential_arg": [ + [ + { + "id": "test", + "single_end": false + }, + "test.mapping.potential.ARG:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,1668194fbcb82f7cce4699baa00c02a1" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-07T16:50:17.147755715" + } +} \ No newline at end of file diff --git a/modules/nf-core/deeparg/predict/tests/tags.yml b/modules/nf-core/deeparg/predict/tests/tags.yml new file mode 100644 index 00000000..7fa73e37 --- /dev/null +++ b/modules/nf-core/deeparg/predict/tests/tags.yml @@ -0,0 +1,2 @@ +deeparg/predict: + - "modules/nf-core/deeparg/predict/**" diff --git a/modules/nf-core/deepbgc/download/environment.yml b/modules/nf-core/deepbgc/download/environment.yml new file mode 100644 index 00000000..84d467f0 --- /dev/null +++ b/modules/nf-core/deepbgc/download/environment.yml @@ -0,0 +1,7 @@ +name: deepbgc_download +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::deepbgc=0.1.31 diff --git a/modules/nf-core/deepbgc/download/main.nf b/modules/nf-core/deepbgc/download/main.nf index e4f0d503..b141142c 100644 --- a/modules/nf-core/deepbgc/download/main.nf +++ b/modules/nf-core/deepbgc/download/main.nf @@ -1,10 +1,10 @@ process DEEPBGC_DOWNLOAD { label 'process_single' - conda "bioconda::deepbgc=0.1.30" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
diff --git a/modules/nf-core/deepbgc/download/main.nf b/modules/nf-core/deepbgc/download/main.nf
index e4f0d503..b141142c 100644
--- a/modules/nf-core/deepbgc/download/main.nf
+++ b/modules/nf-core/deepbgc/download/main.nf
@@ -1,10 +1,10 @@
 process DEEPBGC_DOWNLOAD {
     label 'process_single'

-    conda "bioconda::deepbgc=0.1.30"
+    conda "${moduleDir}/environment.yml"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/deepbgc:0.1.30--pyhb7b1952_1':
-        'biocontainers/deepbgc:0.1.30--pyhb7b1952_1' }"
+        'https://depot.galaxyproject.org/singularity/deepbgc:0.1.31--pyhca03a8a_0':
+        'biocontainers/deepbgc:0.1.31--pyhca03a8a_0' }"

     output:
     path "deepbgc_db/" , emit: db
diff --git a/modules/nf-core/deepbgc/download/meta.yml b/modules/nf-core/deepbgc/download/meta.yml
index c2d74aa6..6444dd41 100644
--- a/modules/nf-core/deepbgc/download/meta.yml
+++ b/modules/nf-core/deepbgc/download/meta.yml
@@ -18,8 +18,7 @@ tools:
       documentation: "https://github.com/Merck/deepbgc"
       tool_dev_url: "https://github.com/Merck/deepbgc"
       doi: "10.1093/nar/gkz654"
-      licence: "['MIT']"
-
+      licence: ["MIT"]
 output:
   - versions:
       type: file
@@ -29,6 +28,7 @@ output:
       type: directory
       description: Contains reference database files
       pattern: "deepbgc_db"
-
 authors:
   - "@louperelo"
+maintainers:
+  - "@louperelo"
diff --git a/modules/nf-core/deepbgc/pipeline/environment.yml b/modules/nf-core/deepbgc/pipeline/environment.yml
new file mode 100644
index 00000000..fe0087a2
--- /dev/null
+++ b/modules/nf-core/deepbgc/pipeline/environment.yml
@@ -0,0 +1,7 @@
+name: deepbgc_pipeline
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - bioconda::deepbgc=0.1.31
diff --git a/modules/nf-core/deepbgc/pipeline/main.nf b/modules/nf-core/deepbgc/pipeline/main.nf
index e3aefba8..fc72d238 100644
--- a/modules/nf-core/deepbgc/pipeline/main.nf
+++ b/modules/nf-core/deepbgc/pipeline/main.nf
@@ -2,27 +2,27 @@ process DEEPBGC_PIPELINE {
     tag "$meta.id"
     label 'process_single'

-    conda "bioconda::deepbgc=0.1.30"
+    conda "${moduleDir}/environment.yml"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/deepbgc:0.1.30--pyhb7b1952_1':
-        'biocontainers/deepbgc:0.1.30--pyhb7b1952_1' }"
+        'https://depot.galaxyproject.org/singularity/deepbgc:0.1.31--pyhca03a8a_0':
+        'biocontainers/deepbgc:0.1.31--pyhca03a8a_0' }"

     input:
     tuple val(meta), path(genome)
     path(db)

     output:
-    tuple val(meta), path("${prefix}/README.txt")                             , optional: true, emit: readme
-    tuple val(meta), path("${prefix}/LOG.txt")                                , emit: log
-    tuple val(meta), path("${prefix}/${genome.baseName}.antismash.json")      , optional: true, emit: json
-    tuple val(meta), path("${prefix}/${genome.baseName}.bgc.gbk")             , optional: true, emit: bgc_gbk
-    tuple val(meta), path("${prefix}/${genome.baseName}.bgc.tsv")             , optional: true, emit: bgc_tsv
-    tuple val(meta), path("${prefix}/${genome.baseName}.full.gbk")            , optional: true, emit: full_gbk
-    tuple val(meta), path("${prefix}/${genome.baseName}.pfam.tsv")            , optional: true, emit: pfam_tsv
-    tuple val(meta), path("${prefix}/evaluation/${genome.baseName}.bgc.png")  , optional: true, emit: bgc_png
-    tuple val(meta), path("${prefix}/evaluation/${genome.baseName}.pr.png")   , optional: true, emit: pr_png
-    tuple val(meta), path("${prefix}/evaluation/${genome.baseName}.roc.png")  , optional: true, emit: roc_png
-    tuple val(meta), path("${prefix}/evaluation/${genome.baseName}.score.png"), optional: true, emit: score_png
+    tuple val(meta), path("${prefix}/README.txt")                     , optional: true, emit: readme
+    tuple val(meta), path("${prefix}/LOG.txt")                        , emit: log
+    tuple val(meta), path("${prefix}/${prefix}.antismash.json")       , optional: true, emit: json
+    tuple val(meta), path("${prefix}/${prefix}.bgc.gbk")              , optional: true, emit: bgc_gbk
+    tuple val(meta), path("${prefix}/${prefix}.bgc.tsv")              , optional: true, emit: bgc_tsv
+    tuple val(meta), path("${prefix}/${prefix}.full.gbk")             , optional: true, emit: full_gbk
+    tuple val(meta), path("${prefix}/${prefix}.pfam.tsv")             , optional: true, emit: pfam_tsv
+    tuple val(meta), path("${prefix}/evaluation/${prefix}.bgc.png")   , optional: true, emit: bgc_png
+    tuple val(meta), path("${prefix}/evaluation/${prefix}.pr.png")    , optional: true, emit: pr_png
+    tuple val(meta), path("${prefix}/evaluation/${prefix}.roc.png")   , optional: true, emit: roc_png
+    tuple val(meta), path("${prefix}/evaluation/${prefix}.score.png") , optional: true, emit: score_png
     path "versions.yml"                                               , emit: versions

     when:
@@ -43,6 +43,34 @@ process DEEPBGC_PIPELINE {
         mv "${genome.baseName}/" "${prefix}/"
     fi

+    for i in \$(find -name '${genome.baseName}*' -type f); do
+        mv \$i \${i/${genome.baseName}/${prefix}};
+    done
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        deepbgc: \$(echo \$(deepbgc info 2>&1 /dev/null/ | grep 'version' | cut -d " " -f3) )
+        prodigal: \$(prodigal -v 2>&1 | sed -n 's/Prodigal V\\(.*\\):.*/\\1/p')
+    END_VERSIONS
+    """
+
+    stub:
+    def args = task.ext.args ?: ''
+    prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    mkdir -p ${prefix}/evaluation
+    touch ${prefix}/README.txt
+    touch ${prefix}/LOG.txt
+    touch ${prefix}/${prefix}.antismash.json
+    touch ${prefix}/${prefix}.bgc.gbk
+    touch ${prefix}/${prefix}.bgc.tsv
+    touch ${prefix}/${prefix}.full.gbk
+    touch ${prefix}/${prefix}.pfam.tsv
+    touch ${prefix}/evaluation/${prefix}.bgc.png
+    touch ${prefix}/evaluation/${prefix}.pr.png
+    touch ${prefix}/evaluation/${prefix}.roc.png
+    touch ${prefix}/evaluation/${prefix}.score.png
+
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
         deepbgc: \$(echo \$(deepbgc info 2>&1 /dev/null/ | grep 'version' | cut -d " " -f3) )
script "../../../prodigal/main.nf" + process { + """ + input[0] = GUNZIP.out.gunzip + input[1] = 'gbk' + """ + } + } + } + + test("deepbgc pipeline gbk - bacteroides fragilis - test1_contigs.fa.gz") { + + when { + process { + """ + input [0] = PRODIGAL.out.gene_annotations + input [1] = DEEPBGC_DOWNLOAD.out.db + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match("gbk_versions") }, + { assert snapshot(process.out.json).match("gbk_json") }, + { assert path(process.out.log.get(0).get(1)).exists() }, + { assert path(process.out.bgc_gbk.get(0).get(1)).exists() }, + { assert path(process.out.full_gbk.get(0).get(1)).exists() } + ) + } + + } + + test("deepbgc pipeline fa - bacteroides fragilis - test1_contigs.fa.gz") { + + when { + process { + """ + input [0] = GUNZIP.out.gunzip + input [1] = DEEPBGC_DOWNLOAD.out.db + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match("fa_versions") }, + { assert snapshot(process.out.bgc_gbk).match("fa_bgc_gbk") }, + { assert snapshot(process.out.bgc_png).match("fa_bgc_png") }, + { assert snapshot(process.out.score_png).match("fa_score_png") }, + { assert snapshot(process.out.pfam_tsv).match("fa_pfam_tsv") }, + { assert path(process.out.json.get(0).get(1)).exists() }, + { assert path(process.out.log.get(0).get(1)).exists() }, + { assert path(process.out.bgc_tsv.get(0).get(1)).exists() }, + { assert path(process.out.full_gbk.get(0).get(1)).exists() } + ) + } + } + + test("deepbgc pipeline fa - bacteroides fragilis - test1_contigs.fa.gz - stub") { + options "-stub" + when { + process { + """ + input [0] = GUNZIP.out.gunzip + input [1] = DEEPBGC_DOWNLOAD.out.db + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + +} + diff --git a/modules/nf-core/deepbgc/pipeline/tests/main.nf.test.snap b/modules/nf-core/deepbgc/pipeline/tests/main.nf.test.snap new file mode 100644 index 00000000..ef64db97 --- /dev/null +++ b/modules/nf-core/deepbgc/pipeline/tests/main.nf.test.snap @@ -0,0 +1,331 @@ +{ + "gbk_versions": { + "content": [ + [ + "versions.yml:md5,988a1db70bd9e95ad22c25b4d6d40e6e" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.1" + }, + "timestamp": "2023-12-01T18:29:41.728695197" + }, + "fa_bgc_png": { + "content": [ + [ + [ + { + "id": "test_gbk", + "single_end": false + }, + "test_gbk.bgc.png:md5,f4a0fc6cd260e2d7ad16f7a1fa103f96" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-03T16:29:32.389704368" + }, + "deepbgc pipeline fa - bacteroides fragilis - test1_contigs.fa.gz - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test_gbk", + "single_end": false + }, + "README.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test_gbk", + "single_end": false + }, + "LOG.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "10": [ + [ + { + "id": "test_gbk", + "single_end": false + }, + "test_gbk.score.png:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "11": [ + "versions.yml:md5,988a1db70bd9e95ad22c25b4d6d40e6e" + ], + "2": [ + [ + { + "id": "test_gbk", + "single_end": false + }, + "test_gbk.antismash.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test_gbk", + "single_end": false + }, + "test_gbk.bgc.gbk:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + { + "id": "test_gbk", + "single_end": false + }, + 
"test_gbk.bgc.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "5": [ + [ + { + "id": "test_gbk", + "single_end": false + }, + "test_gbk.full.gbk:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "6": [ + [ + { + "id": "test_gbk", + "single_end": false + }, + "test_gbk.pfam.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "7": [ + [ + { + "id": "test_gbk", + "single_end": false + }, + "test_gbk.bgc.png:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "8": [ + [ + { + "id": "test_gbk", + "single_end": false + }, + "test_gbk.pr.png:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "9": [ + [ + { + "id": "test_gbk", + "single_end": false + }, + "test_gbk.roc.png:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bgc_gbk": [ + [ + { + "id": "test_gbk", + "single_end": false + }, + "test_gbk.bgc.gbk:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bgc_png": [ + [ + { + "id": "test_gbk", + "single_end": false + }, + "test_gbk.bgc.png:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bgc_tsv": [ + [ + { + "id": "test_gbk", + "single_end": false + }, + "test_gbk.bgc.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "full_gbk": [ + [ + { + "id": "test_gbk", + "single_end": false + }, + "test_gbk.full.gbk:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "json": [ + [ + { + "id": "test_gbk", + "single_end": false + }, + "test_gbk.antismash.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test_gbk", + "single_end": false + }, + "LOG.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "pfam_tsv": [ + [ + { + "id": "test_gbk", + "single_end": false + }, + "test_gbk.pfam.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "pr_png": [ + [ + { + "id": "test_gbk", + "single_end": false + }, + "test_gbk.pr.png:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "readme": [ + [ + { + "id": "test_gbk", + "single_end": false + }, + "README.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "roc_png": [ + [ + { + "id": "test_gbk", + "single_end": false + }, + "test_gbk.roc.png:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "score_png": [ + [ + { + "id": "test_gbk", + "single_end": false + }, + "test_gbk.score.png:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,988a1db70bd9e95ad22c25b4d6d40e6e" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-03T16:32:11.354631831" + }, + "fa_score_png": { + "content": [ + [ + [ + { + "id": "test_gbk", + "single_end": false + }, + "test_gbk.score.png:md5,572e8882031f667580d8c8e13c2cbb91" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-03T16:29:32.401051746" + }, + "fa_pfam_tsv": { + "content": [ + [ + [ + { + "id": "test_gbk", + "single_end": false + }, + "test_gbk.pfam.tsv:md5,1179eb4e6df0c83aaeec18d7d34e7524" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-03T16:29:32.411632144" + }, + "gbk_json": { + "content": [ + [ + [ + { + "id": "test_gbk", + "single_end": false + }, + "test_gbk.antismash.json:md5,889ac1efb6a9a7d7b8c65e4cd2233bba" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-03T16:25:25.861672633" + }, + "fa_versions": { + "content": [ + [ + "versions.yml:md5,988a1db70bd9e95ad22c25b4d6d40e6e" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.1" + }, + "timestamp": "2023-12-01T18:44:16.352023677" + }, + "fa_bgc_gbk": { + "content": [ + [ + [ + { + "id": "test_gbk", + 
"single_end": false + }, + "test_gbk.bgc.gbk:md5,7fc70dd034903622dae273bf71b402f2" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-03T16:29:32.383560585" + } +} \ No newline at end of file diff --git a/modules/nf-core/deepbgc/pipeline/tests/tags.yml b/modules/nf-core/deepbgc/pipeline/tests/tags.yml new file mode 100644 index 00000000..c6c4e11d --- /dev/null +++ b/modules/nf-core/deepbgc/pipeline/tests/tags.yml @@ -0,0 +1,2 @@ +deepbgc/pipeline: + - "modules/nf-core/deepbgc/pipeline/**" diff --git a/modules/nf-core/fargene/main.nf b/modules/nf-core/fargene/main.nf index b2feb86a..42aa2ca2 100644 --- a/modules/nf-core/fargene/main.nf +++ b/modules/nf-core/fargene/main.nf @@ -17,6 +17,7 @@ process FARGENE { tuple val(meta), path("*.log") , emit: log tuple val(meta), path("${prefix}/results_summary.txt") , emit: txt tuple val(meta), path("${prefix}/hmmsearchresults/*.out") , optional: true, emit: hmm + tuple val(meta), path("${prefix}/hmmsearchresults/retrieved-*.out") , optional: true, emit: hmm_genes tuple val(meta), path("${prefix}/predictedGenes/predicted-orfs.fasta") , optional: true, emit: orfs tuple val(meta), path("${prefix}/predictedGenes/predicted-orfs-amino.fasta") , optional: true, emit: orfs_amino tuple val(meta), path("${prefix}/predictedGenes/retrieved-contigs.fasta") , optional: true, emit: contigs @@ -24,7 +25,7 @@ process FARGENE { tuple val(meta), path("${prefix}/predictedGenes/*filtered.fasta") , optional: true, emit: filtered tuple val(meta), path("${prefix}/predictedGenes/*filtered-peptides.fasta") , optional: true, emit: filtered_pept tuple val(meta), path("${prefix}/retrievedFragments/all_retrieved_*.fastq") , optional: true, emit: fragments - tuple val(meta), path("${prefix}/retrievedFragments/retrievedFragments/trimmedReads/*.fasta"), optional: true, emit: trimmed + tuple val(meta), path("${prefix}/retrievedFragments/trimmedReads/*.fasta") , optional: true, emit: trimmed tuple val(meta), path("${prefix}/spades_assembly/*") , optional: true, emit: spades tuple val(meta), path("${prefix}/tmpdir/*.fasta") , optional: true, emit: metagenome tuple val(meta), path("${prefix}/tmpdir/*.out") , optional: true, emit: tmp @@ -50,4 +51,33 @@ process FARGENE { fargene: $VERSION END_VERSIONS """ + + stub: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '0.1' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. 
+ """ + touch ${prefix}.log + mkdir -p ${prefix}/{hmmsearchresults,predictedGenes,retrievedFragments} + mkdir -p ${prefix}/retrievedFragments/trimmedReads/ + + touch ${prefix}/results_summary.txt + touch ${prefix}/hmmsearchresults/retrieved-${prefix}.out + touch ${prefix}/hmmsearchresults/${prefix}.out + touch ${prefix}/predictedGenes/predicted-orfs.fasta + touch ${prefix}/predictedGenes/predicted-orfs-amino.fasta + touch ${prefix}/predictedGenes/retrieved-contigs.fasta + touch ${prefix}/predictedGenes/retrieved-contigs-peptides.fasta + touch ${prefix}/predictedGenes/${prefix}-filtered.fasta + touch ${prefix}/predictedGenes/${prefix}-filtered-peptides.fasta + touch ${prefix}/retrievedFragments/all_retrieved_${prefix}.fastq + touch ${prefix}/retrievedFragments/trimmedReads/${prefix}.fasta + + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fargene: $VERSION + END_VERSIONS + """ + } diff --git a/modules/nf-core/fargene/meta.yml b/modules/nf-core/fargene/meta.yml index 8f1540b1..9fc5ce0f 100644 --- a/modules/nf-core/fargene/meta.yml +++ b/modules/nf-core/fargene/meta.yml @@ -46,8 +46,12 @@ output: pattern: "*.{txt}" - hmm: type: file - description: output from hmmsearch + description: output from hmmsearch (both single gene annotations + contigs) pattern: "*.{out}" + - hmm_genes: + type: file + description: output from hmmsearch (single gene annotations only) + pattern: "retrieved-*.{out}" - orfs: type: file description: open reading frames (ORFs) diff --git a/modules/nf-core/fargene/tests/main.nf.test b/modules/nf-core/fargene/tests/main.nf.test new file mode 100644 index 00000000..2f4e3fc6 --- /dev/null +++ b/modules/nf-core/fargene/tests/main.nf.test @@ -0,0 +1,76 @@ +nextflow_process { + + name "Test Process FARGENE" + script "../main.nf" + process "FARGENE" + + tag "modules" + tag "modules_nfcore" + tag "fargene" + tag "gunzip" + + setup { + + run("GUNZIP") { + script "../../gunzip/main.nf" + process { + """ + input[0] = Channel.fromList([ + tuple([ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/prokaryotes/bacteroides_fragilis/illumina/fasta/test1.contigs.fa.gz', checkIfExists: true)) + ]) + """ + } + } + } + + test("fargene - bacteroides fragilis - contigs.fa.gz") { + + when { + process { + """ + input[0] = GUNZIP.out.gunzip + input[1] = 'class_a' + """ + } + } + + then { + assertAll { + { assert process.success } + { assert snapshot( + process.out.txt, + path(process.out.log[0][1]).readLines().last().contains("Output can be found in"), + path(process.out.hmm[0][1]).readLines().last().contains("[ok]"), + file(process.out.tmp[0][1].find { file(it).name == "tmp.out" }).readLines().last().contains("[ok]"), + process.out.versions + ).match() + } + } + } + + } + + test("fargene - bacteroides fragilis - contigs.fa.gz - stub") { + + options "-stub" + + when { + process { + """ + input[0] = GUNZIP.out.gunzip + input[1] = 'class_a' + """ + } + } + + then { + assertAll { + { assert process.success } + { assert snapshot(process.out).match() } + } + } + + } + +} diff --git a/modules/nf-core/fargene/tests/main.nf.test.snap b/modules/nf-core/fargene/tests/main.nf.test.snap new file mode 100644 index 00000000..54724f1b --- /dev/null +++ b/modules/nf-core/fargene/tests/main.nf.test.snap @@ -0,0 +1,283 @@ +{ + "fargene - bacteroides fragilis - contigs.fa.gz - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + 
"single_end": false + }, + "results_summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "10": [ + [ + { + "id": "test", + "single_end": false + }, + "all_retrieved_test.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "11": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "12": [ + + ], + "13": [ + + ], + "14": [ + + ], + "15": [ + "versions.yml:md5,a146d432794c87b5850fb7c4cbee11fc" + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "retrieved-test.out:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "retrieved-test.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + { + "id": "test", + "single_end": false + }, + "predicted-orfs.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": false + }, + "predicted-orfs-amino.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "6": [ + [ + { + "id": "test", + "single_end": false + }, + "retrieved-contigs.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "7": [ + [ + { + "id": "test", + "single_end": false + }, + "retrieved-contigs-peptides.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "8": [ + [ + { + "id": "test", + "single_end": false + }, + "test-filtered.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "9": [ + [ + { + "id": "test", + "single_end": false + }, + "test-filtered-peptides.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "contigs": [ + [ + { + "id": "test", + "single_end": false + }, + "retrieved-contigs.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "contigs_pept": [ + [ + { + "id": "test", + "single_end": false + }, + "retrieved-contigs-peptides.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "filtered": [ + [ + { + "id": "test", + "single_end": false + }, + "test-filtered.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "filtered_pept": [ + [ + { + "id": "test", + "single_end": false + }, + "test-filtered-peptides.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "fragments": [ + [ + { + "id": "test", + "single_end": false + }, + "all_retrieved_test.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "hmm": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "retrieved-test.out:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "hmm_genes": [ + [ + { + "id": "test", + "single_end": false + }, + "retrieved-test.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test", + "single_end": false + }, + "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "metagenome": [ + + ], + "orfs": [ + [ + { + "id": "test", + "single_end": false + }, + "predicted-orfs.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "orfs_amino": [ + [ + { + "id": "test", + "single_end": false + }, + "predicted-orfs-amino.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "spades": [ + + ], + "tmp": [ + + ], + "trimmed": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "txt": [ + [ + { + "id": "test", + "single_end": false + }, + "results_summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,a146d432794c87b5850fb7c4cbee11fc" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": 
"2024-08-12T15:08:54.830926802" + }, + "fargene - bacteroides fragilis - contigs.fa.gz": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "results_summary.txt:md5,690d351cfc52577263ef4cfab1c81f50" + ] + ], + true, + true, + true, + [ + "versions.yml:md5,a146d432794c87b5850fb7c4cbee11fc" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-08-12T15:24:49.384451483" + } +} \ No newline at end of file diff --git a/modules/nf-core/fargene/tests/tags.yml b/modules/nf-core/fargene/tests/tags.yml new file mode 100644 index 00000000..c470032f --- /dev/null +++ b/modules/nf-core/fargene/tests/tags.yml @@ -0,0 +1,2 @@ +fargene: + - "modules/nf-core/fargene/**" diff --git a/modules/nf-core/gecco/run/environment.yml b/modules/nf-core/gecco/run/environment.yml new file mode 100644 index 00000000..9d7cde8d --- /dev/null +++ b/modules/nf-core/gecco/run/environment.yml @@ -0,0 +1,7 @@ +name: gecco_run +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::gecco=0.9.10 diff --git a/modules/nf-core/gecco/run/main.nf b/modules/nf-core/gecco/run/main.nf index 711fb965..04399d14 100644 --- a/modules/nf-core/gecco/run/main.nf +++ b/modules/nf-core/gecco/run/main.nf @@ -2,16 +2,15 @@ process GECCO_RUN { tag "$meta.id" label 'process_low' - conda "bioconda::gecco=0.9.8" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gecco:0.9.8--pyhdfd78af_0': - 'biocontainers/gecco:0.9.8--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/gecco:0.9.10--pyhdfd78af_0': + 'biocontainers/gecco:0.9.10--pyhdfd78af_0' }" input: tuple val(meta), path(input), path(hmm) path model_dir - output: tuple val(meta), path("*.genes.tsv") , optional: true, emit: genes tuple val(meta), path("*.features.tsv") , emit: features @@ -39,6 +38,25 @@ process GECCO_RUN { $custom_model \\ $custom_hmm + for i in \$(find -name '${input.baseName}*' -type f); do + mv \$i \${i/${input.baseName}/${prefix}}; + done + + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gecco: \$(echo \$(gecco --version) | cut -f 2 -d ' ' ) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.genes.tsv + touch ${prefix}.features.tsv + touch ${prefix}.clusters.tsv + touch NC_018507.1_cluster_1.gbk + cat <<-END_VERSIONS > versions.yml "${task.process}": gecco: \$(echo \$(gecco --version) | cut -f 2 -d ' ' ) diff --git a/modules/nf-core/gecco/run/meta.yml b/modules/nf-core/gecco/run/meta.yml index 5450f967..a2f4a726 100644 --- a/modules/nf-core/gecco/run/meta.yml +++ b/modules/nf-core/gecco/run/meta.yml @@ -12,8 +12,7 @@ tools: documentation: "https://gecco.embl.de" tool_dev_url: "https://github.com/zellerlab/GECCO" doi: "10.1101/2021.05.03.442509" - licence: "['GPL v3']" - + licence: ["GPL v3"] input: - meta: type: map @@ -31,7 +30,6 @@ input: - model_dir: type: directory description: Path to an alternative CRF (Conditional Random Fields) module to use - output: - meta: type: map @@ -62,6 +60,7 @@ output: type: file description: AntiSMASH v6 sideload JSON file (if --antismash-sideload) supplied. Will not be generated if no hits are found. 
pattern: "*.gbk" - authors: - "@jfy133" +maintainers: + - "@jfy133" diff --git a/modules/nf-core/gecco/run/tests/main.nf.test b/modules/nf-core/gecco/run/tests/main.nf.test new file mode 100644 index 00000000..002a8e84 --- /dev/null +++ b/modules/nf-core/gecco/run/tests/main.nf.test @@ -0,0 +1,68 @@ +nextflow_process { + + name "Test Process GECCO_RUN" + script "../main.nf" + process "GECCO_RUN" + + tag "modules" + tag "modules_nfcore" + tag "gecco" + tag "gecco/run" + + test("gecco/run - candidatus_portiera_aleyrodidarum - genome_fasta") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/prokaryotes/candidatus_portiera_aleyrodidarum/genome/genome.fasta', checkIfExists: true), + [] + ] + input[1] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.genes + + process.out.features + + process.out.clusters + + process.out.versions + ).match() }, + { assert path(process.out.gbk.get(0).get(1)).text.contains("MVKNDIDILILGGGCTGLSLAYYLSFLPNTVRIFLIENKFIYNND") } + ) + } + + } + + test("gecco/run - candidatus_portiera_aleyrodidarum - genome_fasta - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/prokaryotes/candidatus_portiera_aleyrodidarum/genome/genome.fasta', checkIfExists: true), + [] + ] + input[1] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/gecco/run/tests/main.nf.test.snap b/modules/nf-core/gecco/run/tests/main.nf.test.snap new file mode 100644 index 00000000..aec1cd52 --- /dev/null +++ b/modules/nf-core/gecco/run/tests/main.nf.test.snap @@ -0,0 +1,130 @@ +{ + "gecco/run - candidatus_portiera_aleyrodidarum - genome_fasta": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.genes.tsv:md5,2338b99d9b77200e9a071941ad8bb3ac" + ], + [ + { + "id": "test", + "single_end": false + }, + "test.features.tsv:md5,dcd929c60337b4835729d886897cdd36" + ], + [ + { + "id": "test", + "single_end": false + }, + "test.clusters.tsv:md5,84a10da0afff6e2085d8296db69eb8b0" + ], + "versions.yml:md5,3ade04da8c7c1f46cdf07ecb0334a777" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-05-29T13:44:10.071851827" + }, + "gecco/run - candidatus_portiera_aleyrodidarum - genome_fasta - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.genes.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.features.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.clusters.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "NC_018507.1_cluster_1.gbk:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + + ], + "5": [ + "versions.yml:md5,3ade04da8c7c1f46cdf07ecb0334a777" + ], + "clusters": [ + [ + { + "id": "test", + "single_end": false + }, + "test.clusters.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "features": [ + [ + { + "id": "test", + "single_end": false + }, + "test.features.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "gbk": [ + [ + { + "id": "test", + "single_end": false + }, + 
"NC_018507.1_cluster_1.gbk:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "genes": [ + [ + { + "id": "test", + "single_end": false + }, + "test.genes.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "json": [ + + ], + "versions": [ + "versions.yml:md5,3ade04da8c7c1f46cdf07ecb0334a777" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-22T22:07:47.729614386" + } +} \ No newline at end of file diff --git a/modules/nf-core/gecco/run/tests/tags.yml b/modules/nf-core/gecco/run/tests/tags.yml new file mode 100644 index 00000000..1542bcca --- /dev/null +++ b/modules/nf-core/gecco/run/tests/tags.yml @@ -0,0 +1,2 @@ +gecco/run: + - "modules/nf-core/gecco/run/**" diff --git a/modules/nf-core/gunzip/environment.yml b/modules/nf-core/gunzip/environment.yml new file mode 100644 index 00000000..dfc02a7b --- /dev/null +++ b/modules/nf-core/gunzip/environment.yml @@ -0,0 +1,9 @@ +name: gunzip +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - conda-forge::grep=3.11 + - conda-forge::sed=4.8 + - conda-forge::tar=1.34 diff --git a/modules/nf-core/gunzip/main.nf b/modules/nf-core/gunzip/main.nf index 73bf08cd..5e67e3b9 100644 --- a/modules/nf-core/gunzip/main.nf +++ b/modules/nf-core/gunzip/main.nf @@ -2,10 +2,10 @@ process GUNZIP { tag "$archive" label 'process_single' - conda "conda-forge::sed=4.7" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'nf-core/ubuntu:20.04' }" + 'https://depot.galaxyproject.org/singularity/ubuntu:22.04' : + 'nf-core/ubuntu:22.04' }" input: tuple val(meta), path(archive) @@ -18,8 +18,11 @@ process GUNZIP { task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' - gunzip = archive.toString() - '.gz' + def args = task.ext.args ?: '' + def extension = ( archive.toString() - '.gz' ).tokenize('.')[-1] + def name = archive.toString() - '.gz' - ".$extension" + def prefix = task.ext.prefix ?: name + gunzip = prefix + ".$extension" """ # Not calling gunzip itself because it creates files # with the original group ownership rather than the @@ -37,7 +40,11 @@ process GUNZIP { """ stub: - gunzip = archive.toString() - '.gz' + def args = task.ext.args ?: '' + def extension = ( archive.toString() - '.gz' ).tokenize('.')[-1] + def name = archive.toString() - '.gz' - ".$extension" + def prefix = task.ext.prefix ?: name + gunzip = prefix + ".$extension" """ touch $gunzip cat <<-END_VERSIONS > versions.yml diff --git a/modules/nf-core/gunzip/meta.yml b/modules/nf-core/gunzip/meta.yml index 4cdcdf4c..f32973a0 100644 --- a/modules/nf-core/gunzip/meta.yml +++ b/modules/nf-core/gunzip/meta.yml @@ -33,3 +33,8 @@ authors: - "@joseespinosa" - "@drpatelh" - "@jfy133" +maintainers: + - "@joseespinosa" + - "@drpatelh" + - "@jfy133" + - "@gallvp" diff --git a/modules/nf-core/gunzip/tests/main.nf.test b/modules/nf-core/gunzip/tests/main.nf.test new file mode 100644 index 00000000..776211ad --- /dev/null +++ b/modules/nf-core/gunzip/tests/main.nf.test @@ -0,0 +1,121 @@ +nextflow_process { + + name "Test Process GUNZIP" + script "../main.nf" + process "GUNZIP" + tag "gunzip" + tag "modules_nfcore" + tag "modules" + + test("Should run without failures") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', 
diff --git a/modules/nf-core/gunzip/meta.yml b/modules/nf-core/gunzip/meta.yml
index 4cdcdf4c..f32973a0 100644
--- a/modules/nf-core/gunzip/meta.yml
+++ b/modules/nf-core/gunzip/meta.yml
@@ -33,3 +33,8 @@ authors:
   - "@joseespinosa"
   - "@drpatelh"
   - "@jfy133"
+maintainers:
+  - "@joseespinosa"
+  - "@drpatelh"
+  - "@jfy133"
+  - "@gallvp"
diff --git a/modules/nf-core/gunzip/tests/main.nf.test b/modules/nf-core/gunzip/tests/main.nf.test
new file mode 100644
index 00000000..776211ad
--- /dev/null
+++ b/modules/nf-core/gunzip/tests/main.nf.test
@@ -0,0 +1,121 @@
+nextflow_process {
+
+    name "Test Process GUNZIP"
+    script "../main.nf"
+    process "GUNZIP"
+    tag "gunzip"
+    tag "modules_nfcore"
+    tag "modules"
+
+    test("Should run without failures") {
+
+        when {
+            params {
+                outdir = "$outputDir"
+            }
+            process {
+                """
+                input[0] = Channel.of([
+                    [],
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)
+                    ]
+                )
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+
+    }
+
+    test("Should run without failures - prefix") {
+
+        config './nextflow.config'
+
+        when {
+            params {
+                outdir = "$outputDir"
+            }
+            process {
+                """
+                input[0] = Channel.of([
+                    [ id: 'test' ],
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)
+                    ]
+                )
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+
+    }
+
+    test("Should run without failures - stub") {
+
+        options '-stub'
+
+        when {
+            params {
+                outdir = "$outputDir"
+            }
+            process {
+                """
+                input[0] = Channel.of([
+                    [],
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)
+                    ]
+                )
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+
+    }
+
+    test("Should run without failures - prefix - stub") {
+
+        options '-stub'
+        config './nextflow.config'
+
+        when {
+            params {
+                outdir = "$outputDir"
+            }
+            process {
+                """
+                input[0] = Channel.of([
+                    [ id: 'test' ],
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)
+                    ]
+                )
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+
+    }
+
+}
diff --git a/modules/nf-core/gunzip/tests/main.nf.test.snap b/modules/nf-core/gunzip/tests/main.nf.test.snap
new file mode 100644
index 00000000..069967e7
--- /dev/null
+++ b/modules/nf-core/gunzip/tests/main.nf.test.snap
@@ -0,0 +1,134 @@
+{
+    "Should run without failures - prefix - stub": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.xyz.fastq:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "1": [
+                    "versions.yml:md5,54376d32aca20e937a4ec26dac228e84"
+                ],
+                "gunzip": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.xyz.fastq:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,54376d32aca20e937a4ec26dac228e84"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "24.04.2"
+        },
+        "timestamp": "2024-06-25T11:35:10.861293"
+    },
+    "Should run without failures - stub": {
+        "content": [
+            {
+                "0": [
+                    [
+                        [
+
+                        ],
+                        "test_1.fastq:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "1": [
+                    "versions.yml:md5,54376d32aca20e937a4ec26dac228e84"
+                ],
+                "gunzip": [
+                    [
+                        [
+
+                        ],
+                        "test_1.fastq:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,54376d32aca20e937a4ec26dac228e84"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "24.04.2"
+        },
+        "timestamp": "2024-06-25T11:35:05.857145"
+    },
+    "Should run without failures": {
+        "content": [
+            {
+                "0": [
+                    [
+                        [
+
+                        ],
+                        "test_1.fastq:md5,4161df271f9bfcd25d5845a1e220dbec"
+                    ]
+                ],
+                "1": [
+                    "versions.yml:md5,54376d32aca20e937a4ec26dac228e84"
+                ],
+                "gunzip": [
+                    [
+                        [
+
+                        ],
+                        "test_1.fastq:md5,4161df271f9bfcd25d5845a1e220dbec"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,54376d32aca20e937a4ec26dac228e84"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "24.04.2"
+        },
+        "timestamp": "2023-10-17T15:35:37.690477896"
+    },
+    "Should run without failures - prefix": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.xyz.fastq:md5,4161df271f9bfcd25d5845a1e220dbec"
+                    ]
+                ],
+                "1": [
+                    "versions.yml:md5,54376d32aca20e937a4ec26dac228e84"
+                ],
+                "gunzip": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.xyz.fastq:md5,4161df271f9bfcd25d5845a1e220dbec"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,54376d32aca20e937a4ec26dac228e84"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "24.04.2"
+        },
+        "timestamp": "2024-06-25T11:33:32.921739"
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/gunzip/tests/nextflow.config b/modules/nf-core/gunzip/tests/nextflow.config
new file mode 100644
index 00000000..dec77642
--- /dev/null
+++ b/modules/nf-core/gunzip/tests/nextflow.config
@@ -0,0 +1,5 @@
+process {
+    withName: GUNZIP {
+        ext.prefix = { "${meta.id}.xyz" }
+    }
+}
diff --git a/modules/nf-core/gunzip/tests/tags.yml b/modules/nf-core/gunzip/tests/tags.yml
new file mode 100644
index 00000000..fd3f6915
--- /dev/null
+++ b/modules/nf-core/gunzip/tests/tags.yml
@@ -0,0 +1,2 @@
+gunzip:
+  - modules/nf-core/gunzip/**
diff --git a/modules/nf-core/hamronization/abricate/environment.yml b/modules/nf-core/hamronization/abricate/environment.yml
new file mode 100644
index 00000000..75f349f1
--- /dev/null
+++ b/modules/nf-core/hamronization/abricate/environment.yml
@@ -0,0 +1,7 @@
+name: hamronization_abricate
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - bioconda::hamronization=1.1.4
diff --git a/modules/nf-core/hamronization/abricate/main.nf b/modules/nf-core/hamronization/abricate/main.nf
index b73e04be..54cd9904 100644
--- a/modules/nf-core/hamronization/abricate/main.nf
+++ b/modules/nf-core/hamronization/abricate/main.nf
@@ -2,10 +2,10 @@ process HAMRONIZATION_ABRICATE {
     tag "$meta.id"
     label 'process_single'

-    conda "bioconda::hamronization=1.1.1"
+    conda "${moduleDir}/environment.yml"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/hamronization:1.1.1--pyhdfd78af_0':
-        'biocontainers/hamronization:1.1.1--pyhdfd78af_0' }"
+        'https://depot.galaxyproject.org/singularity/hamronization:1.1.4--pyhdfd78af_0':
+        'biocontainers/hamronization:1.1.4--pyhdfd78af_0' }"

     input:
     tuple val(meta), path(report)
@@ -34,6 +34,16 @@ process HAMRONIZATION_ABRICATE {
         --reference_database_version ${reference_db_version} \\
         > ${prefix}.${format}

+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        hamronization: \$(echo \$(hamronize --version 2>&1) | cut -f 2 -d ' ' )
+    END_VERSIONS
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    touch ${prefix}.${format}

     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
diff --git a/modules/nf-core/hamronization/abricate/meta.yml b/modules/nf-core/hamronization/abricate/meta.yml
index c9489525..4a0867d6 100644
--- a/modules/nf-core/hamronization/abricate/meta.yml
+++ b/modules/nf-core/hamronization/abricate/meta.yml
@@ -11,9 +11,7 @@ tools:
       homepage: "https://github.com/pha4ge/hAMRonization/"
       documentation: "https://github.com/pha4ge/hAMRonization/"
       tool_dev_url: "https://github.com/pha4ge/hAMRonization"
-
-      licence: "['GNU Lesser General Public v3 (LGPL v3)']"
-
+      licence: ["GNU Lesser General Public v3 (LGPL v3)"]
 input:
   - meta:
       type: map
@@ -25,18 +23,17 @@ input:
       description: Output TSV or CSV file from ABRicate
       pattern: "*.{csv,tsv}"
   - format:
-      type: value
+      type: string
       description: Type of report file to be produced
       pattern: "tsv|json"
   - software_version:
-      type: value
+      type: string
       description: Version of ABRicate used
       pattern: "[0-9].[0-9].[0-9]"
   - reference_db_version:
-      type: value
+      type: string
       description: Database version of ABRicate used
       pattern: "[0-9][0-9][0-9][0-9]-[A-Z][a-z][a-z]-[0-9][0-9]"
-
 output:
   - meta:
       type: map
@@ -55,6 +52,7 @@ output:
       type: file
       description: hAMRonised report in TSV format
       pattern: "*.json"
-
 authors:
   - "@jasmezz"
+maintainers:
+  - "@jasmezz"
diff --git a/modules/nf-core/hamronization/abricate/tests/main.nf.test b/modules/nf-core/hamronization/abricate/tests/main.nf.test
new file mode 100644
index 00000000..d6c21350
--- /dev/null
+++ b/modules/nf-core/hamronization/abricate/tests/main.nf.test
@@ -0,0 +1,55 @@
+nextflow_process {
+
+    name "Test Process HAMRONIZATION_ABRICATE"
+    script "../main.nf"
+    process "HAMRONIZATION_ABRICATE"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "hamronization"
+    tag "hamronization/abricate"
+
+    test("hamronization/abricate - bacteroides_fragilis - genome_abricate_tsv") {
+
+        when {
+            process {
+                """
+                input[0] = [ [ id:"test" ], file(params.modules_testdata_base_path + 'genomics/prokaryotes/bacteroides_fragilis/hamronization/genome.abricate.tsv', checkIfExists: true) ]
+                input[1] = 'tsv'
+                input[2] = '1.0.1'
+                input[3] = '2021-Mar-27'
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+    }
+
+    test("hamronization/abricate - bacteroides_fragilis - genome_abricate_tsv - stub") {
+
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = [ [ id:"test" ], file(params.modules_testdata_base_path + 'genomics/prokaryotes/bacteroides_fragilis/hamronization/genome.abricate.tsv', checkIfExists: true) ]
+                input[1] = 'tsv'
+                input[2] = '1.0.1'
+                input[3] = '2021-Mar-27'
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+    }
+}
diff --git a/modules/nf-core/hamronization/abricate/tests/main.nf.test.snap b/modules/nf-core/hamronization/abricate/tests/main.nf.test.snap
new file mode 100644
index 00000000..47432f37
--- /dev/null
+++ b/modules/nf-core/hamronization/abricate/tests/main.nf.test.snap
@@ -0,0 +1,80 @@
+{
+    "hamronization/abricate - bacteroides_fragilis - genome_abricate_tsv": {
+        "content": [
+            {
+                "0": [
+
+                ],
+                "1": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.tsv:md5,4b1024ba25c116a5312944f65dd40e9b"
+                    ]
+                ],
+                "2": [
+                    "versions.yml:md5,c826615ee7b88b615cae5bded792b790"
+                ],
+                "json": [
+
+                ],
+                "tsv": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.tsv:md5,4b1024ba25c116a5312944f65dd40e9b"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,c826615ee7b88b615cae5bded792b790"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "23.10.1"
+        },
+        "timestamp": "2024-02-07T22:16:18.968989666"
+    },
+    "hamronization/abricate - bacteroides_fragilis - genome_abricate_tsv - stub": {
+        "content": [
+            {
+                "0": [
+
+                ],
+                "1": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "2": [
+                    "versions.yml:md5,c826615ee7b88b615cae5bded792b790"
+                ],
+                "json": [
+
+                ],
+                "tsv": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,c826615ee7b88b615cae5bded792b790"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "23.10.1"
+        },
+        "timestamp": "2024-02-07T22:25:57.524839789"
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/hamronization/abricate/tests/tags.yml b/modules/nf-core/hamronization/abricate/tests/tags.yml
new file mode 100644
index 00000000..ed35a969
--- /dev/null
+++ b/modules/nf-core/hamronization/abricate/tests/tags.yml
@@ -0,0 +1,2 @@
+hamronization/abricate:
+  - "modules/nf-core/hamronization/abricate/**"
diff --git a/modules/nf-core/hamronization/amrfinderplus/environment.yml b/modules/nf-core/hamronization/amrfinderplus/environment.yml
new file mode 100644
index 00000000..2f9cb27f
--- /dev/null
+++ b/modules/nf-core/hamronization/amrfinderplus/environment.yml
@@ -0,0 +1,7 @@
+name: hamronization_amrfinderplus
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - bioconda::hamronization=1.1.4
diff --git a/modules/nf-core/hamronization/amrfinderplus/main.nf b/modules/nf-core/hamronization/amrfinderplus/main.nf
index 6f4cda34..22a56d66 100644
--- a/modules/nf-core/hamronization/amrfinderplus/main.nf
+++ b/modules/nf-core/hamronization/amrfinderplus/main.nf
@@ -2,10 +2,10 @@ process HAMRONIZATION_AMRFINDERPLUS {
     tag "$meta.id"
     label 'process_single'

-    conda "bioconda::hamronization=1.1.1"
+    conda "${moduleDir}/environment.yml"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/hamronization:1.1.1--pyhdfd78af_0':
-        'biocontainers/hamronization:1.1.1--pyhdfd78af_0' }"
+        'https://depot.galaxyproject.org/singularity/hamronization:1.1.4--pyhdfd78af_0':
+        'biocontainers/hamronization:1.1.4--pyhdfd78af_0' }"

     input:
     tuple val(meta), path(report)
@@ -40,4 +40,15 @@ process HAMRONIZATION_AMRFINDERPLUS {
         hamronization: \$(echo \$(hamronize --version 2>&1) | cut -f 2 -d ' ' )
     END_VERSIONS
     """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    touch ${prefix}.${format}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        hamronization: \$(echo \$(hamronize --version 2>&1) | cut -f 2 -d ' ' )
+    END_VERSIONS
+    """
 }
diff --git a/modules/nf-core/hamronization/amrfinderplus/meta.yml b/modules/nf-core/hamronization/amrfinderplus/meta.yml
index 4ac82637..c0997150 100644
--- a/modules/nf-core/hamronization/amrfinderplus/meta.yml
+++ b/modules/nf-core/hamronization/amrfinderplus/meta.yml
@@ -13,8 +13,7 @@ tools:
       homepage: "https://github.com/pha4ge/hAMRonization/"
       documentation: "https://github.com/pha4ge/hAMRonization/"
       tool_dev_url: "https://github.com/pha4ge/hAMRonization"
-      licence: "['GNU Lesser General Public v3 (LGPL v3)']"
-
+      licence: ["GNU Lesser General Public v3 (LGPL v3)"]
 input:
   - meta:
       type: map
@@ -26,18 +25,17 @@ input:
       description: Output .tsv file from AMRfinderPlus
       pattern: "*.tsv"
   - format:
-      type: value
+      type: string
       description: Type of report file to be produced
       pattern: "tsv|json"
   - software_version:
-      type: value
+      type: string
       description: Version of AMRfinder used
       pattern: "[0-9].[0-9].[0-9]"
   - reference_db_version:
-      type: value
+      type: string
       description: Database version of ncbi_AMRfinder used
       pattern: "[0-9]-[0-9]-[0-9].[0-9]"
-
 output:
   - meta:
       type: map
@@ -56,6 +54,7 @@ output:
       type: file
       description: hAMRonised report in TSV format
       pattern: "*.tsv"
-
 authors:
   - "@louperelo"
+maintainers:
+  - "@louperelo"
diff --git a/modules/nf-core/hamronization/amrfinderplus/tests/main.nf.test b/modules/nf-core/hamronization/amrfinderplus/tests/main.nf.test
new file mode 100644
index 00000000..d74f2f56
--- /dev/null
+++ b/modules/nf-core/hamronization/amrfinderplus/tests/main.nf.test
@@ -0,0 +1,64 @@
+nextflow_process {
+
+    name "Test Process HAMRONIZATION_AMRFINDERPLUS"
+    script "../main.nf"
+    process "HAMRONIZATION_AMRFINDERPLUS"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "hamronization"
+    tag "hamronization/amrfinderplus"
+
+    test("hamronization/amrfinderplus - delete_me/amrfinderplus - tsv") {
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:false ], // meta map
+                    file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/amrfinderplus/test_output.tsv", checkIfExists: true)
+                ]
+                input[1] = 'tsv'
+                input[2] = '3.10.30'
+                input[3] = '2022-05-26.1'
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+
+    }
+
+    test("hamronization/amrfinderplus - delete_me/amrfinderplus - tsv - stub") {
+
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:false ], // meta map
+                    file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/amrfinderplus/test_output.tsv", checkIfExists: true)
+                ]
+                input[1] = 'tsv'
+                input[2] = '3.10.30'
+                input[3] = '2022-05-26.1'
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+
+    }
+
+}
diff --git a/modules/nf-core/hamronization/amrfinderplus/tests/main.nf.test.snap b/modules/nf-core/hamronization/amrfinderplus/tests/main.nf.test.snap
new file mode 100644
index 00000000..486d8cdc
--- /dev/null
+++ b/modules/nf-core/hamronization/amrfinderplus/tests/main.nf.test.snap
@@ -0,0 +1,84 @@
+{
+    "hamronization/amrfinderplus - delete_me/amrfinderplus - tsv - stub": {
+        "content": [
+            {
+                "0": [
+
+                ],
+                "1": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "2": [
+                    "versions.yml:md5,8a2099aeaf76a6cacfca285faa389257"
+                ],
+                "json": [
+
+                ],
+                "tsv": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,8a2099aeaf76a6cacfca285faa389257"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "23.10.1"
+        },
+        "timestamp": "2024-02-07T10:47:35.965140591"
+    },
+    "hamronization/amrfinderplus - delete_me/amrfinderplus - tsv": {
+        "content": [
+            {
+                "0": [
+
+                ],
+                "1": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.tsv:md5,cde9a32a5b0a8902c4a76ebd2a820d4d"
+                    ]
+                ],
+                "2": [
+                    "versions.yml:md5,8a2099aeaf76a6cacfca285faa389257"
+                ],
+                "json": [
+
+                ],
+                "tsv": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.tsv:md5,cde9a32a5b0a8902c4a76ebd2a820d4d"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,8a2099aeaf76a6cacfca285faa389257"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "23.10.1"
+        },
+        "timestamp": "2024-02-07T10:47:30.194755603"
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/hamronization/amrfinderplus/tests/tags.yml b/modules/nf-core/hamronization/amrfinderplus/tests/tags.yml
new file mode 100644
index 00000000..d3791c05
--- /dev/null
+++ b/modules/nf-core/hamronization/amrfinderplus/tests/tags.yml
@@ -0,0 +1,2 @@
+hamronization/amrfinderplus:
+  - "modules/nf-core/hamronization/amrfinderplus/**"
diff --git a/modules/nf-core/hamronization/deeparg/environment.yml b/modules/nf-core/hamronization/deeparg/environment.yml
new file mode 100644
index 00000000..c9db54c6
--- /dev/null
+++ b/modules/nf-core/hamronization/deeparg/environment.yml
@@ -0,0 +1,7 @@
+name: hamronization_deeparg
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - bioconda::hamronization=1.1.4
diff --git a/modules/nf-core/hamronization/deeparg/main.nf b/modules/nf-core/hamronization/deeparg/main.nf
index 193b1ae1..be3ef006 100644
--- a/modules/nf-core/hamronization/deeparg/main.nf
+++ b/modules/nf-core/hamronization/deeparg/main.nf
@@ -2,10 +2,10 @@ process HAMRONIZATION_DEEPARG {
     tag "$meta.id"
     label 'process_single'

-    conda "bioconda::hamronization=1.1.1"
+    conda "${moduleDir}/environment.yml"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/hamronization:1.1.1--pyhdfd78af_0':
-        'biocontainers/hamronization:1.1.1--pyhdfd78af_0' }"
+        'https://depot.galaxyproject.org/singularity/hamronization:1.1.4--pyhdfd78af_0':
+        'biocontainers/hamronization:1.1.4--pyhdfd78af_0' }"

     input:
     tuple val(meta), path(report)
@@ -36,6 +36,17 @@ process HAMRONIZATION_DEEPARG {
         > ${prefix}.${format}

+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        hamronization: \$(echo \$(hamronize --version 2>&1) | cut -f 2 -d ' ' )
+    END_VERSIONS
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    touch ${prefix}.${format}
+
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
         hamronization: \$(echo \$(hamronize --version 2>&1) | cut -f 2 -d ' ' )
diff --git a/modules/nf-core/hamronization/deeparg/meta.yml b/modules/nf-core/hamronization/deeparg/meta.yml
index 34771a9b..de01196e 100644
--- a/modules/nf-core/hamronization/deeparg/meta.yml
+++ b/modules/nf-core/hamronization/deeparg/meta.yml
@@ -11,9 +11,7 @@ tools:
       homepage: https://github.com/pha4ge/hAMRonization/
       documentation: https://github.com/pha4ge/hAMRonization/
       tool_dev_url: https://github.com/pha4ge/hAMRonization
-
       licence: ["GNU Lesser General Public v3 (LGPL v3)"]
-
 input:
   - meta:
       type: map
@@ -25,18 +23,17 @@ input:
       description: Output .mapping.ARG file from DeepARG
       pattern: "*.mapping.ARG"
   - format:
-      type: value
+      type: string
       description: Type of report file to be produced
       pattern: "tsv|json"
   - software_version:
-      type: value
+      type: string
       description: Version of DeepARG used
       pattern: "[0-9].[0-9].[0-9]"
   - reference_db_version:
-      type: value
+      type: integer
       description: Database version of DeepARG used
       pattern: "[0-9]"
-
 output:
   - meta:
       type: map
@@ -55,6 +52,7 @@ output:
       type: file
       description: hAMRonised report in TSV format
       pattern: "*.json"
-
 authors:
   - "@jfy133"
+maintainers:
+  - "@jfy133"
diff --git a/modules/nf-core/hamronization/deeparg/tests/main.nf.test b/modules/nf-core/hamronization/deeparg/tests/main.nf.test
new file mode 100644
index 00000000..e13be328
--- /dev/null
+++ b/modules/nf-core/hamronization/deeparg/tests/main.nf.test
@@ -0,0 +1,64 @@
+nextflow_process {
+
+    name "Test Process HAMRONIZATION_DEEPARG"
+    script "../main.nf"
+    process "HAMRONIZATION_DEEPARG"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "hamronization"
+    tag "hamronization/deeparg"
+
+    test("hamronization/deeparg - bacteroides_fragilis - genome_mapping_potential_arg") {
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:false ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/prokaryotes/bacteroides_fragilis/hamronization/genome.mapping.potential.ARG', checkIfExists: true),
+                ]
+                input[1] = 'tsv'
+                input[2] = '1.0.2'
+                input[3] = '2'
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+
+    }
+
+    test("hamronization/deeparg - bacteroides_fragilis - genome_mapping_potential_arg - stub") {
+
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:false ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/prokaryotes/bacteroides_fragilis/hamronization/genome.mapping.potential.ARG', checkIfExists: true),
+                ]
+                input[1] = 'tsv'
+                input[2] = '1.0.2'
+                input[3] = '2'
+                """
+            }
+        }
+
then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/hamronization/deeparg/tests/main.nf.test.snap b/modules/nf-core/hamronization/deeparg/tests/main.nf.test.snap new file mode 100644 index 00000000..d680080b --- /dev/null +++ b/modules/nf-core/hamronization/deeparg/tests/main.nf.test.snap @@ -0,0 +1,84 @@ +{ + "hamronization/deeparg - bacteroides_fragilis - genome_mapping_potential_arg - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,2f75284447982578412e05eb09c5367f" + ], + "json": [ + + ], + "tsv": [ + [ + { + "id": "test", + "single_end": false + }, + "test.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,2f75284447982578412e05eb09c5367f" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-08T00:43:44.834346159" + }, + "hamronization/deeparg - bacteroides_fragilis - genome_mapping_potential_arg": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.tsv:md5,e886d665bf9fc266be8193859863d2f4" + ] + ], + "2": [ + "versions.yml:md5,2f75284447982578412e05eb09c5367f" + ], + "json": [ + + ], + "tsv": [ + [ + { + "id": "test", + "single_end": false + }, + "test.tsv:md5,e886d665bf9fc266be8193859863d2f4" + ] + ], + "versions": [ + "versions.yml:md5,2f75284447982578412e05eb09c5367f" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-07T10:32:36.722182719" + } +} \ No newline at end of file diff --git a/modules/nf-core/hamronization/deeparg/tests/tags.yml b/modules/nf-core/hamronization/deeparg/tests/tags.yml new file mode 100644 index 00000000..0dedf03d --- /dev/null +++ b/modules/nf-core/hamronization/deeparg/tests/tags.yml @@ -0,0 +1,2 @@ +hamronization/deeparg: + - "modules/nf-core/hamronization/deeparg/**" diff --git a/modules/nf-core/hamronization/fargene/environment.yml b/modules/nf-core/hamronization/fargene/environment.yml new file mode 100644 index 00000000..6507e7d4 --- /dev/null +++ b/modules/nf-core/hamronization/fargene/environment.yml @@ -0,0 +1,7 @@ +name: hamronization_fargene +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::hamronization=1.1.4 diff --git a/modules/nf-core/hamronization/fargene/main.nf b/modules/nf-core/hamronization/fargene/main.nf index 79ebcc99..ca1edc73 100644 --- a/modules/nf-core/hamronization/fargene/main.nf +++ b/modules/nf-core/hamronization/fargene/main.nf @@ -2,10 +2,10 @@ process HAMRONIZATION_FARGENE { tag "$meta.id" label 'process_single' - conda "bioconda::hamronization=1.1.1" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/hamronization:1.1.1--pyhdfd78af_0': - 'biocontainers/hamronization:1.1.1--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/hamronization:1.1.4--pyhdfd78af_0': + 'biocontainers/hamronization:1.1.4--pyhdfd78af_0' }" input: tuple val(meta), path(report) @@ -40,4 +40,15 @@ process HAMRONIZATION_FARGENE { hamronization: \$(echo \$(hamronize --version 2>&1) | cut -f 2 -d ' ' ) END_VERSIONS """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + echo "stub" > ${prefix}.${format} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + hamronization: \$(echo \$(hamronize --version 2>&1) | cut -f 2 -d ' ' ) + END_VERSIONS + """ } diff --git a/modules/nf-core/hamronization/fargene/meta.yml b/modules/nf-core/hamronization/fargene/meta.yml index 08b2efb0..45a3811d 100644 --- a/modules/nf-core/hamronization/fargene/meta.yml +++ b/modules/nf-core/hamronization/fargene/meta.yml @@ -13,9 +13,7 @@ tools: homepage: "https://github.com/pha4ge/hAMRonization/" documentation: "https://github.com/pha4ge/hAMRonization/" tool_dev_url: "https://github.com/pha4ge/hAMRonization" - - licence: "['GNU Lesser General Public v3 (LGPL v3)']" - + licence: ["GNU Lesser General Public v3 (LGPL v3)"] input: - meta: type: map @@ -27,18 +25,17 @@ input: description: Output .txt file from fARGene pattern: "*.txt" - format: - type: value + type: string description: Type of report file to be produced pattern: "tsv|json" - software_version: - type: value + type: string description: Version of fARGene used pattern: "[0-9].[0-9].[0-9]" - reference_db_version: - type: value + type: string description: Database version of fARGene used pattern: "[0-9].[0-9].[0-9]" - output: - meta: type: map @@ -57,6 +54,7 @@ output: type: file description: hAMRonised report in TSV format pattern: "*.json" - authors: - "@jfy133" +maintainers: + - "@jfy133" diff --git a/modules/nf-core/hamronization/fargene/tests/main.nf.test b/modules/nf-core/hamronization/fargene/tests/main.nf.test new file mode 100644 index 00000000..a5c5f2f7 --- /dev/null +++ b/modules/nf-core/hamronization/fargene/tests/main.nf.test @@ -0,0 +1,82 @@ +nextflow_process { + + name "Test Process HAMRONIZATION_FARGENE" + script "../main.nf" + process "HAMRONIZATION_FARGENE" + + tag "modules" + tag "modules_nfcore" + tag "hamronization" + tag "hamronization/fargene" + tag "gunzip" + tag "fargene" + + test("hamronization/fargene - bacteroides_fragilis - illumina - fa.gz/gunzip") { + setup { + + run("GUNZIP") { + script "../../../gunzip/main.nf" + process { + """ + input[0] = Channel.fromList([ + tuple([ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/prokaryotes/bacteroides_fragilis/illumina/fasta/test1.contigs.fa.gz', checkIfExists: true)) + ]) + """ + } + } + + run("FARGENE") { + script "../../../fargene/main.nf" + process { + """ + input[0] = GUNZIP.out.gunzip + input[1] = 'class_a' + """ + } + } + } + + when { + process { + """ + input[0] = FARGENE.out.hmm + input[1] = 'tsv' + input[2] = '0.1' + input[3] = '0.1' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.tsv.get(0).get(1)).exists() }, // No md5 check because of empty file + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("hamronization/fargene - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ [id: 'test'], file("dummy.fa") ] + input[1] = 'tsv' + input[2] = '0.1' + input[3] = '0.1' + """ + } + } + + 
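// With -stub, Nextflow runs the process stub block instead of the script, so the dummy.fa placeholder above never needs to exist on disk. +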
then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/hamronization/fargene/tests/main.nf.test.snap b/modules/nf-core/hamronization/fargene/tests/main.nf.test.snap new file mode 100644 index 00000000..b60c696b --- /dev/null +++ b/modules/nf-core/hamronization/fargene/tests/main.nf.test.snap @@ -0,0 +1,53 @@ +{ + "versions": { + "content": [ + [ + "versions.yml:md5,a961ebe5815800b3b27c935a4ecbf7f3" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-08-12T15:18:12.580157967" + }, + "hamronization/fargene - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test" + }, + "test.tsv:md5,f50b84b1db4b83ba62ec1deacc69c260" + ] + ], + "2": [ + "versions.yml:md5,a961ebe5815800b3b27c935a4ecbf7f3" + ], + "json": [ + + ], + "tsv": [ + [ + { + "id": "test" + }, + "test.tsv:md5,f50b84b1db4b83ba62ec1deacc69c260" + ] + ], + "versions": [ + "versions.yml:md5,a961ebe5815800b3b27c935a4ecbf7f3" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-08T01:00:18.784613823" + } +} \ No newline at end of file diff --git a/modules/nf-core/hamronization/fargene/tests/tags.yml b/modules/nf-core/hamronization/fargene/tests/tags.yml new file mode 100644 index 00000000..49357928 --- /dev/null +++ b/modules/nf-core/hamronization/fargene/tests/tags.yml @@ -0,0 +1,2 @@ +hamronization/fargene: + - "modules/nf-core/hamronization/fargene/**" diff --git a/modules/nf-core/hamronization/rgi/environment.yml b/modules/nf-core/hamronization/rgi/environment.yml new file mode 100644 index 00000000..91d03e49 --- /dev/null +++ b/modules/nf-core/hamronization/rgi/environment.yml @@ -0,0 +1,7 @@ +name: hamronization_rgi +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::hamronization=1.1.4 diff --git a/modules/nf-core/hamronization/rgi/main.nf b/modules/nf-core/hamronization/rgi/main.nf index bb550ee6..9a99a0ff 100644 --- a/modules/nf-core/hamronization/rgi/main.nf +++ b/modules/nf-core/hamronization/rgi/main.nf @@ -2,10 +2,10 @@ process HAMRONIZATION_RGI { tag "$meta.id" label 'process_single' - conda "bioconda::hamronization=1.1.1" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/hamronization:1.1.1--pyhdfd78af_0': - 'biocontainers/hamronization:1.1.1--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/hamronization:1.1.4--pyhdfd78af_0': + 'biocontainers/hamronization:1.1.4--pyhdfd78af_0' }" input: tuple val(meta), path(report) @@ -40,4 +40,15 @@ hamronization: \$(echo \$(hamronize --version 2>&1) | cut -f 2 -d ' ' ) END_VERSIONS """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.${format} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + hamronization: \$(echo \$(hamronize --version 2>&1) | cut -f 2 -d ' ' ) + END_VERSIONS + """ } diff --git a/modules/nf-core/hamronization/rgi/meta.yml b/modules/nf-core/hamronization/rgi/meta.yml index b0f88eb2..0cca8502 100644 --- a/modules/nf-core/hamronization/rgi/meta.yml +++ b/modules/nf-core/hamronization/rgi/meta.yml @@ -13,9 +13,7 @@ tools: homepage: "https://github.com/pha4ge/hAMRonization/" documentation: "https://github.com/pha4ge/hAMRonization/" tool_dev_url: "https://github.com/pha4ge/hAMRonization" - - licence: "['GNU Lesser General Public v3 (LGPL v3)']" - + licence: ["GNU Lesser General Public v3 (LGPL v3)"] input: - meta: type: map @@ -27,18 +25,17 @@ input: description: Output .txt file from RGI pattern: "*.txt" - format: - type: value + type: string description: Type of report file to be produced pattern: "tsv|json" - software_version: - type: value + type: string description: Version of RGI used pattern: "[0-9].[0-9].[0-9]" - reference_db_version: - type: value + type: string description: Database version of CARD used pattern: "[0-9].[0-9].[0-9]" - output: - meta: type: map @@ -57,6 +54,7 @@ output: type: file description: hAMRonised report in TSV format pattern: "*.json" - authors: - "@louperelo" +maintainers: + - "@louperelo" diff --git a/modules/nf-core/hamronization/rgi/tests/main.nf.test b/modules/nf-core/hamronization/rgi/tests/main.nf.test new file mode 100644 index 00000000..52945ebe --- /dev/null +++ b/modules/nf-core/hamronization/rgi/tests/main.nf.test @@ -0,0 +1,98 @@ +nextflow_process { + + name "Test Process HAMRONIZATION_RGI" + script "../main.nf" + process "HAMRONIZATION_RGI" + + tag "modules" + tag "modules_nfcore" + tag "hamronization" + tag "hamronization/rgi" + tag "rgi/main" + tag "rgi/cardannotation" + tag "untar" + + setup { + + run("UNTAR") { + script "modules/nf-core/untar/main.nf" + process { + """ + file('https://card.mcmaster.ca/latest/data', checkIfExists: true).copyTo('data.tar.gz') + + input[0] = [ + [ ], + file("data.tar.gz") + ] + """ + } + } + + run("RGI_CARDANNOTATION") { + script "modules/nf-core/rgi/cardannotation/main.nf" + process { + """ + input[0] = UNTAR.out.untar.map{ it[1] } + """ + } + } + + run("RGI_MAIN") { + script "modules/nf-core/rgi/main/main.nf" + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['haemophilus_influenzae']['genome']['genome_fna_gz'], checkIfExists: true) + ] + input[1] = RGI_CARDANNOTATION.out.db + input[2] = [] + """ + } + } + } + + test("hamronization/rgi - haemophilus_influenzae - genome - fna.gz") { + + when { + process { + """ + input[0] = RGI_MAIN.out.tsv + input[1] = 'tsv' + input[2] = '1.0.2' + input[3] = '3.2.3' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("hamronization/rgi - haemophilus_influenzae - genome - fna.gz - stub") { + + options "-stub" + + when { +
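// Same inputs as the non-stub test above; RGI_MAIN.out.tsv is produced once by the shared setup block at the top of this file. +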
process { + """ + input[0] = RGI_MAIN.out.tsv + input[1] = 'tsv' + input[2] = '1.0.2' + input[3] = '3.2.3' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/hamronization/rgi/tests/main.nf.test.snap b/modules/nf-core/hamronization/rgi/tests/main.nf.test.snap new file mode 100644 index 00000000..07a41eae --- /dev/null +++ b/modules/nf-core/hamronization/rgi/tests/main.nf.test.snap @@ -0,0 +1,84 @@ +{ + "hamronization/rgi - haemophilus_influenzae - genome - fna.gz - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,aa58854ac6d5892e025ca2bd8db4e677" + ], + "json": [ + + ], + "tsv": [ + [ + { + "id": "test", + "single_end": false + }, + "test.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,aa58854ac6d5892e025ca2bd8db4e677" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-19T23:16:18.164635116" + }, + "hamronization/rgi - haemophilus_influenzae - genome - fna.gz": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.tsv:md5,98b98bc42db5569db041d1819bbf1d89" + ] + ], + "2": [ + "versions.yml:md5,aa58854ac6d5892e025ca2bd8db4e677" + ], + "json": [ + + ], + "tsv": [ + [ + { + "id": "test", + "single_end": false + }, + "test.tsv:md5,98b98bc42db5569db041d1819bbf1d89" + ] + ], + "versions": [ + "versions.yml:md5,aa58854ac6d5892e025ca2bd8db4e677" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-19T23:15:49.081218466" + } +} \ No newline at end of file diff --git a/modules/nf-core/hamronization/rgi/tests/tags.yml b/modules/nf-core/hamronization/rgi/tests/tags.yml new file mode 100644 index 00000000..40c55588 --- /dev/null +++ b/modules/nf-core/hamronization/rgi/tests/tags.yml @@ -0,0 +1,2 @@ +hamronization/rgi: + - "modules/nf-core/hamronization/rgi/**" diff --git a/modules/nf-core/hamronization/summarize/environment.yml b/modules/nf-core/hamronization/summarize/environment.yml new file mode 100644 index 00000000..1872a689 --- /dev/null +++ b/modules/nf-core/hamronization/summarize/environment.yml @@ -0,0 +1,7 @@ +name: hamronization_summarize +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::hamronization=1.1.4 diff --git a/modules/nf-core/hamronization/summarize/main.nf b/modules/nf-core/hamronization/summarize/main.nf index fc58b720..358ad83f 100644 --- a/modules/nf-core/hamronization/summarize/main.nf +++ b/modules/nf-core/hamronization/summarize/main.nf @@ -1,10 +1,10 @@ process HAMRONIZATION_SUMMARIZE { label 'process_single' - conda "bioconda::hamronization=1.1.1" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/hamronization:1.1.1--pyhdfd78af_0': - 'biocontainers/hamronization:1.1.1--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/hamronization:1.1.4--pyhdfd78af_0': + 'biocontainers/hamronization:1.1.4--pyhdfd78af_0' }" input: path(reports) @@ -35,4 +35,15 @@ process HAMRONIZATION_SUMMARIZE { hamronization: \$(echo \$(hamronize --version 2>&1) | cut -f 2 -d ' ' ) END_VERSIONS """ + + stub: + def outformat = format == 'interactive' ? 
'html' : format + """ + touch hamronization_combined_report.${outformat} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + hamronization: \$(echo \$(hamronize --version 2>&1) | cut -f 2 -d ' ' ) + END_VERSIONS + """ } diff --git a/modules/nf-core/hamronization/summarize/meta.yml b/modules/nf-core/hamronization/summarize/meta.yml index 0e2a4c76..7d4c7b68 100644 --- a/modules/nf-core/hamronization/summarize/meta.yml +++ b/modules/nf-core/hamronization/summarize/meta.yml @@ -10,19 +10,16 @@ tools: homepage: https://github.com/pha4ge/hAMRonization/ documentation: https://github.com/pha4ge/hAMRonization/ tool_dev_url: https://github.com/pha4ge/hAMRonization - licence: ["GNU Lesser General Public v3 (LGPL v3)"] - input: - reports: type: file description: List of multiple hAMRonization reports in either JSON or TSV format pattern: "*.{json,tsv}" - format: - type: value + type: string description: Type of final combined report file to be produced pattern: "tsv|json|interactive" - output: - versions: type: file @@ -40,6 +37,7 @@ output: type: file description: hAMRonised summary in HTML format pattern: "*.html" - authors: - "@jfy133" +maintainers: + - "@jfy133" diff --git a/modules/nf-core/hamronization/summarize/tests/main.nf.test b/modules/nf-core/hamronization/summarize/tests/main.nf.test new file mode 100644 index 00000000..dc2da33e --- /dev/null +++ b/modules/nf-core/hamronization/summarize/tests/main.nf.test @@ -0,0 +1,106 @@ +nextflow_process { + + name "Test Process HAMRONIZATION_SUMMARIZE" + script "../main.nf" + process "HAMRONIZATION_SUMMARIZE" + + tag "modules" + tag "modules_nfcore" + tag "hamronization" + tag "hamronization/summarize" + tag "hamronization/deeparg" + + setup { + run("HAMRONIZATION_DEEPARG", alias: "HAMRONIZATION_DEEPARG1") { + script "../../deeparg/main.nf" + process { + """ + input[0] = [ + [ id:'test1', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/prokaryotes/bacteroides_fragilis/hamronization/genome.mapping.potential.ARG', checkIfExists: true), + ] + input[1] = 'tsv' + input[2] = '1.0.2' + input[3] = '2' + """ + } + } + + run("HAMRONIZATION_DEEPARG", alias: "HAMRONIZATION_DEEPARG2") { + script "../../deeparg/main.nf" + process { + """ + input[0] = [ + [ id:'test2', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/prokaryotes/bacteroides_fragilis/hamronization/genome.mapping.potential.ARG', checkIfExists: true), + ] + input[1] = 'tsv' + input[2] = '1.0.2' + input[3] = '2' + """ + } + } + } + + test("hamronization/summarize - bacteroides_fragilis - hamronization - arg") { + + when { + process { + """ + ch_deeparg_run_one = HAMRONIZATION_DEEPARG1.out.tsv + ch_deeparg_run_two = HAMRONIZATION_DEEPARG2.out.tsv + + ch_deeparg_run_one + .mix( ch_deeparg_run_two ) + .map{ + [ it[1] ] + } + .collect() + .set { ch_input_for_summarize } + + input[0] = ch_input_for_summarize + input[1] = 'json' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("hamronization/summarize - stub") { + + options "-stub" + + when { + process { + """ + ch_deeparg_run_one = HAMRONIZATION_DEEPARG1.out.tsv + ch_deeparg_run_two = HAMRONIZATION_DEEPARG2.out.tsv + + ch_deeparg_run_one + .mix( ch_deeparg_run_two ) + .map{ + [ it[1] ] + } + .collect() + .set { ch_input_for_summarize } + + input[0] = ch_input_for_summarize + input[1] = 'json' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert 
snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/hamronization/summarize/tests/main.nf.test.snap b/modules/nf-core/hamronization/summarize/tests/main.nf.test.snap new file mode 100644 index 00000000..8449f2ed --- /dev/null +++ b/modules/nf-core/hamronization/summarize/tests/main.nf.test.snap @@ -0,0 +1,72 @@ +{ + "hamronization/summarize - stub": { + "content": [ + { + "0": [ + "hamronization_combined_report.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,c8de17c417d53c848738d4bf7a419e2e" + ], + "html": [ + + ], + "json": [ + "hamronization_combined_report.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "tsv": [ + + ], + "versions": [ + "versions.yml:md5,c8de17c417d53c848738d4bf7a419e2e" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-08T00:28:03.124164682" + }, + "hamronization/summarize - bacteroides_fragilis - hamronization - arg": { + "content": [ + { + "0": [ + "hamronization_combined_report.json:md5,b27855689f41a9a95ddcfbf6c02d3528" + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,c8de17c417d53c848738d4bf7a419e2e" + ], + "html": [ + + ], + "json": [ + "hamronization_combined_report.json:md5,b27855689f41a9a95ddcfbf6c02d3528" + ], + "tsv": [ + + ], + "versions": [ + "versions.yml:md5,c8de17c417d53c848738d4bf7a419e2e" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-07T15:04:33.885586093" + } +} \ No newline at end of file diff --git a/modules/nf-core/hamronization/summarize/tests/tags.yml b/modules/nf-core/hamronization/summarize/tests/tags.yml new file mode 100644 index 00000000..f98ff599 --- /dev/null +++ b/modules/nf-core/hamronization/summarize/tests/tags.yml @@ -0,0 +1,2 @@ +hamronization/summarize: + - "modules/nf-core/hamronization/summarize/**" diff --git a/modules/nf-core/hmmer/hmmsearch/environment.yml b/modules/nf-core/hmmer/hmmsearch/environment.yml new file mode 100644 index 00000000..d672c2b3 --- /dev/null +++ b/modules/nf-core/hmmer/hmmsearch/environment.yml @@ -0,0 +1,7 @@ +name: hmmer_hmmsearch +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::hmmer=3.4 diff --git a/modules/nf-core/hmmer/hmmsearch/main.nf b/modules/nf-core/hmmer/hmmsearch/main.nf index d40292d6..603a865e 100644 --- a/modules/nf-core/hmmer/hmmsearch/main.nf +++ b/modules/nf-core/hmmer/hmmsearch/main.nf @@ -2,31 +2,31 @@ process HMMER_HMMSEARCH { tag "$meta.id" label 'process_medium' - conda "bioconda::hmmer=3.3.2" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/hmmer:3.3.2--h1b792b2_1' : - 'biocontainers/hmmer:3.3.2--h1b792b2_1' }" + 'https://depot.galaxyproject.org/singularity/hmmer:3.4--hdbdd923_1' : + 'biocontainers/hmmer:3.4--hdbdd923_1' }" input: tuple val(meta), path(hmmfile), path(seqdb), val(write_align), val(write_target), val(write_domain) output: - tuple val(meta), path('*.txt.gz') , emit: output + tuple val(meta), path('*.txt.gz') , emit: output tuple val(meta), path('*.sto.gz') , emit: alignments , optional: true tuple val(meta), path('*.tbl.gz') , emit: target_summary, optional: true tuple val(meta), path('*.domtbl.gz'), emit: domain_summary, optional: true - path "versions.yml" , emit: versions + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - output = "${prefix}.txt" - alignment = write_align ? "-A ${prefix}.sto" : '' - target_summary = write_target ? "--tblout ${prefix}.tbl" : '' - domain_summary = write_domain ? "--domtblout ${prefix}.domtbl" : '' + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + output = "${prefix}.txt" + alignment = write_align ? "-A ${prefix}.sto" : '' + target_summary = write_target ? "--tblout ${prefix}.tbl" : '' + domain_summary = write_domain ? "--domtblout ${prefix}.domtbl" : '' """ hmmsearch \\ $args \\ @@ -48,4 +48,23 @@ hmmer: \$(hmmsearch -h | grep -o '^# HMMER [0-9.]*' | sed 's/^# HMMER *//') END_VERSIONS """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch "${prefix}.txt" + ${write_align ? "touch ${prefix}.sto" : ''} + ${write_target ? "touch ${prefix}.tbl" : ''} + ${write_domain ? "touch ${prefix}.domtbl" : ''} + + gzip --no-name *.txt \\ + ${write_align ? '*.sto' : ''} \\ + ${write_target ? '*.tbl' : ''} \\ + ${write_domain ? '*.domtbl' : ''} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + hmmer: \$(hmmsearch -h | grep -o '^# HMMER [0-9.]*' | sed 's/^# HMMER *//') + END_VERSIONS + """ } diff --git a/modules/nf-core/hmmer/hmmsearch/meta.yml b/modules/nf-core/hmmer/hmmsearch/meta.yml index 3f4459ba..39893c3b 100644 --- a/modules/nf-core/hmmer/hmmsearch/meta.yml +++ b/modules/nf-core/hmmer/hmmsearch/meta.yml @@ -1,7 +1,7 @@ name: hmmer_hmmsearch description: search profile(s) against a sequence database keywords: - - hidden Markov model + - Hidden Markov Model - HMM - hmmer - hmmsearch @@ -13,7 +13,6 @@ tools: tool_dev_url: https://github.com/EddyRivasLab/hmmer doi: "10.1371/journal.pcbi.1002195" licence: ["BSD"] - input: - meta: type: map @@ -29,15 +28,14 @@ input: description: Database of sequences in FASTA format pattern: "*.{fasta,fna,faa,fa,fasta.gz,fna.gz,faa.gz,fa.gz}" - write_align: - type: val - description: Flag to write optional alignment output. Specify with 'true' to output + type: boolean + description: Flag to save optional alignment output. Specify with 'true' to save. - write_target: - type: val - description: Flag to write optional per target summary . Specify with 'true' to output + type: boolean + description: Flag to save optional per target summary. Specify with 'true' to save. - write_domain: - type: val - description: Flag to write optional per domain summary. Specify with 'true' to output - + type: boolean + description: Flag to save optional per domain summary. Specify with 'true' to save.
output: - meta: type: map @@ -64,6 +62,7 @@ output: type: file description: Optional tabular (space-delimited) summary of per-domain output pattern: "*.{domtbl.gz}" - authors: - "@Midnighter" +maintainers: + - "@Midnighter" diff --git a/modules/nf-core/hmmer/hmmsearch/tests/main.nf.test b/modules/nf-core/hmmer/hmmsearch/tests/main.nf.test new file mode 100644 index 00000000..f1b59e98 --- /dev/null +++ b/modules/nf-core/hmmer/hmmsearch/tests/main.nf.test @@ -0,0 +1,126 @@ +nextflow_process { + + name "Test Process HMMER_HMMSEARCH" + script "../main.nf" + process "HMMER_HMMSEARCH" + + tag "modules" + tag "modules_nfcore" + tag "hmmer" + tag "hmmer/hmmsearch" + + test("hmmer/hmmsearch") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/hmmer/bac.16S_rRNA.hmm.gz', checkIfExists: true), + file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/hmmer/e_coli_k12_16s.fna.gz', checkIfExists: true), + false, + false, + false + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.output[0][1]).linesGzip.toString().contains('[ok]') }, + { assert snapshot(process.out.versions).match() } + ) + } + + } + + test("hmmer/hmmsearch - optional") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/hmmer/bac.16S_rRNA.hmm.gz', checkIfExists: true), + file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/hmmer/e_coli_k12_16s.fna.gz', checkIfExists: true), + true, + true, + true + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.output.get(0).get(1)).linesGzip.toString().contains('[ok]') }, + { assert path(process.out.target_summary.get(0).get(1)).linesGzip.toString().contains('[ok]') }, + { assert snapshot( + process.out.alignments + + process.out.versions + ).match() } + ) + } + + } + + test("hmmer/hmmsearch - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/hmmer/bac.16S_rRNA.hmm.gz', checkIfExists: true), + file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/hmmer/e_coli_k12_16s.fna.gz', checkIfExists: true), + false, + false, + false + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("hmmer/hmmsearch - optional - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/hmmer/bac.16S_rRNA.hmm.gz', checkIfExists: true), + file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/hmmer/e_coli_k12_16s.fna.gz', checkIfExists: true), + true, + true, + true + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/hmmer/hmmsearch/tests/main.nf.test.snap b/modules/nf-core/hmmer/hmmsearch/tests/main.nf.test.snap new file mode 100644 index 00000000..e6b22771 --- /dev/null +++ b/modules/nf-core/hmmer/hmmsearch/tests/main.nf.test.snap @@ -0,0 +1,175 @@ +{ + "hmmer/hmmsearch": { + "content": [ + [ + 
"versions.yml:md5,37393b1da5a14113d3290ab8b3b4c40f" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-28T12:18:47.293093635" + }, + "hmmer/hmmsearch - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.txt.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + + ], + "4": [ + "versions.yml:md5,37393b1da5a14113d3290ab8b3b4c40f" + ], + "alignments": [ + + ], + "domain_summary": [ + + ], + "output": [ + [ + { + "id": "test", + "single_end": false + }, + "test.txt.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "target_summary": [ + + ], + "versions": [ + "versions.yml:md5,37393b1da5a14113d3290ab8b3b4c40f" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-28T12:18:57.862047944" + }, + "hmmer/hmmsearch - optional - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.txt.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sto.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.tbl.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.domtbl.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + "versions.yml:md5,37393b1da5a14113d3290ab8b3b4c40f" + ], + "alignments": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sto.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "domain_summary": [ + [ + { + "id": "test", + "single_end": false + }, + "test.domtbl.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "output": [ + [ + { + "id": "test", + "single_end": false + }, + "test.txt.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "target_summary": [ + [ + { + "id": "test", + "single_end": false + }, + "test.tbl.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,37393b1da5a14113d3290ab8b3b4c40f" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-28T12:19:03.49192788" + }, + "hmmer/hmmsearch - optional": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sto.gz:md5,5c44c289b9e36aa1f7f3afae2005fbb7" + ], + "versions.yml:md5,37393b1da5a14113d3290ab8b3b4c40f" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-28T12:18:52.725638562" + } +} \ No newline at end of file diff --git a/modules/nf-core/hmmer/hmmsearch/tests/tags.yml b/modules/nf-core/hmmer/hmmsearch/tests/tags.yml new file mode 100644 index 00000000..1776d21f --- /dev/null +++ b/modules/nf-core/hmmer/hmmsearch/tests/tags.yml @@ -0,0 +1,2 @@ +hmmer/hmmsearch: + - "modules/nf-core/hmmer/hmmsearch/**" diff --git a/modules/nf-core/macrel/contigs/environment.yml b/modules/nf-core/macrel/contigs/environment.yml new file mode 100644 index 00000000..e6c11226 --- /dev/null +++ b/modules/nf-core/macrel/contigs/environment.yml @@ -0,0 +1,7 @@ +name: macrel_contigs +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::macrel=1.2.0 diff --git a/modules/nf-core/macrel/contigs/main.nf b/modules/nf-core/macrel/contigs/main.nf index df71bdea..6b62a868 100644 --- a/modules/nf-core/macrel/contigs/main.nf +++ b/modules/nf-core/macrel/contigs/main.nf @@ -2,7 +2,7 @@ process MACREL_CONTIGS { tag "$meta.id" label 'process_medium' - conda 
"bioconda::macrel=1.2.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/macrel:1.2.0--pyh5e36f6f_0': 'biocontainers/macrel:1.2.0--pyh5e36f6f_0' }" diff --git a/modules/nf-core/macrel/contigs/meta.yml b/modules/nf-core/macrel/contigs/meta.yml index 25473470..ba0b0e6f 100644 --- a/modules/nf-core/macrel/contigs/meta.yml +++ b/modules/nf-core/macrel/contigs/meta.yml @@ -14,7 +14,6 @@ tools: tool_dev_url: https://github.com/BigDataBiology/macrel doi: "10.7717/peerj.10555" licence: ["MIT"] - input: - meta: type: map @@ -25,7 +24,6 @@ input: type: file description: A fasta file with nucleotide sequences. pattern: "*.{fasta,fa,fna,fasta.gz,fa.gz,fna.gz}" - output: - meta: type: map @@ -56,6 +54,7 @@ output: type: file description: A log file containing the information pertaining to the run. pattern: "*_log.txt" - authors: - "@darcy220606" +maintainers: + - "@darcy220606" diff --git a/modules/nf-core/mmseqs/createdb/environment.yml b/modules/nf-core/mmseqs/createdb/environment.yml new file mode 100644 index 00000000..77b28f59 --- /dev/null +++ b/modules/nf-core/mmseqs/createdb/environment.yml @@ -0,0 +1,7 @@ +name: mmseqs_createdb +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::mmseqs2=15.6f452 diff --git a/modules/nf-core/mmseqs/createdb/main.nf b/modules/nf-core/mmseqs/createdb/main.nf new file mode 100644 index 00000000..9487e5bc --- /dev/null +++ b/modules/nf-core/mmseqs/createdb/main.nf @@ -0,0 +1,65 @@ +process MMSEQS_CREATEDB { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mmseqs2:15.6f452--pl5321h6a68c12_0': + 'biocontainers/mmseqs2:15.6f452--pl5321h6a68c12_0' }" + + input: + tuple val(meta), path(sequence) + + output: + tuple val(meta), path("${prefix}/"), emit: db + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def is_compressed = sequence.getExtension() == "gz" ? true : false + def sequence_name = is_compressed ? 
sequence.getBaseName() : sequence + """ + if [ "${is_compressed}" == "true" ]; then + gzip -c -d ${sequence} > ${sequence_name} + fi + + mkdir -p ${prefix} + + mmseqs \\ + createdb \\ + ${sequence_name} \\ + ${prefix}/${prefix} \\ + $args \\ + --compressed 1 + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + mmseqs: \$(mmseqs | grep 'Version' | sed 's/MMseqs2 Version: //') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + """ + mkdir -p ${prefix} + + touch ${prefix}/${prefix} + touch ${prefix}/${prefix}.dbtype + touch ${prefix}/${prefix}.index + touch ${prefix}/${prefix}.lookup + touch ${prefix}/${prefix}.source + touch ${prefix}/${prefix}_h + touch ${prefix}/${prefix}_h.dbtype + touch ${prefix}/${prefix}_h.index + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + mmseqs: \$(mmseqs | grep 'Version' | sed 's/MMseqs2 Version: //') + END_VERSIONS + """ +} diff --git a/modules/nf-core/mmseqs/createdb/meta.yml b/modules/nf-core/mmseqs/createdb/meta.yml new file mode 100644 index 00000000..a011020b --- /dev/null +++ b/modules/nf-core/mmseqs/createdb/meta.yml @@ -0,0 +1,47 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json +name: "mmseqs_createdb" +description: Create an MMseqs database from an existing FASTA/Q file +keywords: + - protein sequence + - databases + - clustering + - searching + - indexing + - mmseqs2 +tools: + - "mmseqs": + description: "MMseqs2: ultra fast and sensitive sequence search and clustering suite" + homepage: "https://github.com/soedinglab/MMseqs2" + documentation: "https://mmseqs.com/latest/userguide.pdf" + tool_dev_url: "https://github.com/soedinglab/MMseqs2" + doi: "10.1093/bioinformatics/btw006" + licence: ["GPL v3"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - sequence: + type: file + description: Input sequences in FASTA/Q (zipped or unzipped) format to parse into an mmseqs database + pattern: "*.{fasta,fasta.gz,fa,fa.gz,fna,fna.gz,fastq,fastq.gz,fq,fq.gz}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'test', single_end:false ]` + - db: + type: directory + description: The created MMseqs2 database + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@Joon-Klaps" +maintainers: + - "@Joon-Klaps" + - "@vagkaratzas" diff --git a/modules/nf-core/mmseqs/createdb/tests/main.nf.test b/modules/nf-core/mmseqs/createdb/tests/main.nf.test new file mode 100644 index 00000000..d4a4f0c8 --- /dev/null +++ b/modules/nf-core/mmseqs/createdb/tests/main.nf.test @@ -0,0 +1,64 @@ +nextflow_process { + + name "Test Process MMSEQS_CREATEDB" + script "../main.nf" + process "MMSEQS_CREATEDB" + tag "modules" + tag "modules_nfcore" + tag "mmseqs" + tag "mmseqs/createdb" + + test("Should build an mmseqs db from a contigs fasta file") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fasta/contigs.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.db, + process.out.versions + ).match() + } + ) + } + + } + + test("Should build an mmseqs db from a zipped amino acid sequence file") { + + when { + process { + """ + + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.db, + process.out.versions + ).match() + } + ) + } + + } + +} diff --git a/modules/nf-core/mmseqs/createdb/tests/main.nf.test.snap b/modules/nf-core/mmseqs/createdb/tests/main.nf.test.snap new file mode 100644 index 00000000..a24c4118 --- /dev/null +++ b/modules/nf-core/mmseqs/createdb/tests/main.nf.test.snap @@ -0,0 +1,61 @@ +{ + "Should build an mmseqs db from a contigs fasta file": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test:md5,7c3c2c5926cf8fa82e66b9628f680256", + "test.dbtype:md5,c8ed20c23ba91f4577f84c940c86c7db", + "test.index:md5,5b2fd8abd0ad3fee24738af7082e6a6e", + "test.lookup:md5,32f88756dbcb6aaf7b239b0d61730f1b", + "test.source:md5,9ada5b3ea6e1a7e16c4418eb98ae8d9d", + "test_h:md5,8c29f5ed94d83d7115e9c8a883ce358d", + "test_h.dbtype:md5,8895d3d8e9322aedbf45249dfb3ddb0a", + "test_h.index:md5,87c7c8c6d16018ebfaa6f408391a5ae2" + ] + ] + ], + [ + "versions.yml:md5,e644cbe263d4560298438a24f268eb6f" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.4" + }, + "timestamp": "2024-08-09T10:01:44.163384" + }, + "Should build an mmseqs db from a zipped amino acid sequence file": { + "content": [ + [ + [ + { + "id": "test" + }, + [ + "test:md5,4b494965ed7ab67da8ca3f39523eb104", + "test.dbtype:md5,152afd7bf4dbe26f85032eee0269201a", + "test.index:md5,46f9d884e9a7f442fe1cd2ce339734e3", + "test.lookup:md5,3e27cb93d9ee875ad42a6f32f5651bdc", + "test.source:md5,eaa64fc8a5f7ec1ee49b0dcbd1a72e9d", + "test_h:md5,6e798b81c70d191f78939c2dd6223a7f", + "test_h.dbtype:md5,8895d3d8e9322aedbf45249dfb3ddb0a", + "test_h.index:md5,d5ac49ff56df064b980fa0eb5da57673" + ] + ] + ], + [ + "versions.yml:md5,e644cbe263d4560298438a24f268eb6f" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.4" + }, + "timestamp": "2024-08-09T10:01:48.894044" + } +} \ No newline at end of file diff --git a/modules/nf-core/mmseqs/createdb/tests/tags.yml b/modules/nf-core/mmseqs/createdb/tests/tags.yml new file mode 100644 index 00000000..1f511ab0 --- /dev/null +++ 
b/modules/nf-core/mmseqs/createdb/tests/tags.yml @@ -0,0 +1,2 @@ +mmseqs/createdb: + - modules/nf-core/mmseqs/createdb/** diff --git a/modules/nf-core/mmseqs/createtsv/environment.yml b/modules/nf-core/mmseqs/createtsv/environment.yml new file mode 100644 index 00000000..4840fc02 --- /dev/null +++ b/modules/nf-core/mmseqs/createtsv/environment.yml @@ -0,0 +1,7 @@ +name: mmseqs_createtsv +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::mmseqs2=15.6f452 diff --git a/modules/nf-core/mmseqs/createtsv/main.nf b/modules/nf-core/mmseqs/createtsv/main.nf new file mode 100644 index 00000000..dcd4c13d --- /dev/null +++ b/modules/nf-core/mmseqs/createtsv/main.nf @@ -0,0 +1,63 @@ + +process MMSEQS_CREATETSV { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mmseqs2:15.6f452--pl5321h6a68c12_0': + 'biocontainers/mmseqs2:15.6f452--pl5321h6a68c12_0' }" + + input: + tuple val(meta), path(db_result) + tuple val(meta2), path(db_query) + tuple val(meta3), path(db_target) + + output: + tuple val(meta), path("*.tsv"), emit: tsv + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: "*.dbtype" // database generated by mmseqs cluster | search | taxonomy | ... + def args3 = task.ext.args3 ?: "*.dbtype" // database generated by mmseqs/createdb + def args4 = task.ext.args4 ?: "*.dbtype" // database generated by mmseqs/createdb + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + # Find the files matching the suffix given via args | strip the suffix | keep the longest common prefix of the names as the database base path + DB_RESULT_PATH_NAME=\$(find -L "$db_result/" -maxdepth 1 -name "$args2" | sed 's/\\.[^.]*\$//' | sed -e 'N;s/^\\(.*\\).*\\n\\1.*\$/\\1\\n\\1/;D' ) + DB_QUERY_PATH_NAME=\$(find -L "$db_query/" -maxdepth 1 -name "$args3" | sed 's/\\.[^.]*\$//' | sed -e 'N;s/^\\(.*\\).*\\n\\1.*\$/\\1\\n\\1/;D' ) + DB_TARGET_PATH_NAME=\$(find -L "$db_target/" -maxdepth 1 -name "$args4" | sed 's/\\.[^.]*\$//' | sed -e 'N;s/^\\(.*\\).*\\n\\1.*\$/\\1\\n\\1/;D' ) + + mmseqs \\ + createtsv \\ + \$DB_QUERY_PATH_NAME \\ + \$DB_TARGET_PATH_NAME \\ + \$DB_RESULT_PATH_NAME \\ + ${prefix}.tsv \\ + $args \\ + --threads ${task.cpus} \\ + --compressed 1 + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + mmseqs: \$(mmseqs | grep 'Version' | sed 's/MMseqs2 Version: //') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + mmseqs: \$(mmseqs | grep 'Version' | sed 's/MMseqs2 Version: //') + END_VERSIONS + """ +} diff --git a/modules/nf-core/mmseqs/createtsv/meta.yml b/modules/nf-core/mmseqs/createtsv/meta.yml new file mode 100644 index 00000000..e85b066f --- /dev/null +++ b/modules/nf-core/mmseqs/createtsv/meta.yml @@ -0,0 +1,65 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json +name: "mmseqs_createtsv" +description: Create a tsv file from a query and a target database as well as the result database +keywords: + - protein sequence + - databases + - clustering + - searching + - indexing + - mmseqs2 + - tsv +tools: + - "mmseqs": + description: "MMseqs2: ultra fast and sensitive sequence search and clustering suite"
+ homepage: "https://github.com/soedinglab/MMseqs2" + documentation: "https://mmseqs.com/latest/userguide.pdf" + tool_dev_url: "https://github.com/soedinglab/MMseqs2" + doi: "10.1093/bioinformatics/btw006" + licence: ["GPL v3"] +input: + # Only when we have meta + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - db_result: + type: directory + description: an MMseqs2 database with result data + - meta2: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - db_query: + type: directory + description: an MMseqs2 database with query data + - meta3: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - db_target: + type: directory + description: an MMseqs2 database with target data +output: + #Only when we have meta + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - tsv: + type: file + description: The resulting tsv file created using the query, target and result MMseqs databases + pattern: "*.{tsv}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@Joon-Klaps" +maintainers: + - "@Joon-Klaps" diff --git a/modules/nf-core/mmseqs/createtsv/tests/cluster.nextflow.config b/modules/nf-core/mmseqs/createtsv/tests/cluster.nextflow.config new file mode 100644 index 00000000..48fee164 --- /dev/null +++ b/modules/nf-core/mmseqs/createtsv/tests/cluster.nextflow.config @@ -0,0 +1,6 @@ +process { + + withName: MMSEQS_CREATETSV { + ext.args2 = '*_clu.dbtype' + } +} diff --git a/modules/nf-core/mmseqs/createtsv/tests/main.nf.test b/modules/nf-core/mmseqs/createtsv/tests/main.nf.test new file mode 100644 index 00000000..1aa7463d --- /dev/null +++ b/modules/nf-core/mmseqs/createtsv/tests/main.nf.test @@ -0,0 +1,247 @@ +nextflow_process { + + name "Test Process MMSEQS_CREATETSV" + script "../main.nf" + process "MMSEQS_CREATETSV" + + tag "modules" + tag "modules_nfcore" + tag "mmseqs" + tag "mmseqs/taxonomy" + tag "mmseqs/createdb" + tag "mmseqs/databases" + tag "untar" + tag "mmseqs/createtsv" + + test("mmseqs/createtsv - bacteroides_fragilis - taxonomy") { + + config "./taxonomy.nextflow.config" + + setup { + run("MMSEQS_CREATEDB", alias: "MMSEQS_TAXA") { + script "../../createdb/main.nf" + process { + """ + input[0] = [ + [ id:'test_query', single_end:false ], + file(params.modules_testdata_base_path + 'genomics/prokaryotes/bacteroides_fragilis/genome/genome.fna.gz', checkIfExists: true) + ] + """ + } + } + run("MMSEQS_DATABASES") { + script "../../databases/main.nf" + process { + """ + input[0] = 'SILVA' + """ + } + } + run("MMSEQS_TAXONOMY") { + script "../../taxonomy/main.nf" + process { + """ + input[0] = MMSEQS_TAXA.out.db + input[1] = MMSEQS_DATABASES.out.database + """ + } + } + } + when { + process { + """ + input[0] = MMSEQS_TAXONOMY.out.db_taxonomy + input[1] = [[:],[]] + input[2] = MMSEQS_TAXA.out.db + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("mmseqs/createtsv - sarscov2 - cluster") { + + config "./cluster.nextflow.config" + + setup { + run("UNTAR", alias: "UNTAR_QUERY") { + script "../../../untar/main.nf" + process { + """ + input[0] = [ + [ id:'test_query', single_end:true ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/db/mmseqs.tar.gz', 
checkIfExists: true), + ] + """ + } + } + run("UNTAR", alias: "UNTAR_TARGET") { + script "../../../untar/main.nf" + process { + """ + input[0] = [ + [ id:'test_target', single_end:true ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/db/mmseqs.tar.gz', checkIfExists: true), + ] + """ + } + } + run("UNTAR", alias: "UNTAR_RESULT") { + script "../../../untar/main.nf" + process { + """ + input[0] = [ + [ id:'test_result', single_end:true ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/db/mmseqs.tar.gz', checkIfExists: true), + ] + """ + } + } + } + + when { + + process { + """ + ch_query = UNTAR_QUERY.out.untar + ch_target = UNTAR_TARGET.out.untar + ch_result = UNTAR_RESULT.out.untar + + input[0] = ch_result + input[1] = ch_query + input[2] = ch_target + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("mmseqs/createtsv - bacteroides_fragilis - taxonomy - stub") { + + options "-stub" + config "./taxonomy.nextflow.config" + + setup { + run("MMSEQS_CREATEDB", alias: "MMSEQS_TAXA") { + script "../../createdb/main.nf" + process { + """ + input[0] = [ + [ id:'test_query', single_end:false ], + file(params.modules_testdata_base_path + 'genomics/prokaryotes/bacteroides_fragilis/genome/genome.fna.gz', checkIfExists: true) + ] + """ + } + } + run("MMSEQS_DATABASES") { + script "../../databases/main.nf" + process { + """ + input[0] = 'SILVA' + """ + } + } + run("MMSEQS_TAXONOMY") { + script "../../taxonomy/main.nf" + process { + """ + input[0] = MMSEQS_TAXA.out.db + input[1] = MMSEQS_DATABASES.out.database + """ + } + } + } + when { + process { + """ + input[0] = MMSEQS_TAXONOMY.out.db_taxonomy + input[1] = [[:],[]] + input[2] = MMSEQS_TAXA.out.db + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("mmseqs/createtsv - sarscov2 - cluster - stub") { + + options "-stub" + config "./cluster.nextflow.config" + + setup { + run("UNTAR", alias: "UNTAR_QUERY") { + script "../../../untar/main.nf" + process { + """ + input[0] = [ + [ id:'test_query', single_end:true ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/db/mmseqs.tar.gz', checkIfExists: true), + ] + """ + } + } + run("UNTAR", alias: "UNTAR_TARGET") { + script "../../../untar/main.nf" + process { + """ + input[0] = [ + [ id:'test_target', single_end:true ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/db/mmseqs.tar.gz', checkIfExists: true), + ] + """ + } + } + run("UNTAR", alias: "UNTAR_RESULT") { + script "../../../untar/main.nf" + process { + """ + input[0] = [ + [ id:'test_result', single_end:true ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/db/mmseqs.tar.gz', checkIfExists: true), + ] + """ + } + } + } + + when { + + process { + """ + ch_query = UNTAR_QUERY.out.untar + ch_target = UNTAR_TARGET.out.untar + ch_result = UNTAR_RESULT.out.untar + + input[0] = ch_result + input[1] = ch_query + input[2] = ch_target + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} \ No newline at end of file diff --git a/modules/nf-core/mmseqs/createtsv/tests/main.nf.test.snap b/modules/nf-core/mmseqs/createtsv/tests/main.nf.test.snap new file mode 100644 index 00000000..1087de88 --- /dev/null +++ b/modules/nf-core/mmseqs/createtsv/tests/main.nf.test.snap @@ -0,0 +1,142 @@ +{ + "mmseqs/createtsv - bacteroides_fragilis - 
taxonomy - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test_query", + "single_end": false + }, + "test_query.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,20a853f50c920d431e5ab7593ca79e6f" + ], + "tsv": [ + [ + { + "id": "test_query", + "single_end": false + }, + "test_query.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,20a853f50c920d431e5ab7593ca79e6f" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-12T13:55:17.642787" + }, + "mmseqs/createtsv - sarscov2 - cluster - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test_result", + "single_end": true + }, + "test_result.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,20a853f50c920d431e5ab7593ca79e6f" + ], + "tsv": [ + [ + { + "id": "test_result", + "single_end": true + }, + "test_result.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,20a853f50c920d431e5ab7593ca79e6f" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-12T13:55:33.645454" + }, + "mmseqs/createtsv - bacteroides_fragilis - taxonomy": { + "content": [ + { + "0": [ + [ + { + "id": "test_query", + "single_end": false + }, + "test_query.tsv:md5,9179f5c85b8b87a4dc998c9d17840161" + ] + ], + "1": [ + "versions.yml:md5,20a853f50c920d431e5ab7593ca79e6f" + ], + "tsv": [ + [ + { + "id": "test_query", + "single_end": false + }, + "test_query.tsv:md5,9179f5c85b8b87a4dc998c9d17840161" + ] + ], + "versions": [ + "versions.yml:md5,20a853f50c920d431e5ab7593ca79e6f" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-12T13:54:45.718678" + }, + "mmseqs/createtsv - sarscov2 - cluster": { + "content": [ + { + "0": [ + [ + { + "id": "test_result", + "single_end": true + }, + "test_result.tsv:md5,4e7ba50ce2879660dc6595286bf0d097" + ] + ], + "1": [ + "versions.yml:md5,20a853f50c920d431e5ab7593ca79e6f" + ], + "tsv": [ + [ + { + "id": "test_result", + "single_end": true + }, + "test_result.tsv:md5,4e7ba50ce2879660dc6595286bf0d097" + ] + ], + "versions": [ + "versions.yml:md5,20a853f50c920d431e5ab7593ca79e6f" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-12T13:55:02.731974" + } +} \ No newline at end of file diff --git a/modules/nf-core/mmseqs/createtsv/tests/tags.yml b/modules/nf-core/mmseqs/createtsv/tests/tags.yml new file mode 100644 index 00000000..e27827f5 --- /dev/null +++ b/modules/nf-core/mmseqs/createtsv/tests/tags.yml @@ -0,0 +1,2 @@ +mmseqs/createtsv: + - "modules/nf-core/mmseqs/createtsv/**" diff --git a/modules/nf-core/mmseqs/createtsv/tests/taxonomy.nextflow.config b/modules/nf-core/mmseqs/createtsv/tests/taxonomy.nextflow.config new file mode 100644 index 00000000..f08205d1 --- /dev/null +++ b/modules/nf-core/mmseqs/createtsv/tests/taxonomy.nextflow.config @@ -0,0 +1,7 @@ +process { + + withName: MMSEQS_TAXONOMY { + ext.args = '--search-type 2' + } + +} diff --git a/modules/nf-core/mmseqs/databases/environment.yml b/modules/nf-core/mmseqs/databases/environment.yml new file mode 100644 index 00000000..3bf8437d --- /dev/null +++ b/modules/nf-core/mmseqs/databases/environment.yml @@ -0,0 +1,7 @@ +name: mmseqs_databases +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::mmseqs2=15.6f452 diff --git a/modules/nf-core/mmseqs/databases/main.nf b/modules/nf-core/mmseqs/databases/main.nf new file mode 100644 
index 00000000..3e228b29
--- /dev/null
+++ b/modules/nf-core/mmseqs/databases/main.nf
@@ -0,0 +1,62 @@
+process MMSEQS_DATABASES {
+    tag "${database}"
+    label 'process_medium'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/mmseqs2:15.6f452--pl5321h6a68c12_0':
+        'biocontainers/mmseqs2:15.6f452--pl5321h6a68c12_0' }"
+
+    input:
+    val database
+
+    output:
+    path "${prefix}/"   , emit: database
+    path "versions.yml" , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    prefix = task.ext.prefix ?: 'mmseqs_database'
+    """
+    mkdir ${prefix}/
+
+    mmseqs databases \\
+        ${database} \\
+        ${prefix}/database \\
+        tmp/ \\
+        --threads ${task.cpus} \\
+        --compressed 1 \\
+        ${args}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        mmseqs: \$(mmseqs | grep 'Version' | sed 's/MMseqs2 Version: //')
+    END_VERSIONS
+    """
+
+    stub:
+    prefix = task.ext.prefix ?: 'mmseqs_database'
+    """
+    mkdir ${prefix}/
+
+    touch ${prefix}/database
+    touch ${prefix}/database.dbtype
+    touch ${prefix}/database_h
+    touch ${prefix}/database_h.dbtype
+    touch ${prefix}/database_h.index
+    touch ${prefix}/database.index
+    touch ${prefix}/database.lookup
+    touch ${prefix}/database_mapping
+    touch ${prefix}/database.source
+    touch ${prefix}/database_taxonomy
+    touch ${prefix}/database.version
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        mmseqs: \$(mmseqs | grep 'Version' | sed 's/MMseqs2 Version: //')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/mmseqs/databases/meta.yml b/modules/nf-core/mmseqs/databases/meta.yml
new file mode 100644
index 00000000..803a87f6
--- /dev/null
+++ b/modules/nf-core/mmseqs/databases/meta.yml
@@ -0,0 +1,33 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json
+name: "mmseqs_databases"
+description: Download an MMseqs2-formatted database
+keywords:
+  - database
+  - indexing
+  - clustering
+  - searching
+tools:
+  - "mmseqs":
+      description: "MMseqs2: ultra fast and sensitive sequence search and clustering suite"
+      homepage: "https://github.com/soedinglab/MMseqs2"
+      documentation: "https://mmseqs.com/latest/userguide.pdf"
+      tool_dev_url: "https://github.com/soedinglab/MMseqs2"
+      doi: "10.1093/bioinformatics/btw006"
+      licence: ["GPL v3"]
+input:
+  - database:
+      type: string
+      description: Database available through the `mmseqs databases` interface - see https://github.com/soedinglab/MMseqs2/wiki#downloading-databases for details
+output:
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - database:
+      type: directory
+      description: Directory containing the processed MMseqs2 database
+authors:
+  - "@prototaxites"
+maintainers:
+  - "@prototaxites"
diff --git a/modules/nf-core/mmseqs/taxonomy/environment.yml b/modules/nf-core/mmseqs/taxonomy/environment.yml
new file mode 100644
index 00000000..fa40c277
--- /dev/null
+++ b/modules/nf-core/mmseqs/taxonomy/environment.yml
@@ -0,0 +1,9 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+name: "mmseqs_taxonomy"
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - "bioconda::mmseqs2=15.6f452"
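For orientation, the command the new `MMSEQS_DATABASES` process assembles reduces to a plain `mmseqs databases` call. A minimal sketch of the equivalent shell invocation, assuming MMseqs2 15.6f452 on PATH and using the module's default `mmseqs_database` prefix (SILVA is the seed database used in the accompanying tests; thread count here is arbitrary):

```bash
#!/usr/bin/env bash
set -euo pipefail

# Download and build a seed database into a fresh output directory,
# mirroring the module's script block above.
mkdir mmseqs_database/
mmseqs databases \
    SILVA \
    mmseqs_database/database \
    tmp/ \
    --threads 4 \
    --compressed 1

# Record the tool version the same way the module's versions.yml does.
mmseqs | grep 'Version' | sed 's/MMseqs2 Version: //'
```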
diff --git a/modules/nf-core/mmseqs/taxonomy/main.nf b/modules/nf-core/mmseqs/taxonomy/main.nf
new file mode 100644
index 00000000..54849885
--- /dev/null
+++ b/modules/nf-core/mmseqs/taxonomy/main.nf
@@ -0,0 +1,65 @@
+process MMSEQS_TAXONOMY {
+    tag "$meta.id"
+    label 'process_high'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/mmseqs2:15.6f452--pl5321h6a68c12_0':
+        'biocontainers/mmseqs2:15.6f452--pl5321h6a68c12_0' }"
+
+    input:
+    tuple val(meta), path(db_query)
+    path(db_target)
+
+    output:
+    tuple val(meta), path("${prefix}_taxonomy"), emit: db_taxonomy
+    path "versions.yml" , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def args2 = task.ext.args2 ?: "*.dbtype" // represents the db_query
+    def args3 = task.ext.args3 ?: "*.dbtype" // represents the db_target
+    prefix = task.ext.prefix ?: "${meta.id}"
+
+    """
+    mkdir -p ${prefix}_taxonomy
+
+    # Select files matching the suffix given via args | strip the suffix | keep the longest common prefix (the database base name)
+    DB_QUERY_PATH_NAME=\$(find -L "${db_query}/" -maxdepth 1 -name "${args2}" | sed 's/\\.[^.]*\$//' | sed -e 'N;s/^\\(.*\\).*\\n\\1.*\$/\\1\\n\\1/;D' )
+    DB_TARGET_PATH_NAME=\$(find -L "${db_target}/" -maxdepth 1 -name "${args3}" | sed 's/\\.[^.]*\$//' | sed -e 'N;s/^\\(.*\\).*\\n\\1.*\$/\\1\\n\\1/;D' )
+
+    mmseqs \\
+        taxonomy \\
+        \$DB_QUERY_PATH_NAME \\
+        \$DB_TARGET_PATH_NAME \\
+        ${prefix}_taxonomy/${prefix} \\
+        tmp1 \\
+        $args \\
+        --threads ${task.cpus} \\
+        --compressed 1
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        mmseqs: \$(mmseqs | grep 'Version' | sed 's/MMseqs2 Version: //')
+    END_VERSIONS
+    """
+
+    stub:
+    def args = task.ext.args ?: ''
+    prefix = task.ext.prefix ?: "${meta.id}"
+
+    """
+    mkdir -p ${prefix}_taxonomy
+    touch ${prefix}_taxonomy/${prefix}.{0..25}
+    touch ${prefix}_taxonomy/${prefix}.dbtype
+    touch ${prefix}_taxonomy/${prefix}.index
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        mmseqs: \$(mmseqs | grep 'Version' | sed 's/MMseqs2 Version: //')
+    END_VERSIONS
+    """
+}
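The `find | sed` pipeline in `MMSEQS_TAXONOMY` above is the densest part of the module: it lists the files matching the `ext.args2`/`ext.args3` glob, strips the final extension, and then reduces what remains to the longest common prefix, which is the bare database name MMseqs2 expects. A standalone sketch assuming GNU sed, with made-up file names (`demo_db/db.*` is purely illustrative):

```bash
#!/usr/bin/env bash
set -euo pipefail

# Fake database directory: an MMseqs2 database is a family of files
# sharing one prefix ("demo_db/db" here).
mkdir -p demo_db
touch demo_db/db.dbtype demo_db/db.index demo_db/db_h.dbtype

# Same pipeline as in the module: match *.dbtype, drop the final
# extension, then collapse the remaining names to their longest common
# prefix with sed's two-line (N;...;D) window.
find -L "demo_db/" -maxdepth 1 -name "*.dbtype" \
    | sed 's/\.[^.]*$//' \
    | sed -e 'N;s/^\(.*\).*\n\1.*$/\1\n\1/;D'
# prints: demo_db/db
```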
diff --git a/modules/nf-core/mmseqs/taxonomy/meta.yml b/modules/nf-core/mmseqs/taxonomy/meta.yml
new file mode 100644
index 00000000..d836029c
--- /dev/null
+++ b/modules/nf-core/mmseqs/taxonomy/meta.yml
@@ -0,0 +1,48 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "mmseqs_taxonomy"
+description: Computes the lowest common ancestor of each query sequence by identifying its homologs in the target database.
+keywords:
+  - protein sequence
+  - nucleotide sequence
+  - databases
+  - taxonomy
+  - homologs
+  - mmseqs2
+tools:
+  - "mmseqs":
+      description: "MMseqs2: ultra fast and sensitive sequence search and clustering suite"
+      homepage: "https://github.com/soedinglab/MMseqs2"
+      documentation: "https://mmseqs.com/latest/userguide.pdf"
+      tool_dev_url: "https://github.com/soedinglab/MMseqs2"
+      doi: "10.1093/bioinformatics/btw006"
+      licence: ["GPL v3"]
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. `[ id:'test', single_end:false ]`
+  - db_query:
+      type: directory
+      description: An MMseqs2 database with query data
+  - db_target:
+      type: directory
+      description: An MMseqs2 database with target data, including the taxonomy classification
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. `[ id:'test', single_end:false ]`
+  - db_taxonomy:
+      type: directory
+      description: An MMseqs2 database containing the computed taxonomy classification of the query sequences
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+authors:
+  - "@darcy220606"
+maintainers:
+  - "@darcy220606"
diff --git a/modules/nf-core/mmseqs/taxonomy/tests/main.nf.test b/modules/nf-core/mmseqs/taxonomy/tests/main.nf.test
new file mode 100644
index 00000000..95f1bc22
--- /dev/null
+++ b/modules/nf-core/mmseqs/taxonomy/tests/main.nf.test
@@ -0,0 +1,81 @@
+nextflow_process {
+
+    name "Test Process MMSEQS_TAXONOMY"
+    script "../main.nf"
+    config "./nextflow.config"
+    process "MMSEQS_TAXONOMY"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "mmseqs"
+    tag "mmseqs/taxonomy"
+    tag "mmseqs/createdb"
+    tag "mmseqs/databases"
+
+    setup {
+        run("MMSEQS_CREATEDB") {
+            script "modules/nf-core/mmseqs/createdb/main.nf"
+            process {
+                """
+                input[0] = [
+                    [ id:'test_query', single_end:false ],
+                    file(params.modules_testdata_base_path + 'genomics/prokaryotes/bacteroides_fragilis/genome/genome.fna.gz', checkIfExists: true)
+                ]
+                """
+            }
+        }
+
+        run("MMSEQS_DATABASES") {
+            script "modules/nf-core/mmseqs/databases/main.nf"
+            process {
+                """
+                input[0] = 'SILVA'
+                """
+            }
+        }
+    }
+
+    test("mmseqs/taxonomy - bacteroides_fragilis - genome_nt") {
+        when {
+            process {
+                """
+                input[0] = MMSEQS_CREATEDB.out.db
+                input[1] = MMSEQS_DATABASES.out.database
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    file(process.out.db_taxonomy[0][1]).list().sort(),
+                    process.out.versions
+                    ).match()
+                }
+            )
+        }
+    }
+
+    test("mmseqs/taxonomy - bacteroides_fragilis - genome_nt - stub") {
+
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = MMSEQS_CREATEDB.out.db
+                input[1] = MMSEQS_DATABASES.out.database
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+    }
+}
+
diff --git a/modules/nf-core/mmseqs/taxonomy/tests/main.nf.test.snap b/modules/nf-core/mmseqs/taxonomy/tests/main.nf.test.snap
new file mode 100644
index 00000000..225680ac
--- /dev/null
+++ b/modules/nf-core/mmseqs/taxonomy/tests/main.nf.test.snap
@@ -0,0 +1,113 @@
+{
+    "mmseqs/taxonomy - bacteroides_fragilis - genome_nt": {
+        "content": [
+            [
+                "test_query.0",
+                "test_query.1",
+                "test_query.dbtype",
+                "test_query.index"
+            ],
+            [
+                "versions.yml:md5,a8f24dca956a1c84099ff129f826c63f"
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "24.04.4"
+        },
+        "timestamp": "2024-08-09T10:11:53.632751"
+    },
+    "mmseqs/taxonomy - bacteroides_fragilis - genome_nt - stub": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test_query",
+                            "single_end": false
+                        },
+                        [
+                            "test_query.0:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test_query.1:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test_query.10:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test_query.11:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test_query.12:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test_query.13:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test_query.14:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test_query.15:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test_query.16:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test_query.17:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test_query.18:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test_query.19:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test_query.2:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test_query.20:md5,d41d8cd98f00b204e9800998ecf8427e",
+
"test_query.21:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.22:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.23:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.24:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.25:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.3:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.4:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.5:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.6:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.7:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.8:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.9:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.dbtype:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.index:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + "versions.yml:md5,a8f24dca956a1c84099ff129f826c63f" + ], + "db_taxonomy": [ + [ + { + "id": "test_query", + "single_end": false + }, + [ + "test_query.0:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.1:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.10:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.11:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.12:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.13:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.14:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.15:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.16:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.17:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.18:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.19:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.2:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.20:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.21:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.22:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.23:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.24:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.25:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.3:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.4:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.5:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.6:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.7:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.8:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.9:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.dbtype:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_query.index:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions": [ + "versions.yml:md5,a8f24dca956a1c84099ff129f826c63f" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.4" + }, + "timestamp": "2024-08-09T10:12:00.148815" + } +} \ No newline at end of file diff --git a/modules/nf-core/mmseqs/taxonomy/tests/nextflow.config b/modules/nf-core/mmseqs/taxonomy/tests/nextflow.config new file mode 100644 index 00000000..72f6fc81 --- /dev/null +++ b/modules/nf-core/mmseqs/taxonomy/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: MMSEQS_TAXONOMY { + ext.args = '--search-type 2' + } +} diff --git a/modules/nf-core/mmseqs/taxonomy/tests/tags.yml b/modules/nf-core/mmseqs/taxonomy/tests/tags.yml new file mode 100644 index 00000000..76172197 --- /dev/null +++ b/modules/nf-core/mmseqs/taxonomy/tests/tags.yml @@ -0,0 +1,2 @@ +mmseqs/taxonomy: + - "modules/nf-core/mmseqs/taxonomy/**" diff --git a/modules/nf-core/multiqc/environment.yml b/modules/nf-core/multiqc/environment.yml new file mode 100644 index 00000000..0eb9d9c9 --- /dev/null +++ 
b/modules/nf-core/multiqc/environment.yml
@@ -0,0 +1,7 @@
+name: multiqc
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - bioconda::multiqc=1.24
diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf
index 65d7dd0d..9790c23c 100644
--- a/modules/nf-core/multiqc/main.nf
+++ b/modules/nf-core/multiqc/main.nf
@@ -1,16 +1,18 @@
 process MULTIQC {
     label 'process_single'
 
-    conda "bioconda::multiqc=1.15"
+    conda "${moduleDir}/environment.yml"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/multiqc:1.15--pyhdfd78af_0' :
-        'biocontainers/multiqc:1.15--pyhdfd78af_0' }"
+        'https://depot.galaxyproject.org/singularity/multiqc:1.24--pyhdfd78af_0' :
+        'biocontainers/multiqc:1.24--pyhdfd78af_0' }"
 
     input:
     path multiqc_files, stageAs: "?/*"
     path(multiqc_config)
     path(extra_multiqc_config)
     path(multiqc_logo)
+    path(replace_names)
+    path(sample_names)
 
     output:
     path "*multiqc_report.html", emit: report
@@ -23,14 +25,22 @@ process MULTIQC {
 
     script:
     def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ? "--filename ${task.ext.prefix}.html" : ''
     def config = multiqc_config ? "--config $multiqc_config" : ''
     def extra_config = extra_multiqc_config ? "--config $extra_multiqc_config" : ''
+    def logo = multiqc_logo ? /--cl-config 'custom_logo: "${multiqc_logo}"'/ : ''
+    def replace = replace_names ? "--replace-names ${replace_names}" : ''
+    def samples = sample_names ? "--sample-names ${sample_names}" : ''
     """
     multiqc \\
         --force \\
         $args \\
         $config \\
+        $prefix \\
         $extra_config \\
+        $logo \\
+        $replace \\
+        $samples \\
         .
 
     cat <<-END_VERSIONS > versions.yml
@@ -41,7 +51,7 @@ process MULTIQC {
 
     stub:
     """
-    touch multiqc_data
+    mkdir multiqc_data
     touch multiqc_plots
     touch multiqc_report.html
diff --git a/modules/nf-core/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml
index f93b5ee5..382c08cb 100644
--- a/modules/nf-core/multiqc/meta.yml
+++ b/modules/nf-core/multiqc/meta.yml
@@ -1,5 +1,4 @@
-# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json
-name: MultiQC
+name: multiqc
 description: Aggregate results from bioinformatics analyses across many samples into a single report
 keywords:
   - QC
@@ -13,7 +12,6 @@ tools:
       homepage: https://multiqc.info/
       documentation: https://multiqc.info/docs/
       licence: ["GPL-3.0-or-later"]
-
 input:
   - multiqc_files:
       type: file
@@ -31,7 +29,19 @@ input:
       type: file
       description: Optional logo file for MultiQC
       pattern: "*.{png}"
-
+  - replace_names:
+      type: file
+      description: |
+        Optional two-column sample renaming file: the first column holds a set
+        of patterns, the second the corresponding replacements. Passed via
+        MultiQC's `--replace-names` option.
+      pattern: "*.{tsv}"
+  - sample_names:
+      type: file
+      description: |
+        Optional TSV file with headers, passed to the MultiQC --sample-names
+        argument.
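Since several of the new inputs are optional, it may help to see the command the updated process emits when everything is supplied. A sketch only, preserving the module's flag order; all file names here (`multiqc_config.yml`, `extra_config.yml`, `logo.png`, `rename.tsv`, `names.tsv`) are placeholders, not pipeline defaults, and each flag simply drops out when its input is absent:

```bash
#!/usr/bin/env bash
set -euo pipefail

# Fully expanded MultiQC 1.24 call as assembled by the module when every
# optional input is present.
multiqc \
    --force \
    --config multiqc_config.yml \
    --filename my_report.html \
    --config extra_config.yml \
    --cl-config 'custom_logo: "logo.png"' \
    --replace-names rename.tsv \
    --sample-names names.tsv \
    .
```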
+ pattern: "*.{tsv}" output: - report: type: file @@ -54,3 +64,8 @@ authors: - "@bunop" - "@drpatelh" - "@jfy133" +maintainers: + - "@abhi18av" + - "@bunop" + - "@drpatelh" + - "@jfy133" diff --git a/modules/nf-core/multiqc/tests/main.nf.test b/modules/nf-core/multiqc/tests/main.nf.test new file mode 100644 index 00000000..6aa27f4c --- /dev/null +++ b/modules/nf-core/multiqc/tests/main.nf.test @@ -0,0 +1,90 @@ +nextflow_process { + + name "Test Process MULTIQC" + script "../main.nf" + process "MULTIQC" + + tag "modules" + tag "modules_nfcore" + tag "multiqc" + + test("sarscov2 single-end [fastqc]") { + + when { + process { + """ + input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) + input[1] = [] + input[2] = [] + input[3] = [] + input[4] = [] + input[5] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.report[0] ==~ ".*/multiqc_report.html" }, + { assert process.out.data[0] ==~ ".*/multiqc_data" }, + { assert snapshot(process.out.versions).match("multiqc_versions_single") } + ) + } + + } + + test("sarscov2 single-end [fastqc] [config]") { + + when { + process { + """ + input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) + input[1] = Channel.of(file("https://github.com/nf-core/tools/raw/dev/nf_core/pipeline-template/assets/multiqc_config.yml", checkIfExists: true)) + input[2] = [] + input[3] = [] + input[4] = [] + input[5] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.report[0] ==~ ".*/multiqc_report.html" }, + { assert process.out.data[0] ==~ ".*/multiqc_data" }, + { assert snapshot(process.out.versions).match("multiqc_versions_config") } + ) + } + } + + test("sarscov2 single-end [fastqc] - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) + input[1] = [] + input[2] = [] + input[3] = [] + input[4] = [] + input[5] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.report.collect { file(it).getName() } + + process.out.data.collect { file(it).getName() } + + process.out.plots.collect { file(it).getName() } + + process.out.versions ).match("multiqc_stub") } + ) + } + + } +} diff --git a/modules/nf-core/multiqc/tests/main.nf.test.snap b/modules/nf-core/multiqc/tests/main.nf.test.snap new file mode 100644 index 00000000..ef35f6d5 --- /dev/null +++ b/modules/nf-core/multiqc/tests/main.nf.test.snap @@ -0,0 +1,41 @@ +{ + "multiqc_versions_single": { + "content": [ + [ + "versions.yml:md5,0c5c5c2a79011c26b34b0b0e80b7c8e2" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-10T12:41:34.562023" + }, + "multiqc_stub": { + "content": [ + [ + "multiqc_report.html", + "multiqc_data", + "multiqc_plots", + "versions.yml:md5,0c5c5c2a79011c26b34b0b0e80b7c8e2" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-10T11:27:11.933869532" + }, + "multiqc_versions_config": { + "content": [ + [ + "versions.yml:md5,0c5c5c2a79011c26b34b0b0e80b7c8e2" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-10T11:26:56.709849369" + } +} diff --git a/modules/nf-core/multiqc/tests/tags.yml b/modules/nf-core/multiqc/tests/tags.yml new 
file mode 100644 index 00000000..bea6c0d3 --- /dev/null +++ b/modules/nf-core/multiqc/tests/tags.yml @@ -0,0 +1,2 @@ +multiqc: + - modules/nf-core/multiqc/** diff --git a/modules/nf-core/prodigal/environment.yml b/modules/nf-core/prodigal/environment.yml new file mode 100644 index 00000000..85746534 --- /dev/null +++ b/modules/nf-core/prodigal/environment.yml @@ -0,0 +1,8 @@ +name: prodigal +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::prodigal=2.6.3 + - conda-forge::pigz=2.6 diff --git a/modules/nf-core/prodigal/main.nf b/modules/nf-core/prodigal/main.nf index 8cf87a6d..49ced167 100644 --- a/modules/nf-core/prodigal/main.nf +++ b/modules/nf-core/prodigal/main.nf @@ -2,7 +2,7 @@ process PRODIGAL { tag "$meta.id" label 'process_single' - conda "bioconda::prodigal=2.6.3 conda-forge::pigz=2.6" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/mulled-v2-2e442ba7b07bfa102b9cf8fac6221263cd746ab8:57f05cfa73f769d6ed6d54144cb3aa2a6a6b17e0-0' : 'biocontainers/mulled-v2-2e442ba7b07bfa102b9cf8fac6221263cd746ab8:57f05cfa73f769d6ed6d54144cb3aa2a6a6b17e0-0' }" @@ -33,7 +33,10 @@ process PRODIGAL { -a "${prefix}.faa" \\ -s "${prefix}_all.txt" - pigz -nm ${prefix}* + pigz -nm ${prefix}.fna + pigz -nm ${prefix}.${output_format} + pigz -nm ${prefix}.faa + pigz -nm ${prefix}_all.txt cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -41,4 +44,21 @@ process PRODIGAL { pigz: \$(pigz -V 2>&1 | sed 's/pigz //g') END_VERSIONS """ + + stub: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.fna.gz + touch ${prefix}.${output_format}.gz + touch ${prefix}.faa.gz + touch ${prefix}_all.txt.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + prodigal: \$(prodigal -v 2>&1 | sed -n 's/Prodigal V\\(.*\\):.*/\\1/p') + pigz: \$(pigz -V 2>&1 | sed 's/pigz //g') + END_VERSIONS + """ + } diff --git a/modules/nf-core/prodigal/meta.yml b/modules/nf-core/prodigal/meta.yml index 30747a90..a5d15d58 100644 --- a/modules/nf-core/prodigal/meta.yml +++ b/modules/nf-core/prodigal/meta.yml @@ -12,7 +12,6 @@ tools: tool_dev_url: https://github.com/hyattpd/Prodigal doi: "10.1186/1471-2105-11-119" licence: ["GPL v3"] - input: - meta: type: map @@ -25,7 +24,6 @@ input: - output_format: type: string description: Output format ("gbk"/"gff"/"sqn"/"sco") - output: - meta: type: map @@ -52,6 +50,7 @@ output: type: file description: gene annotations in output_format given as input pattern: "*.{output_format}" - authors: - "@grst" +maintainers: + - "@grst" diff --git a/modules/nf-core/prodigal/tests/main.nf.test b/modules/nf-core/prodigal/tests/main.nf.test new file mode 100644 index 00000000..446bd0d1 --- /dev/null +++ b/modules/nf-core/prodigal/tests/main.nf.test @@ -0,0 +1,101 @@ +nextflow_process { + + name "Test Process PRODIGAL" + script "../main.nf" + process "PRODIGAL" + + tag "modules" + tag "modules_nfcore" + tag "prodigal" + + test("prodigal - sarscov2 - gff") { + when { + process { + """ + input[0] = [ + [id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[1] = 'gff' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("prodigal - sarscov2 - gbk") { + when { + process { + """ + input[0] = [ + [id:'test', 
single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[1] = 'gbk' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("prodigal - sarscov2 - gff - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[1] = 'gff' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.out).match() } + ) + } + } + + test("prodigal - sarscov2 - gbk - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[1] = 'gbk' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.out).match() } + ) + } + } + +} \ No newline at end of file diff --git a/modules/nf-core/prodigal/tests/main.nf.test.snap b/modules/nf-core/prodigal/tests/main.nf.test.snap new file mode 100644 index 00000000..f29802b4 --- /dev/null +++ b/modules/nf-core/prodigal/tests/main.nf.test.snap @@ -0,0 +1,196 @@ +{ + "prodigal - sarscov2 - gbk - stub": { + "content": null, + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-18T13:58:09.852618454" + }, + "prodigal - sarscov2 - gff": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.gff.gz:md5,612c2724c2891c63350f171f74165757" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fna.gz:md5,1bc8a05bcb72a3c324f5e4ffaa716d3b" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.faa.gz:md5,7168b854103f3586ccfdb71a44c389f7" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test_all.txt.gz:md5,e6d6c50f0c39e5169f84ae3c90837fa9" + ] + ], + "4": [ + "versions.yml:md5,9541e53a6927e9856036bb97bfb30307" + ], + "all_gene_annotations": [ + [ + { + "id": "test", + "single_end": false + }, + "test_all.txt.gz:md5,e6d6c50f0c39e5169f84ae3c90837fa9" + ] + ], + "amino_acid_fasta": [ + [ + { + "id": "test", + "single_end": false + }, + "test.faa.gz:md5,7168b854103f3586ccfdb71a44c389f7" + ] + ], + "gene_annotations": [ + [ + { + "id": "test", + "single_end": false + }, + "test.gff.gz:md5,612c2724c2891c63350f171f74165757" + ] + ], + "nucleotide_fasta": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fna.gz:md5,1bc8a05bcb72a3c324f5e4ffaa716d3b" + ] + ], + "versions": [ + "versions.yml:md5,9541e53a6927e9856036bb97bfb30307" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-18T13:57:49.57989696" + }, + "prodigal - sarscov2 - gff - stub": { + "content": null, + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-18T13:58:03.210222528" + }, + "prodigal - sarscov2 - gbk": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.gbk.gz:md5,188b3a0e3f78740ded7f3ec4d876cb4b" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fna.gz:md5,1bc8a05bcb72a3c324f5e4ffaa716d3b" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.faa.gz:md5,7168b854103f3586ccfdb71a44c389f7" + ] + ], + "3": [ + [ + { + "id": "test", + 
"single_end": false + }, + "test_all.txt.gz:md5,e6d6c50f0c39e5169f84ae3c90837fa9" + ] + ], + "4": [ + "versions.yml:md5,9541e53a6927e9856036bb97bfb30307" + ], + "all_gene_annotations": [ + [ + { + "id": "test", + "single_end": false + }, + "test_all.txt.gz:md5,e6d6c50f0c39e5169f84ae3c90837fa9" + ] + ], + "amino_acid_fasta": [ + [ + { + "id": "test", + "single_end": false + }, + "test.faa.gz:md5,7168b854103f3586ccfdb71a44c389f7" + ] + ], + "gene_annotations": [ + [ + { + "id": "test", + "single_end": false + }, + "test.gbk.gz:md5,188b3a0e3f78740ded7f3ec4d876cb4b" + ] + ], + "nucleotide_fasta": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fna.gz:md5,1bc8a05bcb72a3c324f5e4ffaa716d3b" + ] + ], + "versions": [ + "versions.yml:md5,9541e53a6927e9856036bb97bfb30307" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-18T13:57:56.606374214" + } +} \ No newline at end of file diff --git a/modules/nf-core/prodigal/tests/tags.yml b/modules/nf-core/prodigal/tests/tags.yml new file mode 100644 index 00000000..fc0cb020 --- /dev/null +++ b/modules/nf-core/prodigal/tests/tags.yml @@ -0,0 +1,2 @@ +prodigal: + - "modules/nf-core/prodigal/**" diff --git a/modules/nf-core/prokka/environment.yml b/modules/nf-core/prokka/environment.yml new file mode 100644 index 00000000..d7c44d5a --- /dev/null +++ b/modules/nf-core/prokka/environment.yml @@ -0,0 +1,7 @@ +name: prokka +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::prokka=1.14.6 diff --git a/modules/nf-core/prokka/main.nf b/modules/nf-core/prokka/main.nf index 60fbe232..adfda037 100644 --- a/modules/nf-core/prokka/main.nf +++ b/modules/nf-core/prokka/main.nf @@ -2,9 +2,9 @@ process PROKKA { tag "$meta.id" label 'process_low' - conda "bioconda::prokka=1.14.6" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/prokka%3A1.14.6--pl5321hdfd78af_4' : + 'https://depot.galaxyproject.org/singularity/prokka:1.14.6--pl5321hdfd78af_4' : 'biocontainers/prokka:1.14.6--pl5321hdfd78af_4' }" input: diff --git a/modules/nf-core/prokka/meta.yml b/modules/nf-core/prokka/meta.yml index 7fc9e185..9d82ffac 100644 --- a/modules/nf-core/prokka/meta.yml +++ b/modules/nf-core/prokka/meta.yml @@ -10,7 +10,6 @@ tools: homepage: https://github.com/tseemann/prokka doi: "10.1093/bioinformatics/btu153" licence: ["GPL v2"] - input: - meta: type: map @@ -27,7 +26,6 @@ input: - prodigal_tf: type: file description: Training file to use for Prodigal (optional) - output: - meta: type: map @@ -86,6 +84,7 @@ output: type: file description: tab-separated file of all features (locus_tag,ftype,len_bp,gene,EC_number,COG,product) pattern: "*.{tsv}" - authors: - "@rpetit3" +maintainers: + - "@rpetit3" diff --git a/modules/nf-core/prokka/tests/main.nf.test b/modules/nf-core/prokka/tests/main.nf.test new file mode 100644 index 00000000..dca19bba --- /dev/null +++ b/modules/nf-core/prokka/tests/main.nf.test @@ -0,0 +1,50 @@ +nextflow_process { + + name "Test Process PROKKA" + script "../main.nf" + process "PROKKA" + + tag "modules" + tag "modules_nfcore" + tag "prokka" + + test("Prokka - sarscov2 - genome.fasta") { + + when { + process { + """ + input[0] = Channel.fromList([ + tuple([ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)) + ]) + input[1] = [] + input[2] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.gbk.get(0).get(1)).exists() }, + { assert path(process.out.log.get(0).get(1)).exists() }, + { assert path(process.out.sqn.get(0).get(1)).exists() }, + { assert snapshot( + process.out.gff, + process.out.fna, + process.out.faa, + process.out.ffn, + process.out.fsa, + process.out.tbl, + process.out.err, + process.out.txt, + process.out.tsv, + process.out.versions + ).match() + } + ) + } + + } + +} diff --git a/modules/nf-core/prokka/tests/main.nf.test.snap b/modules/nf-core/prokka/tests/main.nf.test.snap new file mode 100644 index 00000000..874c989d --- /dev/null +++ b/modules/nf-core/prokka/tests/main.nf.test.snap @@ -0,0 +1,95 @@ +{ + "Prokka - sarscov2 - genome.fasta": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.gff:md5,5dbfb8fcf2db020564c16045976a0933" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.fna:md5,787307f29a263e5657cc276ebbf7e2b3" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.faa:md5,a4ceda83262b3c222a6b1f508fb9e24b" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.ffn:md5,80f474b5367b7ea5ed23791935f65e34" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.fsa:md5,71bbefcb7f12046bcd3263f58cfd5404" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.tbl:md5,d8f816a066ced94b62d9618b13fb8add" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.err:md5,b3daedc646fddd422824e2b3e5e9229d" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.txt:md5,b40e485ffc8eaf1feacf8d79d9751a33" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.tsv:md5,da7c720c3018c5081d6a70b517b7d450" + ] + ], + [ + "versions.yml:md5,e83a22fe02167e290d90853b45650db9" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": 
"2024-07-30T12:34:20.447734" + } +} \ No newline at end of file diff --git a/modules/nf-core/prokka/tests/tags.yml b/modules/nf-core/prokka/tests/tags.yml new file mode 100644 index 00000000..a2dc7bdc --- /dev/null +++ b/modules/nf-core/prokka/tests/tags.yml @@ -0,0 +1,2 @@ +prokka: + - "modules/nf-core/prokka/**" diff --git a/modules/nf-core/pyrodigal/environment.yml b/modules/nf-core/pyrodigal/environment.yml new file mode 100644 index 00000000..3e538e8c --- /dev/null +++ b/modules/nf-core/pyrodigal/environment.yml @@ -0,0 +1,10 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +name: "pyrodigal" +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::pyrodigal=3.3.0 + - conda-forge::pigz=2.8 diff --git a/modules/nf-core/pyrodigal/main.nf b/modules/nf-core/pyrodigal/main.nf index aa8e9e48..7cb97594 100644 --- a/modules/nf-core/pyrodigal/main.nf +++ b/modules/nf-core/pyrodigal/main.nf @@ -2,20 +2,21 @@ process PYRODIGAL { tag "$meta.id" label 'process_single' - conda "bioconda::pyrodigal=2.1.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-2fe9a8ce513c91df34b43a6610df94c3a2eb3bd0:697b3838b186fac6a9ceec198b09d4032162a079-0': - 'biocontainers/mulled-v2-2fe9a8ce513c91df34b43a6610df94c3a2eb3bd0:697b3838b186fac6a9ceec198b09d4032162a079-0' }" + 'https://depot.galaxyproject.org/singularity/mulled-v2-2fe9a8ce513c91df34b43a6610df94c3a2eb3bd0:47e7d40834619419f202394563267d74cef857be-0': + 'biocontainers/mulled-v2-2fe9a8ce513c91df34b43a6610df94c3a2eb3bd0:47e7d40834619419f202394563267d74cef857be-0' }" input: tuple val(meta), path(fasta) + val(output_format) output: - tuple val(meta), path("*.gff.gz") , emit: gff - tuple val(meta), path("*.fna.gz") , emit: fna - tuple val(meta), path("*.faa.gz") , emit: faa - tuple val(meta), path("*.score.gz") , emit: score - path "versions.yml" , emit: versions + tuple val(meta), path("*.${output_format}.gz") , emit: annotations + tuple val(meta), path("*.fna.gz") , emit: fna + tuple val(meta), path("*.faa.gz") , emit: faa + tuple val(meta), path("*.score.gz") , emit: score + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -29,13 +30,29 @@ process PYRODIGAL { pyrodigal \\ $args \\ -i pigz_fasta.fna \\ - -o ${prefix}.gff \\ + -f $output_format \\ + -o "${prefix}.${output_format}" \\ -d ${prefix}.fna \\ -a ${prefix}.faa \\ -s ${prefix}.score pigz -nmf ${prefix}* + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pyrodigal: \$(echo \$(pyrodigal --version 2>&1 | sed 's/pyrodigal v//')) + END_VERSIONS + """ + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.${output_format}.gz + touch ${prefix}.fna.gz + touch ${prefix}.faa.gz + touch ${prefix}.score.gz + touch versions.yml + cat <<-END_VERSIONS > versions.yml "${task.process}": pyrodigal: \$(echo \$(pyrodigal --version 2>&1 | sed 's/pyrodigal v//')) diff --git a/modules/nf-core/pyrodigal/meta.yml b/modules/nf-core/pyrodigal/meta.yml index 6553e3c6..0967606f 100644 --- a/modules/nf-core/pyrodigal/meta.yml +++ b/modules/nf-core/pyrodigal/meta.yml @@ -12,8 +12,7 @@ tools: documentation: "https://pyrodigal.readthedocs.org/" tool_dev_url: "https://github.com/althonos/pyrodigal/" doi: "10.21105/joss.04296" - licence: "['GPL v3']" - + licence: ["GPL v3"] input: - 
meta: type: map @@ -24,7 +23,10 @@ input: type: file description: FASTA file pattern: "*.{fasta.gz,fa.gz,fna.gz}" - + - output_format: + type: string + description: Output format + pattern: "{gbk,gff}" output: - meta: type: map @@ -35,10 +37,10 @@ output: type: file description: File containing software versions pattern: "versions.yml" - - gff: + - annotations: type: file - description: gene annotations in gff format - pattern: "*.{gff.gz}" + description: Gene annotations. The file format is specified via input channel "output_format". + pattern: "*.{gbk,gff}.gz" - faa: type: file description: protein translations file @@ -51,6 +53,7 @@ output: type: file description: all potential genes (with scores) pattern: "*.{score.gz}" - authors: - "@louperelo" +maintainers: + - "@louperelo" diff --git a/modules/nf-core/pyrodigal/tests/main.nf.test b/modules/nf-core/pyrodigal/tests/main.nf.test new file mode 100644 index 00000000..faa7c8ec --- /dev/null +++ b/modules/nf-core/pyrodigal/tests/main.nf.test @@ -0,0 +1,111 @@ +nextflow_process { + + name "Test Process PYRODIGAL" + script "../main.nf" + process "PYRODIGAL" + + tag "modules" + tag "modules_nfcore" + tag "pyrodigal" + + test("pyrodigal - sarscov2 - gff") { + when { + process { + """ + input[0] = [ + [id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[1] = 'gff' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("pyrodigal - sarscov2 - gbk") { + when { + process { + """ + input[0] = [ + [id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[1] = 'gbk' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + path(process.out.annotations.get(0).get(1)).linesGzip[14..22], + process.out.faa, + process.out.fna, + process.out.score, + process.out.versions, + ).match() } + ) + } + } + + test("pyrodigal - sarscov2 - gff - stub") { + options "-stub" + when { + process { + """ + input[0] = [ + [id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[1] = 'gff' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.gff.collect { file(it[1]).getName() } + + process.out.fna.collect { file(it[1]).getName() } + + process.out.faa.collect { file(it[1]).getName() } + + process.out.score.collect { file(it[1]).getName() } + + process.out.versions).match() } + ) + } + } + + test("pyrodigal - sarscov2 - gbk - stub") { + options "-stub" + when { + process { + """ + input[0] = [ + [id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[1] = 'gbk' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.gff.collect { file(it[1]).getName() } + + process.out.fna.collect { file(it[1]).getName() } + + process.out.faa.collect { file(it[1]).getName() } + + process.out.score.collect { file(it[1]).getName() } + + process.out.versions).match() } + ) + } + } + +} \ No newline at end of file diff --git a/modules/nf-core/pyrodigal/tests/main.nf.test.snap b/modules/nf-core/pyrodigal/tests/main.nf.test.snap new file mode 100644 index 00000000..827fdaaa --- /dev/null 
+++ b/modules/nf-core/pyrodigal/tests/main.nf.test.snap @@ -0,0 +1,171 @@ +{ + "pyrodigal - sarscov2 - gff - stub": { + "content": [ + [ + "test.fna.gz", + "test.faa.gz", + "test.score.gz", + "versions.yml:md5,4aab54554829148e01cc0dc7bf6cb5d3" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-18T15:42:12.012112014" + }, + "pyrodigal - sarscov2 - gbk": { + "content": [ + [ + " CDS 310..13476", + " /codon_start=1", + " /inference=\"ab initio prediction:pyrodigal:3.3.0\"", + " /locus_tag=\"MT192765.1_1\"", + " /transl_table=11", + " /translation=\"MPVLQVRDVLVRGFGDSVEEVLSEARQHLKDGTCGLVEVEKGVLP", + " QLEQPYVFIKRSDARTAPHGHVMVELVAELEGIQYGRSGETLGVLVPHVGEIPVAYRKV", + " LLRKNGNKGAGGHSYGADLKSFDLGDELGTDPYEDFQENWNTKHSSGVTRELMRELNGG", + " AYTRYVDNNFCGPDGYPLECIKDLLARAGKASCTLSEQLDFIDTKRGVYCCREHEHEIA" + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.faa.gz:md5,7168b854103f3586ccfdb71a44c389f7" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.fna.gz:md5,1bc8a05bcb72a3c324f5e4ffaa716d3b" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.score.gz:md5,c0703a9e662ae0b21c7bbb082ef3fb5f" + ] + ], + [ + "versions.yml:md5,4aab54554829148e01cc0dc7bf6cb5d3" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-30T06:09:40.289778252" + }, + "pyrodigal - sarscov2 - gff": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.gff.gz:md5,8fcd2d93131cf9fb0c82b81db059ad27" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fna.gz:md5,1bc8a05bcb72a3c324f5e4ffaa716d3b" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.faa.gz:md5,7168b854103f3586ccfdb71a44c389f7" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.score.gz:md5,c0703a9e662ae0b21c7bbb082ef3fb5f" + ] + ], + "4": [ + "versions.yml:md5,4aab54554829148e01cc0dc7bf6cb5d3" + ], + "annotations": [ + [ + { + "id": "test", + "single_end": false + }, + "test.gff.gz:md5,8fcd2d93131cf9fb0c82b81db059ad27" + ] + ], + "faa": [ + [ + { + "id": "test", + "single_end": false + }, + "test.faa.gz:md5,7168b854103f3586ccfdb71a44c389f7" + ] + ], + "fna": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fna.gz:md5,1bc8a05bcb72a3c324f5e4ffaa716d3b" + ] + ], + "score": [ + [ + { + "id": "test", + "single_end": false + }, + "test.score.gz:md5,c0703a9e662ae0b21c7bbb082ef3fb5f" + ] + ], + "versions": [ + "versions.yml:md5,4aab54554829148e01cc0dc7bf6cb5d3" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-18T15:41:55.822235843" + }, + "pyrodigal - sarscov2 - gbk - stub": { + "content": [ + [ + "test.fna.gz", + "test.faa.gz", + "test.score.gz", + "versions.yml:md5,4aab54554829148e01cc0dc7bf6cb5d3" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-18T15:42:19.81157751" + } +} \ No newline at end of file diff --git a/modules/nf-core/pyrodigal/tests/tags.yml b/modules/nf-core/pyrodigal/tests/tags.yml new file mode 100644 index 00000000..8851ca8b --- /dev/null +++ b/modules/nf-core/pyrodigal/tests/tags.yml @@ -0,0 +1,2 @@ +pyrodigal: + - "modules/nf-core/pyrodigal/**" diff --git a/modules/nf-core/rgi/cardannotation/environment.yml b/modules/nf-core/rgi/cardannotation/environment.yml new file mode 100644 index 00000000..f1c5872a --- /dev/null +++ b/modules/nf-core/rgi/cardannotation/environment.yml @@ -0,0 +1,7 @@ +name: 
rgi_cardannotation +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::rgi=6.0.3 diff --git a/modules/nf-core/rgi/cardannotation/main.nf b/modules/nf-core/rgi/cardannotation/main.nf new file mode 100644 index 00000000..d2a814e0 --- /dev/null +++ b/modules/nf-core/rgi/cardannotation/main.nf @@ -0,0 +1,61 @@ +process RGI_CARDANNOTATION { + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/rgi:6.0.3--pyha8f3691_1': + 'biocontainers/rgi:6.0.3--pyha8f3691_1' }" + + input: + path(card) + + output: + path("card_database_processed") , emit: db + env RGI_VERSION , emit: tool_version + env DB_VERSION , emit: db_version + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + """ + rgi card_annotation \\ + -i ${card}/card.json \\ + $args + + DB_VERSION=\$(ls card_database_*_all.fasta | sed "s/card_database_v\\([0-9].*[0-9]\\).*/\\1/") + + mkdir card_database_processed + mv card*.fasta card_database_processed + cp ${card}/* card_database_processed + + RGI_VERSION=\$(rgi main --version) + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + rgi: \$(echo \$RGI_VERSION) + rgi-database: \$(echo \$DB_VERSION) + END_VERSIONS + """ + + stub: + """ + touch card.fasta + touch card_all.fasta + + mkdir card_database_processed + mv card*.fasta card_database_processed + + RGI_VERSION=\$(rgi main --version) + DB_VERSION=stub_version + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + rgi: \$(echo \$RGI_VERSION) + rgi-database: \$(echo \$DB_VERSION) + END_VERSIONS + """ +} diff --git a/modules/nf-core/rgi/cardannotation/meta.yml b/modules/nf-core/rgi/cardannotation/meta.yml new file mode 100644 index 00000000..97e6911d --- /dev/null +++ b/modules/nf-core/rgi/cardannotation/meta.yml @@ -0,0 +1,42 @@ +name: rgi_cardannotation +description: Preprocess the CARD database for RGI to predict antibiotic resistance from protein or nucleotide data +keywords: + - bacteria + - fasta + - antibiotic resistance +tools: + - rgi: + description: This module preprocesses the downloaded Comprehensive Antibiotic Resistance Database (CARD) which can then be used as input for RGI. 
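A note on the version handling in `RGI_CARDANNOTATION` above: the database version is recovered purely from the file name that `rgi card_annotation` writes. A standalone illustration (the v3.2.9 file name is an assumption, mirroring the test snapshot below):

```bash
#!/usr/bin/env bash
set -euo pipefail

# rgi card_annotation emits FASTA files named after the CARD release,
# e.g. card_database_v3.2.9_all.fasta; the module recovers "3.2.9" from it.
touch card_database_v3.2.9_all.fasta
DB_VERSION=$(ls card_database_*_all.fasta | sed "s/card_database_v\([0-9].*[0-9]\).*/\1/")
echo "$DB_VERSION"   # -> 3.2.9
```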
+ homepage: https://card.mcmaster.ca + documentation: https://github.com/arpcard/rgi + tool_dev_url: https://github.com/arpcard/rgi + doi: "10.1093/nar/gkz935" + licence: ["https://card.mcmaster.ca/about"] +input: + - card: + type: directory + description: Directory containing the CARD database + pattern: "*/" +output: + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - db: + type: directory + description: Directory containing the processed CARD database files + pattern: "*/" + - tool_version: + type: string + description: The version of the tool in string format (useful for downstream tools such as hAMRronization) + - db_version: + type: string + description: The version of the used database in string format (useful for downstream tools such as hAMRronization) +authors: + - "@rpetit3" + - "@jfy133" + - "@jasmezz" +maintainers: + - "@rpetit3" + - "@jfy133" + - "@jasmezz" diff --git a/modules/nf-core/rgi/cardannotation/tests/main.nf.test b/modules/nf-core/rgi/cardannotation/tests/main.nf.test new file mode 100644 index 00000000..fa51142a --- /dev/null +++ b/modules/nf-core/rgi/cardannotation/tests/main.nf.test @@ -0,0 +1,66 @@ +nextflow_process { + + name "Test Process RGI_CARDANNOTATION" + script "../main.nf" + process "RGI_CARDANNOTATION" + + tag "modules" + tag "modules_nfcore" + tag "rgi" + tag "rgi/cardannotation" + tag "untar" + + setup { + run("UNTAR") { + script "modules/nf-core/untar/main.nf" + process { + """ + file('https://card.mcmaster.ca/latest/data', checkIfExists: true).copyTo('data.tar.gz') + + input[0] = [ + [ ], + file("data.tar.gz") + ] + """ + } + } + } + + test("rgi/cardannotation") { + + when { + process { + """ + input[0] = UNTAR.out.untar.map{ it[1] } + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("rgi/cardannotation - stub") { + + options "-stub" + + when { + process { + """ + input[0] = UNTAR.out.untar.map{ it[1] } + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/rgi/cardannotation/tests/main.nf.test.snap b/modules/nf-core/rgi/cardannotation/tests/main.nf.test.snap new file mode 100644 index 00000000..5d58124d --- /dev/null +++ b/modules/nf-core/rgi/cardannotation/tests/main.nf.test.snap @@ -0,0 +1,118 @@ +{ + "rgi/cardannotation - stub": { + "content": [ + { + "0": [ + [ + "card.fasta:md5,d41d8cd98f00b204e9800998ecf8427e", + "card_all.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "6.0.3" + ], + "2": [ + "stub_version" + ], + "3": [ + "versions.yml:md5,ff6d0eeef874d3a3cb6e823cd4610e2d" + ], + "db": [ + [ + "card.fasta:md5,d41d8cd98f00b204e9800998ecf8427e", + "card_all.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "db_version": [ + "stub_version" + ], + "tool_version": [ + "6.0.3" + ], + "versions": [ + "versions.yml:md5,ff6d0eeef874d3a3cb6e823cd4610e2d" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-19T23:33:34.376943812" + }, + "rgi/cardannotation": { + "content": [ + { + "0": [ + [ + "CARD-Download-README.txt:md5,ca330e1d89e3a97ac6f50c86a8ca5c34", + "aro_categories.tsv:md5,ba2f33c43b199cd62ae5663125ce316e", + "aro_categories_index.tsv:md5,39f995f2356b6a0cb5fd34e3c6ffc8e1", + "aro_index.tsv:md5,b7250ed3208c8497ec2371527a689eeb", + "card.json:md5,e2cb53b1706a602d5265d2284a1fcdd5", + 
"card_database_v3.2.9.fasta:md5,0839d4447860694782a5db5cd6eae085", + "card_database_v3.2.9_all.fasta:md5,5295875faf06bef62ea954fef40958c3", + "nucleotide_fasta_protein_homolog_model.fasta:md5,ebcd48a6c9e14f339ffd9d2673eed803", + "nucleotide_fasta_protein_knockout_model.fasta:md5,ff476b358ef70da53acf4602568a9b9b", + "nucleotide_fasta_protein_overexpression_model.fasta:md5,68937e587c880153400fa8203f6a90d5", + "nucleotide_fasta_protein_variant_model.fasta:md5,1ff9cbaf0d640e2084f13751309f8176", + "nucleotide_fasta_rRNA_gene_variant_model.fasta:md5,b88fbe1d6de44b2ff2819ee63d001d75", + "protein_fasta_protein_homolog_model.fasta:md5,130a0947c60d18ef2e7d0ab886f80af3", + "protein_fasta_protein_knockout_model.fasta:md5,6b259399e3eae3f23eaa421bbba6ba25", + "protein_fasta_protein_overexpression_model.fasta:md5,758b753b821789147cdd795c654940ad", + "protein_fasta_protein_variant_model.fasta:md5,ec46ea3d9dc7ab01ec22cf265e410c88", + "shortname_antibiotics.tsv:md5,9d20abb9f6d37ed0cecc1573867ca49a", + "shortname_pathogens.tsv:md5,ae267113de686bc8f58eab5845cc343b", + "snps.txt:md5,ee6dfbe7a65f3ffdb6968822c47e4550" + ] + ], + "1": [ + "6.0.3" + ], + "2": [ + "3.2.9" + ], + "3": [ + "versions.yml:md5,43f331ec71ec01a1bae10e30f4ce4f26" + ], + "db": [ + [ + "CARD-Download-README.txt:md5,ca330e1d89e3a97ac6f50c86a8ca5c34", + "aro_categories.tsv:md5,ba2f33c43b199cd62ae5663125ce316e", + "aro_categories_index.tsv:md5,39f995f2356b6a0cb5fd34e3c6ffc8e1", + "aro_index.tsv:md5,b7250ed3208c8497ec2371527a689eeb", + "card.json:md5,e2cb53b1706a602d5265d2284a1fcdd5", + "card_database_v3.2.9.fasta:md5,0839d4447860694782a5db5cd6eae085", + "card_database_v3.2.9_all.fasta:md5,5295875faf06bef62ea954fef40958c3", + "nucleotide_fasta_protein_homolog_model.fasta:md5,ebcd48a6c9e14f339ffd9d2673eed803", + "nucleotide_fasta_protein_knockout_model.fasta:md5,ff476b358ef70da53acf4602568a9b9b", + "nucleotide_fasta_protein_overexpression_model.fasta:md5,68937e587c880153400fa8203f6a90d5", + "nucleotide_fasta_protein_variant_model.fasta:md5,1ff9cbaf0d640e2084f13751309f8176", + "nucleotide_fasta_rRNA_gene_variant_model.fasta:md5,b88fbe1d6de44b2ff2819ee63d001d75", + "protein_fasta_protein_homolog_model.fasta:md5,130a0947c60d18ef2e7d0ab886f80af3", + "protein_fasta_protein_knockout_model.fasta:md5,6b259399e3eae3f23eaa421bbba6ba25", + "protein_fasta_protein_overexpression_model.fasta:md5,758b753b821789147cdd795c654940ad", + "protein_fasta_protein_variant_model.fasta:md5,ec46ea3d9dc7ab01ec22cf265e410c88", + "shortname_antibiotics.tsv:md5,9d20abb9f6d37ed0cecc1573867ca49a", + "shortname_pathogens.tsv:md5,ae267113de686bc8f58eab5845cc343b", + "snps.txt:md5,ee6dfbe7a65f3ffdb6968822c47e4550" + ] + ], + "db_version": [ + "3.2.9" + ], + "tool_version": [ + "6.0.3" + ], + "versions": [ + "versions.yml:md5,43f331ec71ec01a1bae10e30f4ce4f26" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-19T23:33:06.962413561" + } +} \ No newline at end of file diff --git a/modules/nf-core/rgi/cardannotation/tests/tags.yml b/modules/nf-core/rgi/cardannotation/tests/tags.yml new file mode 100644 index 00000000..02c2de0b --- /dev/null +++ b/modules/nf-core/rgi/cardannotation/tests/tags.yml @@ -0,0 +1,2 @@ +rgi/cardannotation: + - "modules/nf-core/rgi/cardannotation/**" diff --git a/modules/nf-core/bioawk/environment.yml b/modules/nf-core/rgi/main/environment.yml similarity index 62% rename from modules/nf-core/bioawk/environment.yml rename to modules/nf-core/rgi/main/environment.yml index 5fdfd417..f229cc21 100644 --- 
a/modules/nf-core/bioawk/environment.yml
+++ b/modules/nf-core/rgi/main/environment.yml
@@ -1,7 +1,7 @@
-name: bioawk
+name: rgi_main
 channels:
   - conda-forge
   - bioconda
   - defaults
 dependencies:
-  - bioconda::bioawk=1.0
+  - bioconda::rgi=6.0.3
diff --git a/modules/nf-core/rgi/main/main.nf b/modules/nf-core/rgi/main/main.nf
index 26be7734..ba05358a 100644
--- a/modules/nf-core/rgi/main/main.nf
+++ b/modules/nf-core/rgi/main/main.nf
@@ -2,46 +2,88 @@ process RGI_MAIN {
     tag "$meta.id"
     label 'process_medium'
 
-    conda "bioconda::rgi=5.2.1"
+    conda "${moduleDir}/environment.yml"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/rgi:5.2.1--pyha8f3691_2':
-        'biocontainers/rgi:5.2.1--pyha8f3691_2' }"
+        'https://depot.galaxyproject.org/singularity/rgi:6.0.3--pyha8f3691_1':
+        'biocontainers/rgi:6.0.3--pyha8f3691_1' }"
 
     input:
     tuple val(meta), path(fasta)
+    path(card)
+    path(wildcard)
 
     output:
     tuple val(meta), path("*.json"), emit: json
     tuple val(meta), path("*.txt") , emit: tsv
-    tuple val(meta), path("temp/") , emit: tmp
-    env VER , emit: tool_version
-    env DBVER , emit: db_version
+    tuple val(meta), path("temp/") , emit: tmp
+    env RGI_VERSION , emit: tool_version
+    env DB_VERSION , emit: db_version
     path "versions.yml" , emit: versions
 
     when:
     task.ext.when == null || task.ext.when
 
     script:
-    def args = task.ext.args ?: ''
+    def args = task.ext.args ?: '' // This customizes the command: rgi load
+    def args2 = task.ext.args2 ?: '' // This customizes the command: rgi main
     def prefix = task.ext.prefix ?: "${meta.id}"
+    def load_wildcard = ""
+
+    if (wildcard) {
+        load_wildcard = """ \\
+            --wildcard_annotation ${wildcard}/wildcard_database_v\$DB_VERSION.fasta \\
+            --wildcard_annotation_all_models ${wildcard}/wildcard_database_v\$DB_VERSION\\_all.fasta \\
+            --wildcard_index ${wildcard}/wildcard/index-for-model-sequences.txt \\
+            --amr_kmers ${wildcard}/wildcard/all_amr_61mers.txt \\
+            --kmer_database ${wildcard}/wildcard/61_kmer_db.json \\
+            --kmer_size 61
+        """
+    }
+
     """
+    DB_VERSION=\$(ls ${card}/card_database_*_all.fasta | sed "s/${card}\\/card_database_v\\([0-9].*[0-9]\\).*/\\1/")
+
     rgi \\
-        main \\
+        load \\
         $args \\
+        --card_json ${card}/card.json \\
+        --debug --local \\
+        --card_annotation ${card}/card_database_v\$DB_VERSION.fasta \\
+        --card_annotation_all_models ${card}/card_database_v\$DB_VERSION\\_all.fasta \\
+        $load_wildcard
+
+    rgi \\
+        main \\
+        $args2 \\
         --num_threads $task.cpus \\
         --output_file $prefix \\
         --input_sequence $fasta
 
     mkdir temp/
-    mv *.xml *.fsa *.{nhr,nin,nsq} *.draft *.potentialGenes *{variant,rrna,protein,predictedGenes,overexpression,homolog}.json temp/
+    for FILE in *.xml *.fsa *.{nhr,nin,nsq} *.draft *.potentialGenes *{variant,rrna,protein,predictedGenes,overexpression,homolog}.json; do [[ -e \$FILE ]] && mv \$FILE temp/; done
+
+    RGI_VERSION=\$(rgi main --version)
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        rgi: \$(echo \$RGI_VERSION)
+        rgi-database: \$(echo \$DB_VERSION)
+    END_VERSIONS
+    """
+
+    stub:
+    """
+    mkdir -p temp
+    touch test.json
+    touch test.txt
 
-    VER=\$(rgi main --version)
-    DBVER=\$(rgi database --version)
+    RGI_VERSION=\$(rgi main --version)
+    DB_VERSION=stub_version
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
-        rgi: \$(echo \$VER)
-        rgi-database: \$(echo \$DBVER)
+        rgi: \$(echo \$RGI_VERSION)
+        rgi-database: \$(echo \$DB_VERSION)
     END_VERSIONS
     """
 }
diff --git a/modules/nf-core/rgi/main/meta.yml b/modules/nf-core/rgi/main/meta.yml
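Because `RGI_MAIN` now performs a load-then-screen sequence, a condensed shell view of the two calls may help. A sketch under stated assumptions: `card_db/` is a placeholder name for a directory preprocessed by `RGI_CARDANNOTATION`, no WildCARD data is supplied, and the sample file name is illustrative:

```bash
#!/usr/bin/env bash
set -euo pipefail

# Step 1: register the preprocessed CARD reference locally, deriving the
# version string from the annotation FASTA's file name.
DB_VERSION=$(ls card_db/card_database_*_all.fasta | sed "s/card_db\/card_database_v\([0-9].*[0-9]\).*/\1/")
rgi load \
    --card_json card_db/card.json \
    --debug --local \
    --card_annotation card_db/card_database_v${DB_VERSION}.fasta \
    --card_annotation_all_models card_db/card_database_v${DB_VERSION}_all.fasta

# Step 2: run the resistance-gene screen against that locally loaded DB.
rgi main \
    --num_threads 4 \
    --output_file sample1 \
    --input_sequence sample1.fna
```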
index 3bb7ddb5..7e444c8b 100644 --- a/modules/nf-core/rgi/main/meta.yml +++ b/modules/nf-core/rgi/main/meta.yml @@ -12,7 +12,6 @@ tools: tool_dev_url: https://github.com/arpcard/rgi doi: "10.1093/nar/gkz935" licence: ["https://card.mcmaster.ca/about"] - input: - meta: type: map @@ -23,6 +22,14 @@ input: type: file description: Nucleotide or protein sequences in FASTA format pattern: "*.{fasta,fasta.gz,fa,fa.gz,fna,fna.gz,faa,faa.gz}" + - card: + type: directory + description: Directory containing the CARD database. This is expected to be the unarchived but otherwise unaltered download folder (see RGI documentation for download instructions). + pattern: "*/" + - wildcard: + type: directory + description: Directory containing the WildCARD database (optional). This is expected to be the unarchived but otherwise unaltered download folder (see RGI documentation for download instructions). + pattern: "*/" output: - meta: @@ -42,7 +49,7 @@ output: type: file description: Tab-delimited file with RGI results pattern: "*.{txt}" - - temp: + - tmp: type: directory description: Directory containing various intermediate files pattern: "temp/" @@ -55,3 +62,8 @@ output: authors: - "@rpetit3" - "@jfy133" + - "@jasmezz" +maintainers: + - "@rpetit3" + - "@jfy133" + - "@jasmezz" diff --git a/modules/nf-core/rgi/main/tests/main.nf.test b/modules/nf-core/rgi/main/tests/main.nf.test new file mode 100644 index 00000000..1fca563a --- /dev/null +++ b/modules/nf-core/rgi/main/tests/main.nf.test @@ -0,0 +1,94 @@ +nextflow_process { + + name "Test Process RGI_MAIN" + script "../main.nf" + process "RGI_MAIN" + + tag "modules" + tag "modules_nfcore" + tag "rgi" + tag "rgi/main" + tag "rgi/cardannotation" + tag "untar" + + setup { + run("UNTAR") { + script "modules/nf-core/untar/main.nf" + process { + """ + file('https://card.mcmaster.ca/latest/data', checkIfExists: true).copyTo('card-data.tar.bz2') + + input[0] = [ + [ ], + file("card-data.tar.bz2") + ] + """ + } + } + + run("RGI_CARDANNOTATION") { + script "modules/nf-core/rgi/cardannotation" + process { + """ + input[0] = UNTAR.out.untar.map{ it[1] } + """ + } + } + } + + + test("rgi/main - haemophilus_influenzae - genome_fna_gz") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['haemophilus_influenzae']['genome']['genome_fna_gz'], checkIfExists: true) + ] + input[1] = RGI_CARDANNOTATION.out.db + input[2] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.versions, + process.out.tsv, + process.out.json, + file(process.out.tmp.get(0).get(1)).list().sort(), + process.out.tool_version, + process.out.db_version, + ).match() } + ) + } + } + + test("rgi/main - haemophilus_influenzae - genome_fna_gz - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['haemophilus_influenzae']['genome']['genome_fna_gz'], checkIfExists: true) + ] + input[1] = RGI_CARDANNOTATION.out.db + input[2] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/rgi/main/tests/main.nf.test.snap b/modules/nf-core/rgi/main/tests/main.nf.test.snap new file mode 100644 index 00000000..a8dc1d61 --- /dev/null +++ b/modules/nf-core/rgi/main/tests/main.nf.test.snap @@ -0,0 +1,143 @@ +{ + "rgi/main - haemophilus_influenzae - genome_fna_gz - stub": { + "content": [ + { + "0": [ + [ + { + "id": 
"test", + "single_end": false + }, + "test.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + [ + + ] + ] + ], + "3": [ + "6.0.3" + ], + "4": [ + "stub_version" + ], + "5": [ + "versions.yml:md5,f77ce9bdc8d309c9d6f7ec63bd53f5cf" + ], + "db_version": [ + "stub_version" + ], + "json": [ + [ + { + "id": "test", + "single_end": false + }, + "test.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tmp": [ + [ + { + "id": "test", + "single_end": false + }, + [ + + ] + ] + ], + "tool_version": [ + "6.0.3" + ], + "tsv": [ + [ + { + "id": "test", + "single_end": false + }, + "test.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,f77ce9bdc8d309c9d6f7ec63bd53f5cf" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-19T22:51:36.047807514" + }, + "rgi/main - haemophilus_influenzae - genome_fna_gz": { + "content": [ + [ + "versions.yml:md5,a9f89e3bebd538efa07bcbe9fe1ba37a" + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.txt:md5,5854d6bef754d91da80980e96b6a054b" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.json:md5,f9ca00ea1ff6e733c7c25bb2dfd65128" + ] + ], + [ + "genome.fna.gz.temp.uncompressed.fsa", + "genome.fna.gz.temp.uncompressed.fsa.temp.blastRes.rrna.xml", + "genome.fna.gz.temp.uncompressed.fsa.temp.contig.fsa", + "genome.fna.gz.temp.uncompressed.fsa.temp.contig.fsa.blastRes.xml", + "genome.fna.gz.temp.uncompressed.fsa.temp.contigToORF.fsa", + "genome.fna.gz.temp.uncompressed.fsa.temp.db.nhr", + "genome.fna.gz.temp.uncompressed.fsa.temp.db.nin", + "genome.fna.gz.temp.uncompressed.fsa.temp.db.nsq", + "genome.fna.gz.temp.uncompressed.fsa.temp.draft", + "genome.fna.gz.temp.uncompressed.fsa.temp.homolog.json", + "genome.fna.gz.temp.uncompressed.fsa.temp.overexpression.json", + "genome.fna.gz.temp.uncompressed.fsa.temp.potentialGenes", + "genome.fna.gz.temp.uncompressed.fsa.temp.predictedGenes.json", + "genome.fna.gz.temp.uncompressed.fsa.temp.predictedGenes.protein.json", + "genome.fna.gz.temp.uncompressed.fsa.temp.rrna.json", + "genome.fna.gz.temp.uncompressed.fsa.temp.variant.json" + ], + [ + "6.0.3" + ], + [ + "3.2.9" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-19T22:51:14.372178941" + } +} \ No newline at end of file diff --git a/modules/nf-core/rgi/main/tests/tags.yml b/modules/nf-core/rgi/main/tests/tags.yml new file mode 100644 index 00000000..e68ad8a2 --- /dev/null +++ b/modules/nf-core/rgi/main/tests/tags.yml @@ -0,0 +1,2 @@ +rgi/main: + - "modules/nf-core/rgi/main/**" diff --git a/modules/nf-core/seqkit/seq/environment.yml b/modules/nf-core/seqkit/seq/environment.yml new file mode 100644 index 00000000..74e0dd76 --- /dev/null +++ b/modules/nf-core/seqkit/seq/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +name: "seqkit_seq" +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - "bioconda::seqkit=2.8.1" diff --git a/modules/nf-core/seqkit/seq/main.nf b/modules/nf-core/seqkit/seq/main.nf new file mode 100644 index 00000000..d7d38fc8 --- /dev/null +++ b/modules/nf-core/seqkit/seq/main.nf @@ -0,0 +1,63 @@ +process SEQKIT_SEQ { + tag "$meta.id" + label 'process_low' + // File IO can be a bottleneck. 
See: https://bioinf.shenwei.me/seqkit/usage/#parallelization-of-cpu-intensive-jobs + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/seqkit:2.8.1--h9ee0642_0': + 'biocontainers/seqkit:2.8.1--h9ee0642_0' }" + + input: + tuple val(meta), path(fastx) + + output: + tuple val(meta), path("${prefix}.*") , emit: fastx + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def extension = "fastq" + if ("$fastx" ==~ /.+\.fasta|.+\.fasta.gz|.+\.fa|.+\.fa.gz|.+\.fas|.+\.fas.gz|.+\.fna|.+\.fna.gz|.+\.fsa|.+\.fsa.gz/ ) { + extension = "fasta" + } + extension = fastx.toString().endsWith('.gz') ? "${extension}.gz" : extension + def call_gzip = extension.endsWith('.gz') ? "| gzip -c $args2" : '' + if("${prefix}.${extension}" == "$fastx") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + """ + seqkit \\ + seq \\ + --threads $task.cpus \\ + $args \\ + $fastx \\ + $call_gzip \\ + > ${prefix}.${extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + seqkit: \$(seqkit version | cut -d' ' -f2) + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + def extension = "fastq" + if ("$fastx" ==~ /.+\.fasta|.+\.fasta.gz|.+\.fa|.+\.fa.gz|.+\.fas|.+\.fas.gz|.+\.fna|.+\.fna.gz|.+\.fsa|.+\.fsa.gz/ ) { + extension = "fasta" + } + extension = fastx.toString().endsWith('.gz') ? "${extension}.gz" : extension + if("${prefix}.${extension}" == "$fastx") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + """ + touch ${prefix}.${extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + seqkit: \$(seqkit version | cut -d' ' -f2) + END_VERSIONS + """ +} diff --git a/modules/nf-core/seqkit/seq/meta.yml b/modules/nf-core/seqkit/seq/meta.yml new file mode 100644 index 00000000..8d4e2b16 --- /dev/null +++ b/modules/nf-core/seqkit/seq/meta.yml @@ -0,0 +1,48 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "seqkit_seq" +description: Transforms sequences (extract ID, filter by length, remove gaps, reverse complement...) +keywords: + - genomics + - fasta + - fastq + - transform + - filter + - gaps + - complement +tools: + - "seqkit": + description: "A cross-platform and ultrafast toolkit for FASTA/Q file manipulation" + homepage: "https://bioinf.shenwei.me/seqkit/" + documentation: "https://bioinf.shenwei.me/seqkit/usage/" + tool_dev_url: "https://github.com/shenwei356/seqkit" + doi: "10.1371/journal.pone.0163962" + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - fastx: + type: file + description: Input fasta/fastq file + pattern: "*.{fsa,fas,fa,fasta,fastq,fq,fsa.gz,fas.gz,fa.gz,fasta.gz,fastq.gz,fq.gz}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1' ]` + - fastx: + type: file + description: Output fasta/fastq file + pattern: "*.{fasta,fasta.gz,fastq,fastq.gz}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@GallVp" +maintainers: + - "@GallVp" diff --git a/modules/nf-core/seqkit/seq/tests/main.nf.test b/modules/nf-core/seqkit/seq/tests/main.nf.test new file mode 100644 index 00000000..9fd1c085 --- /dev/null +++ b/modules/nf-core/seqkit/seq/tests/main.nf.test @@ -0,0 +1,145 @@ +nextflow_process { + + name "Test Process SEQKIT_SEQ" + script "../main.nf" + process "SEQKIT_SEQ" + config './nextflow.config' + + tag "modules" + tag "modules_nfcore" + tag "seqkit" + tag "seqkit/seq" + + test("sarscov2-genome_fasta") { + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + ) + } + + } + + test("sarscov2-genome_fasta_gz") { + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + ) + } + + } + + test("sarscov2-test_1_fastq_gz") { + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + ) + } + + } + + test("file_name_conflict-fail_with_error") { + when { + process { + """ + input[0] = [ + [ id:'test_1' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert !process.success }, + { assert process.stdout.toString().contains("Input and output names are the same") } + ) + } + + } + + test("sarscov2-genome_fasta-stub") { + + options '-stub' + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + ) + } + + } + + test("file_name_conflict-fail_with_error-stub") { + + options '-stub' + + when { + process { + """ + input[0] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert !process.success }, + { assert process.stdout.toString().contains("Input and output names are the same") } + ) + } + + } + +} diff --git a/modules/nf-core/seqkit/seq/tests/main.nf.test.snap b/modules/nf-core/seqkit/seq/tests/main.nf.test.snap new file mode 100644 index 00000000..e6910966 --- /dev/null +++ b/modules/nf-core/seqkit/seq/tests/main.nf.test.snap @@ -0,0 +1,134 @@ +{ + "sarscov2-genome_fasta-stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,34894c4efa5e10a923e78975a3d260dd" + ], + "fastx": [ + [ + { + "id": "test" + }, + "test.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] 
+ ], + "versions": [ + "versions.yml:md5,34894c4efa5e10a923e78975a3d260dd" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-08T08:52:18.220051903" + }, + "sarscov2-test_1_fastq_gz": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.fastq.gz:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "1": [ + "versions.yml:md5,34894c4efa5e10a923e78975a3d260dd" + ], + "fastx": [ + [ + { + "id": "test" + }, + "test.fastq.gz:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "versions": [ + "versions.yml:md5,34894c4efa5e10a923e78975a3d260dd" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-08T08:51:55.607826581" + }, + "sarscov2-genome_fasta": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.fasta:md5,483f4a5dfe60171c86ee9b7e6dff908b" + ] + ], + "1": [ + "versions.yml:md5,34894c4efa5e10a923e78975a3d260dd" + ], + "fastx": [ + [ + { + "id": "test" + }, + "test.fasta:md5,483f4a5dfe60171c86ee9b7e6dff908b" + ] + ], + "versions": [ + "versions.yml:md5,34894c4efa5e10a923e78975a3d260dd" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-08T08:51:27.717072933" + }, + "sarscov2-genome_fasta_gz": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.fasta.gz:md5,483f4a5dfe60171c86ee9b7e6dff908b" + ] + ], + "1": [ + "versions.yml:md5,34894c4efa5e10a923e78975a3d260dd" + ], + "fastx": [ + [ + { + "id": "test" + }, + "test.fasta.gz:md5,483f4a5dfe60171c86ee9b7e6dff908b" + ] + ], + "versions": [ + "versions.yml:md5,34894c4efa5e10a923e78975a3d260dd" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-08T08:51:37.917560104" + } +} \ No newline at end of file diff --git a/modules/nf-core/seqkit/seq/tests/nextflow.config b/modules/nf-core/seqkit/seq/tests/nextflow.config new file mode 100644 index 00000000..d8e3c66a --- /dev/null +++ b/modules/nf-core/seqkit/seq/tests/nextflow.config @@ -0,0 +1,3 @@ +process { + ext.args2 = '-n' +} diff --git a/modules/nf-core/seqkit/seq/tests/tags.yml b/modules/nf-core/seqkit/seq/tests/tags.yml new file mode 100644 index 00000000..5eeca7e3 --- /dev/null +++ b/modules/nf-core/seqkit/seq/tests/tags.yml @@ -0,0 +1,2 @@ +seqkit/seq: + - "modules/nf-core/seqkit/seq/**" diff --git a/modules/nf-core/tabix/bgzip/environment.yml b/modules/nf-core/tabix/bgzip/environment.yml new file mode 100644 index 00000000..56cc0fb1 --- /dev/null +++ b/modules/nf-core/tabix/bgzip/environment.yml @@ -0,0 +1,8 @@ +name: tabix_bgzip +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::tabix=1.11 + - bioconda::htslib=1.20 diff --git a/modules/nf-core/tabix/bgzip/main.nf b/modules/nf-core/tabix/bgzip/main.nf index 8c47d9e2..67991c74 100644 --- a/modules/nf-core/tabix/bgzip/main.nf +++ b/modules/nf-core/tabix/bgzip/main.nf @@ -2,10 +2,10 @@ process TABIX_BGZIP { tag "$meta.id" label 'process_single' - conda "bioconda::tabix=1.11" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/tabix:1.11--hdfd78af_0' : - 'biocontainers/tabix:1.11--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/htslib:1.20--h5efdd21_2' : + 'biocontainers/htslib:1.20--h5efdd21_2' }" input: tuple val(meta), path(input) @@ -44,7 +44,8 @@ process TABIX_BGZIP { output = in_bgzip ? 
input.getBaseName() : "${prefix}.${input.getExtension()}.gz" """ - touch ${output} + echo "" | gzip > ${output} + touch ${output}.gzi cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/tabix/bgzip/meta.yml b/modules/nf-core/tabix/bgzip/meta.yml index c3ea2107..621d49ea 100644 --- a/modules/nf-core/tabix/bgzip/meta.yml +++ b/modules/nf-core/tabix/bgzip/meta.yml @@ -45,3 +45,8 @@ authors: - "@drpatelh" - "@maxulysse" - "@nvnieuwk" +maintainers: + - "@joseespinosa" + - "@drpatelh" + - "@maxulysse" + - "@nvnieuwk" diff --git a/modules/nf-core/tabix/bgzip/tests/bgzip_compress.config b/modules/nf-core/tabix/bgzip/tests/bgzip_compress.config new file mode 100644 index 00000000..6b6ff55f --- /dev/null +++ b/modules/nf-core/tabix/bgzip/tests/bgzip_compress.config @@ -0,0 +1,5 @@ +process { + withName: TABIX_BGZIP { + ext.args = ' -i' + } +} diff --git a/modules/nf-core/tabix/bgzip/tests/main.nf.test b/modules/nf-core/tabix/bgzip/tests/main.nf.test new file mode 100644 index 00000000..d784aa07 --- /dev/null +++ b/modules/nf-core/tabix/bgzip/tests/main.nf.test @@ -0,0 +1,111 @@ +nextflow_process { + + name "Test Process TABIX_BGZIP" + script "modules/nf-core/tabix/bgzip/main.nf" + process "TABIX_BGZIP" + + tag "modules" + tag "modules_nfcore" + tag "tabix" + tag "tabix/bgzip" + + test("sarscov2_vcf_bgzip_compress") { + when { + process { + """ + input[0] = [ + [ id:'bgzip_test' ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot( + file(process.out.output[0][1]).name + ).match("bgzip_test") + } + ) + } + } + + test("homo_genome_bedgz_compress") { + when { + process { + """ + input[0] = [ + [ id:'bedgz_test' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.bed.gz', checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot( + file(process.out.output[0][1]).name + ).match("bedgz_test") + } + ) + } + } + + test("sarscov2_vcf_bgzip_compress_stub") { + options '-stub' + config "./bgzip_compress.config" + + when { + process { + """ + input[0] = [ + [ id:"test_stub" ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot( + file(process.out.output[0][1]).name + ).match("test_stub") + } + ) + } + } + + test("sarscov2_vcf_bgzip_compress_gzi") { + config "./bgzip_compress.config" + when { + process { + """ + input[0] = [ + [ id:"gzi_compress_test" ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot( + file(process.out.gzi[0][1]).name + ).match("gzi_compress_test") + } + ) + } + } +} diff --git a/modules/nf-core/tabix/bgzip/tests/main.nf.test.snap b/modules/nf-core/tabix/bgzip/tests/main.nf.test.snap new file mode 100644 index 00000000..0748143f --- /dev/null +++ b/modules/nf-core/tabix/bgzip/tests/main.nf.test.snap @@ -0,0 +1,218 @@ +{ + "gzi_compress_test": { + "content": [ + "gzi_compress_test.vcf.gz.gzi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": 
"24.04.2" + }, + "timestamp": "2024-02-19T14:52:29.328146" + }, + "homo_genome_bedgz_compress": { + "content": [ + { + "0": [ + [ + { + "id": "bedgz_test" + }, + "bedgz_test.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,753c29916d45debdde52f4ac7c745f61" + ], + "gzi": [ + + ], + "output": [ + [ + { + "id": "bedgz_test" + }, + "bedgz_test.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7" + ] + ], + "versions": [ + "versions.yml:md5,753c29916d45debdde52f4ac7c745f61" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-19T11:28:34.159992362" + }, + "test_stub": { + "content": [ + "test_stub.vcf.gz" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-02-19T14:52:20.811489" + }, + "sarscov2_vcf_bgzip_compress": { + "content": [ + { + "0": [ + [ + { + "id": "bgzip_test" + }, + "bgzip_test.vcf.gz:md5,8e722884ffb75155212a3fc053918766" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,753c29916d45debdde52f4ac7c745f61" + ], + "gzi": [ + + ], + "output": [ + [ + { + "id": "bgzip_test" + }, + "bgzip_test.vcf.gz:md5,8e722884ffb75155212a3fc053918766" + ] + ], + "versions": [ + "versions.yml:md5,753c29916d45debdde52f4ac7c745f61" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-19T11:28:22.087769106" + }, + "sarscov2_vcf_bgzip_compress_gzi": { + "content": [ + { + "0": [ + [ + { + "id": "gzi_compress_test" + }, + "gzi_compress_test.vcf.gz:md5,8e722884ffb75155212a3fc053918766" + ] + ], + "1": [ + [ + { + "id": "gzi_compress_test" + }, + "gzi_compress_test.vcf.gz.gzi:md5,26fd00d4e26141cd11561f6e7d4a2ad0" + ] + ], + "2": [ + "versions.yml:md5,753c29916d45debdde52f4ac7c745f61" + ], + "gzi": [ + [ + { + "id": "gzi_compress_test" + }, + "gzi_compress_test.vcf.gz.gzi:md5,26fd00d4e26141cd11561f6e7d4a2ad0" + ] + ], + "output": [ + [ + { + "id": "gzi_compress_test" + }, + "gzi_compress_test.vcf.gz:md5,8e722884ffb75155212a3fc053918766" + ] + ], + "versions": [ + "versions.yml:md5,753c29916d45debdde52f4ac7c745f61" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-19T11:28:57.15091665" + }, + "bgzip_test": { + "content": [ + "bgzip_test.vcf.gz" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-02-19T14:52:03.768295" + }, + "bedgz_test": { + "content": [ + "bedgz_test.bed" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-02-19T14:52:12.453855" + }, + "sarscov2_vcf_bgzip_compress_stub": { + "content": [ + { + "0": [ + [ + { + "id": "test_stub" + }, + "test_stub.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "test_stub" + }, + "test_stub.vcf.gz.gzi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,753c29916d45debdde52f4ac7c745f61" + ], + "gzi": [ + [ + { + "id": "test_stub" + }, + "test_stub.vcf.gz.gzi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "output": [ + [ + { + "id": "test_stub" + }, + "test_stub.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,753c29916d45debdde52f4ac7c745f61" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-19T11:28:45.219404786" + } +} \ No newline at end of file diff --git a/modules/nf-core/tabix/bgzip/tests/tags.yml b/modules/nf-core/tabix/bgzip/tests/tags.yml new file mode 100644 index 00000000..de0eec86 --- /dev/null +++ 
b/modules/nf-core/tabix/bgzip/tests/tags.yml @@ -0,0 +1,2 @@ +tabix/bgzip: + - "modules/nf-core/tabix/bgzip/**" diff --git a/modules/nf-core/tabix/bgzip/tests/vcf_none.config b/modules/nf-core/tabix/bgzip/tests/vcf_none.config new file mode 100644 index 00000000..f3a3c467 --- /dev/null +++ b/modules/nf-core/tabix/bgzip/tests/vcf_none.config @@ -0,0 +1,5 @@ +process { + withName: TABIX_BGZIP { + ext.args = '' + } +} diff --git a/modules/nf-core/untar/environment.yml b/modules/nf-core/untar/environment.yml new file mode 100644 index 00000000..4f498244 --- /dev/null +++ b/modules/nf-core/untar/environment.yml @@ -0,0 +1,9 @@ +name: untar +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - conda-forge::grep=3.11 + - conda-forge::sed=4.8 + - conda-forge::tar=1.34 diff --git a/modules/nf-core/untar/main.nf b/modules/nf-core/untar/main.nf index 61461c39..c651bdad 100644 --- a/modules/nf-core/untar/main.nf +++ b/modules/nf-core/untar/main.nf @@ -2,7 +2,7 @@ process UNTAR { tag "$archive" label 'process_single' - conda "conda-forge::sed=4.7 conda-forge::grep=3.11 conda-forge::tar=1.34" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : 'nf-core/ubuntu:20.04' }" @@ -52,8 +52,29 @@ stub: prefix = task.ext.prefix ?: ( meta.id ? "${meta.id}" : archive.toString().replaceFirst(/\.[^\.]+(.gz)?$/, "")) """ - mkdir $prefix - touch ${prefix}/file.txt + mkdir ${prefix} + ## Dry-run untarring the archive to get the files and place all in prefix + if [[ \$(tar -taf ${archive} | grep -o -P "^.*?\\/" | uniq | wc -l) -eq 1 ]]; then + for i in `tar -tf ${archive}`; + do + if [[ \$(echo "\${i}" | grep -E "/\$") == "" ]]; + then + touch \${i} + else + mkdir -p \${i} + fi + done + else + for i in `tar -tf ${archive}`; + do + if [[ \$(echo "\${i}" | grep -E "/\$") == "" ]]; + then + touch ${prefix}/\${i} + else + mkdir -p ${prefix}/\${i} + fi + done + fi cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/untar/meta.yml b/modules/nf-core/untar/meta.yml index db241a6e..a9a2110f 100644 --- a/modules/nf-core/untar/meta.yml +++ b/modules/nf-core/untar/meta.yml @@ -39,3 +39,8 @@ authors: - "@drpatelh" - "@matthdsm" - "@jfy133" +maintainers: + - "@joseespinosa" + - "@drpatelh" + - "@matthdsm" + - "@jfy133" diff --git a/modules/nf-core/untar/tests/main.nf.test b/modules/nf-core/untar/tests/main.nf.test new file mode 100644 index 00000000..c957517a --- /dev/null +++ b/modules/nf-core/untar/tests/main.nf.test @@ -0,0 +1,85 @@ +nextflow_process { + + name "Test Process UNTAR" + script "../main.nf" + process "UNTAR" + tag "modules" + tag "modules_nfcore" + tag "untar" + + test("test_untar") { + + when { + process { + """ + input[0] = [ [], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/db/kraken2.tar.gz', checkIfExists: true) ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + ) + } + } + + test("test_untar_onlyfiles") { + + when { + process { + """ + input[0] = [ [], file(params.modules_testdata_base_path + 'generic/tar/hello.tar.gz', checkIfExists: true) ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + ) + } + } + + test("test_untar - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ [], file(params.modules_testdata_base_path + 
'genomics/sarscov2/genome/db/kraken2.tar.gz', checkIfExists: true) ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + ) + } + } + + test("test_untar_onlyfiles - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ [], file(params.modules_testdata_base_path + 'generic/tar/hello.tar.gz', checkIfExists: true) ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + ) + } + } +} diff --git a/modules/nf-core/untar/tests/main.nf.test.snap b/modules/nf-core/untar/tests/main.nf.test.snap new file mode 100644 index 00000000..ceb91b79 --- /dev/null +++ b/modules/nf-core/untar/tests/main.nf.test.snap @@ -0,0 +1,158 @@ +{ + "test_untar_onlyfiles": { + "content": [ + { + "0": [ + [ + [ + + ], + [ + "hello.txt:md5,e59ff97941044f85df5297e1c302d260" + ] + ] + ], + "1": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ], + "untar": [ + [ + [ + + ], + [ + "hello.txt:md5,e59ff97941044f85df5297e1c302d260" + ] + ] + ], + "versions": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-10T12:04:28.231047" + }, + "test_untar_onlyfiles - stub": { + "content": [ + { + "0": [ + [ + [ + + ], + [ + "hello.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ], + "untar": [ + [ + [ + + ], + [ + "hello.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-10T12:04:45.773103" + }, + "test_untar - stub": { + "content": [ + { + "0": [ + [ + [ + + ], + [ + "hash.k2d:md5,d41d8cd98f00b204e9800998ecf8427e", + "opts.k2d:md5,d41d8cd98f00b204e9800998ecf8427e", + "taxo.k2d:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ], + "untar": [ + [ + [ + + ], + [ + "hash.k2d:md5,d41d8cd98f00b204e9800998ecf8427e", + "opts.k2d:md5,d41d8cd98f00b204e9800998ecf8427e", + "taxo.k2d:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-10T12:04:36.777441" + }, + "test_untar": { + "content": [ + { + "0": [ + [ + [ + + ], + [ + "hash.k2d:md5,8b8598468f54a7087c203ad0190555d9", + "opts.k2d:md5,a033d00cf6759407010b21700938f543", + "taxo.k2d:md5,094d5891cdccf2f1468088855c214b2c" + ] + ] + ], + "1": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ], + "untar": [ + [ + [ + + ], + [ + "hash.k2d:md5,8b8598468f54a7087c203ad0190555d9", + "opts.k2d:md5,a033d00cf6759407010b21700938f543", + "taxo.k2d:md5,094d5891cdccf2f1468088855c214b2c" + ] + ] + ], + "versions": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-10T12:04:19.377674" + } +} \ No newline at end of file diff --git a/modules/nf-core/untar/tests/tags.yml b/modules/nf-core/untar/tests/tags.yml new file mode 100644 index 00000000..feb6f15c --- /dev/null +++ b/modules/nf-core/untar/tests/tags.yml @@ -0,0 +1,2 @@ +untar: + - modules/nf-core/untar/** diff --git a/modules/nf-core/untar/untar.diff b/modules/nf-core/untar/untar.diff new file mode 100644 index 
00000000..457dd66d --- /dev/null +++ b/modules/nf-core/untar/untar.diff @@ -0,0 +1,16 @@ +Changes in module 'nf-core/untar' +--- modules/nf-core/untar/main.nf ++++ modules/nf-core/untar/main.nf +@@ -4,8 +4,8 @@ + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? +- 'https://depot.galaxyproject.org/singularity/ubuntu:22.04' : +- 'nf-core/ubuntu:22.04' }" ++ 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : ++ 'nf-core/ubuntu:20.04' }" + + input: + tuple val(meta), path(archive) + +************************************************************ diff --git a/nextflow.config b/nextflow.config index 5d8d969a..dea546e0 100644 --- a/nextflow.config +++ b/nextflow.config @@ -12,207 +12,250 @@ params { // Input options input = null - // References - Not used in funcscan, left for template purposes - genome = null - igenomes_base = 's3://ngi-igenomes/igenomes' - igenomes_ignore = false + // MultiQC options + multiqc_config = null + multiqc_title = null + multiqc_logo = null + max_multiqc_email_size = '25.MB' + multiqc_methods_description = null + // Boilerplate options + outdir = null + publish_dir_mode = 'copy' + email = null + email_on_fail = null + plaintext_email = false + monochrome_logs = false + hook_url = null + help = false + version = false + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/' + + // To stop the random warning coming from nf-validation, remove on upgrade to nf-schema + monochromeLogs = null + + // Taxonomy classification options + run_taxa_classification = false + taxa_classification_tool = 'mmseqs2' + + taxa_classification_mmseqs_db = null + taxa_classification_mmseqs_db_id = 'Kalamari' + taxa_classification_mmseqs_db_savetmp = false + + taxa_classification_mmseqs_taxonomy_savetmp = false + taxa_classification_mmseqs_taxonomy_searchtype = 2 + taxa_classification_mmseqs_taxonomy_lcaranks = 'kingdom,phylum,class,order,family,genus,species' + taxa_classification_mmseqs_taxonomy_taxlineage = 1 + taxa_classification_mmseqs_taxonomy_sensitivity = '5.0' + taxa_classification_mmseqs_taxonomy_orffilters = '2.0' + taxa_classification_mmseqs_taxonomy_lcamode = 3 + taxa_classification_mmseqs_taxonomy_votemode = 1 // Annotation options - annotation_tool = 'pyrodigal' - save_annotations = false - - annotation_prodigal_singlemode = false - annotation_prodigal_closed = false - annotation_prodigal_transtable = 11 - annotation_prodigal_forcenonsd = false - - annotation_pyrodigal_singlemode = false - annotation_pyrodigal_closed = false - annotation_pyrodigal_transtable = 11 - annotation_pyrodigal_forcenonsd = false - - annotation_bakta_db_localpath = null - annotation_bakta_db_downloadtype = 'full' - annotation_bakta_mincontiglen = 1 - annotation_bakta_translationtable = 11 - annotation_bakta_gram = '?' 
- annotation_bakta_complete = false - annotation_bakta_renamecontigheaders = false - annotation_bakta_compliant = false - annotation_bakta_trna = false - annotation_bakta_tmrna = false - annotation_bakta_rrna = false - annotation_bakta_ncrna = false - annotation_bakta_ncrnaregion = false - annotation_bakta_crispr = false - annotation_bakta_skipcds = false - annotation_bakta_pseudo = false - annotation_bakta_skipsorf = false - annotation_bakta_gap = false - annotation_bakta_ori = false - annotation_bakta_activate_plot = false - - annotation_prokka_singlemode = false - annotation_prokka_rawproduct = false - annotation_prokka_kingdom = 'Bacteria' - annotation_prokka_gcode = 11 - annotation_prokka_cdsrnaolap = false - annotation_prokka_rnammer = false - annotation_prokka_mincontiglen = 1 - annotation_prokka_evalue = 1E-06 - annotation_prokka_coverage = 80 - annotation_prokka_compliant = true - annotation_prokka_addgenes = false - annotation_prokka_retaincontigheaders = false + annotation_tool = 'pyrodigal' + save_annotations = false + + annotation_prodigal_singlemode = false + annotation_prodigal_closed = false + annotation_prodigal_transtable = 11 + annotation_prodigal_forcenonsd = false + + annotation_pyrodigal_singlemode = false + annotation_pyrodigal_closed = false + annotation_pyrodigal_transtable = 11 + annotation_pyrodigal_forcenonsd = false + + annotation_bakta_db = null + annotation_bakta_db_downloadtype = 'full' + annotation_bakta_singlemode = false + annotation_bakta_mincontiglen = 1 + annotation_bakta_translationtable = 11 + annotation_bakta_gram = '?' + annotation_bakta_complete = false + annotation_bakta_renamecontigheaders = false + annotation_bakta_compliant = false + annotation_bakta_trna = false + annotation_bakta_tmrna = false + annotation_bakta_rrna = false + annotation_bakta_ncrna = false + annotation_bakta_ncrnaregion = false + annotation_bakta_crispr = false + annotation_bakta_skipcds = false + annotation_bakta_pseudo = false + annotation_bakta_skipsorf = false + annotation_bakta_gap = false + annotation_bakta_ori = false + annotation_bakta_activate_plot = false + + annotation_prokka_singlemode = false + annotation_prokka_rawproduct = false + annotation_prokka_kingdom = 'Bacteria' + annotation_prokka_gcode = 11 + annotation_prokka_cdsrnaolap = false + annotation_prokka_rnammer = false + annotation_prokka_mincontiglen = 1 + annotation_prokka_evalue = 0.000001 + annotation_prokka_coverage = 80 + annotation_prokka_compliant = true + annotation_prokka_addgenes = false + annotation_prokka_retaincontigheaders = false // Database downloading options - save_databases = false + save_db = false // AMP options - run_amp_screening = false - - amp_skip_amplify = false - - amp_skip_macrel = false - - amp_skip_ampir = false - amp_ampir_model = 'precursor' - amp_ampir_minlength = 10 - - amp_skip_hmmsearch = false - amp_hmmsearch_models = null - amp_hmmsearch_savealignments = false - amp_hmmsearch_savetargets = false - amp_hmmsearch_savedomains = false - - amp_ampcombi_db = null - amp_ampcombi_cutoff = 0 + run_amp_screening = false + + amp_skip_amplify = false + + amp_skip_macrel = false + + amp_skip_ampir = false + amp_ampir_model = 'precursor' + amp_ampir_minlength = 10 + + amp_run_hmmsearch = false + amp_hmmsearch_models = null + amp_hmmsearch_savealignments = false + amp_hmmsearch_savetargets = false + amp_hmmsearch_savedomains = false + + amp_ampcombi_db = null + amp_ampcombi_parsetables_cutoff = 0.6 + amp_ampcombi_parsetables_ampir = '.ampir.tsv' + amp_ampcombi_parsetables_amplify 
= '.amplify.tsv' + amp_ampcombi_parsetables_macrel = '.macrel.prediction' + amp_ampcombi_parsetables_hmmsearch = '.hmmer_hmmsearch.txt' + amp_ampcombi_parsetables_aalength = 100 + amp_ampcombi_parsetables_dbevalue = 5 + amp_ampcombi_parsetables_hmmevalue = 0.06 + amp_ampcombi_parsetables_windowstopcodon = 60 + amp_ampcombi_parsetables_windowtransport = 11 + amp_ampcombi_parsetables_removehitswostopcodons = false + amp_ampcombi_cluster_covmode = 0 + amp_ampcombi_cluster_mode = 1 + amp_ampcombi_cluster_coverage = 0.8 + amp_ampcombi_cluster_seqid = 0.4 + amp_ampcombi_cluster_sensitivity = 4.0 + amp_ampcombi_cluster_removesingletons = false + amp_ampcombi_cluster_minmembers = 0 // ARG options - run_arg_screening = false - - arg_skip_fargene = false - arg_fargene_hmmmodel = 'class_a,class_b_1_2,class_b_3,class_c,class_d_1,class_d_2,qnr,tet_efflux,tet_rpg,tet_enzyme' - arg_fargene_savetmpfiles = false - arg_fargene_minorflength = 90 - arg_fargene_score = null - arg_fargene_translationformat = 'pearson' - arg_fargene_orffinder = false - - arg_skip_rgi = false - arg_rgi_savejson = false - arg_rgi_savetmpfiles = false - arg_rgi_alignmenttool = 'BLAST' - arg_rgi_includeloose = true - arg_rgi_excludenudge = true - arg_rgi_lowquality = false - arg_rgi_data = 'NA' - - arg_skip_amrfinderplus = false - arg_amrfinderplus_db = null - arg_amrfinderplus_identmin = -1 - arg_amrfinderplus_coveragemin = 0.5 - arg_amrfinderplus_translationtable = 11 - arg_amrfinderplus_plus = false - arg_amrfinderplus_name = false - - arg_skip_deeparg = false - arg_deeparg_data = null - arg_deeparg_data_version = 2 // Make sure to update on module version bump! - arg_deeparg_model = 'LS' - arg_deeparg_minprob = 0.8 - arg_deeparg_alignmentidentity = 50 - arg_deeparg_alignmentevalue = 1E-10 - arg_deeparg_alignmentoverlap = 0.8 - arg_deeparg_numalignmentsperentry = 1000 - - arg_skip_abricate = false - arg_abricate_db = 'ncbi' - arg_abricate_minid = 80 - arg_abricate_mincov = 80 - - arg_hamronization_summarizeformat = 'tsv' + run_arg_screening = false + + arg_skip_fargene = false + arg_fargene_hmmmodel = 'class_a,class_b_1_2,class_b_3,class_c,class_d_1,class_d_2,qnr,tet_efflux,tet_rpg,tet_enzyme' + arg_fargene_savetmpfiles = false + arg_fargene_minorflength = 90 + arg_fargene_score = null + arg_fargene_translationformat = 'pearson' + arg_fargene_orffinder = false + + arg_skip_rgi = false + arg_rgi_db = null + arg_rgi_savejson = false + arg_rgi_savetmpfiles = false + arg_rgi_alignmenttool = 'BLAST' + arg_rgi_includeloose = false + arg_rgi_includenudge = false + arg_rgi_lowquality = false + arg_rgi_data = 'NA' + arg_rgi_split_prodigal_jobs = true + + arg_skip_amrfinderplus = false + arg_amrfinderplus_db = null + arg_amrfinderplus_identmin = -1 + arg_amrfinderplus_coveragemin = 0.5 + arg_amrfinderplus_translationtable = 11 + arg_amrfinderplus_plus = false + arg_amrfinderplus_name = false + + arg_skip_deeparg = false + arg_deeparg_db = null + arg_deeparg_db_version = 2 // Make sure to update on module version bump! 
+ arg_deeparg_model = 'LS' + arg_deeparg_minprob = 0.8 + arg_deeparg_alignmentidentity = 50 + arg_deeparg_alignmentevalue = 1e-10 + arg_deeparg_alignmentoverlap = 0.8 + arg_deeparg_numalignmentsperentry = 1000 + + arg_skip_abricate = false + arg_abricate_db_id = 'ncbi' + arg_abricate_db = null + arg_abricate_minid = 80 + arg_abricate_mincov = 80 + + arg_hamronization_summarizeformat = 'tsv' + + arg_skip_argnorm = false // BGC options - run_bgc_screening = false - - bgc_skip_antismash = false - bgc_antismash_databases = null - bgc_antismash_installationdirectory = null - bgc_antismash_cbgeneral = false - bgc_antismash_cbknownclusters = false - bgc_antismash_cbsubclusters = false - bgc_antismash_smcogtrees = false - bgc_antismash_ccmibig = false - bgc_antismash_contigminlength = 1000 - bgc_antismash_hmmdetectionstrictness = 'relaxed' - bgc_antismash_taxon = 'bacteria' - bgc_antismash_sampleminlength = 1000 - - bgc_skip_deepbgc = false - bgc_deepbgc_database = null - bgc_deepbgc_score = 0.5 - bgc_deepbgc_prodigalsinglemode = false - bgc_deepbgc_mergemaxproteingap = 0 - bgc_deepbgc_mergemaxnuclgap = 0 - bgc_deepbgc_minnucl = 1 - bgc_deepbgc_minproteins = 1 - bgc_deepbgc_mindomains = 1 - bgc_deepbgc_minbiodomains = 0 - bgc_deepbgc_classifierscore = 0.5 - - bgc_skip_gecco = false - bgc_gecco_cds = 3 - bgc_gecco_threshold = 0.8 - bgc_gecco_pfilter = 1E-9 - bgc_gecco_edgedistance = 0 - bgc_gecco_mask = false - - bgc_skip_hmmsearch = false - bgc_hmmsearch_models = null - bgc_hmmsearch_savealignments = false - bgc_hmmsearch_savetargets = false - bgc_hmmsearch_savedomains = false - - // MultiQC options - multiqc_config = null - multiqc_title = null - multiqc_logo = null - max_multiqc_email_size = '25.MB' - multiqc_methods_description = null - - // Boilerplate options - outdir = null - publish_dir_mode = 'copy' - email = null - email_on_fail = null - plaintext_email = false - monochrome_logs = false - hook_url = null - help = false - version = false + run_bgc_screening = false + + bgc_mincontiglength = 3000 + bgc_savefilteredcontigs = false + + bgc_skip_antismash = false + bgc_antismash_db = null + bgc_antismash_installdir = null + bgc_antismash_cbgeneral = false + bgc_antismash_cbknownclusters = false + bgc_antismash_cbsubclusters = false + bgc_antismash_smcogtrees = false + bgc_antismash_ccmibig = false + bgc_antismash_contigminlength = 3000 + bgc_antismash_hmmdetectionstrictness = 'relaxed' + bgc_antismash_pfam2go = false + bgc_antismash_rre = false + bgc_antismash_taxon = 'bacteria' + bgc_antismash_tfbs = false + + bgc_skip_deepbgc = false + bgc_deepbgc_db = null + bgc_deepbgc_score = 0.5 + bgc_deepbgc_prodigalsinglemode = false + bgc_deepbgc_mergemaxproteingap = 0 + bgc_deepbgc_mergemaxnuclgap = 0 + bgc_deepbgc_minnucl = 1 + bgc_deepbgc_minproteins = 1 + bgc_deepbgc_mindomains = 1 + bgc_deepbgc_minbiodomains = 0 + bgc_deepbgc_classifierscore = 0.5 + + bgc_skip_gecco = false + bgc_gecco_cds = 3 + bgc_gecco_threshold = 0.8 + bgc_gecco_pfilter = 0.000000001 + bgc_gecco_edgedistance = 0 + bgc_gecco_mask = false + + bgc_run_hmmsearch = false + bgc_hmmsearch_models = null + bgc_hmmsearch_savealignments = false + bgc_hmmsearch_savetargets = false + bgc_hmmsearch_savedomains = false // Config options - config_profile_name = null - config_profile_description = null - custom_config_version = 'master' - custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" - config_profile_contact = null - config_profile_url = null + config_profile_name = null + 
config_profile_description = null + custom_config_version = 'master' + custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" + config_profile_contact = null + config_profile_url = null // Max resource options // Defaults only, expecting to be overwritten - max_memory = '128.GB' - max_cpus = 16 - max_time = '240.h' + max_memory = '128.GB' + max_cpus = 16 + max_time = '240.h' // Schema validation default options - validationFailUnrecognisedParams = false - validationLenientMode = false - validationSchemaIgnoreParams = 'genomes,igenomes_base' - validationShowHiddenParams = false - validate_params = true + validationFailUnrecognisedParams = false + validationLenientMode = false + validationSchemaIgnoreParams = 'genomes,igenomes_base,fasta,monochromeLogs' + validationShowHiddenParams = false + validate_params = true } @@ -227,110 +270,124 @@ try { } // Load nf-core/funcscan custom profiles from different institutions. -// Warning: Uncomment only if a pipeline-specific instititutional config already exists on nf-core/configs! try { - includeConfig "${params.custom_config_base}/pipeline/funcscan.config" + includeConfig "${params.custom_config_base}/pipeline/funcscan.config" } catch (Exception e) { - System.err.println("WARNING: Could not load nf-core/config/funcscan profiles: ${params.custom_config_base}/pipeline/funcscan.config") + System.err.println("WARNING: Could not load nf-core/config/funcscan profiles: ${params.custom_config_base}/pipeline/funcscan.config") } profiles { debug { - dumpHashes = true - process.beforeScript = 'echo $HOSTNAME' - cleanup = false + dumpHashes = true + process.beforeScript = 'echo $HOSTNAME' + cleanup = false + nextflow.enable.configProcessNamesValidation = true } conda { - conda.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + conda.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + conda.channels = ['conda-forge', 'bioconda', 'defaults'] + apptainer.enabled = false } mamba { - conda.enabled = true - conda.useMamba = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + conda.enabled = true + conda.useMamba = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } docker { - conda.enabled = false - docker.enabled = true - docker.userEmulation = true - conda.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + docker.enabled = true + conda.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false docker.runOptions = '-u $(id -u):$(id -g)' } arm { - docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' + docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' } singularity { - singularity.enabled = true - singularity.autoMounts = true - conda.enabled = false - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + singularity.enabled = true + 
singularity.autoMounts = true + conda.enabled = false + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } podman { - podman.enabled = true - conda.enabled = false - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + podman.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } shifter { - shifter.enabled = true - conda.enabled = false - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + shifter.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } charliecloud { - charliecloud.enabled = true - conda.enabled = false - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - apptainer.enabled = false + charliecloud.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + apptainer.enabled = false } apptainer { - apptainer.enabled = true - apptainer.autoMounts = true - conda.enabled = false - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false + apptainer.enabled = true + apptainer.autoMounts = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + wave { + apptainer.ociAutoPull = true + singularity.ociAutoPull = true + wave.enabled = true + wave.freeze = true + wave.strategy = 'conda,container' } gitpod { - executor.name = 'local' - executor.cpus = 4 - executor.memory = 8.GB + executor.name = 'local' + executor.cpus = 4 + executor.memory = 8.GB } - test { includeConfig 'conf/test.config' } - test_bgc { includeConfig 'conf/test_bgc.config' } - test_full { includeConfig 'conf/test_full.config' } - test_deeparg { includeConfig 'conf/test_deeparg.config' } - test_nothing { includeConfig 'conf/test_nothing.config' } + test { includeConfig 'conf/test.config' } + test_bakta { includeConfig 'conf/test_bakta.config' } + test_prokka { includeConfig 'conf/test_prokka.config' } + test_bgc_bakta { includeConfig 'conf/test_bgc_bakta.config' } + test_bgc_prokka { includeConfig 'conf/test_bgc_prokka.config' } + test_bgc_pyrodigal { includeConfig 'conf/test_bgc_pyrodigal.config' } + test_taxonomy_bakta { includeConfig 'conf/test_taxonomy_bakta.config' } + test_taxonomy_prokka { includeConfig 'conf/test_taxonomy_prokka.config' } + test_taxonomy_pyrodigal { includeConfig 'conf/test_taxonomy_pyrodigal.config' } + test_full { includeConfig 'conf/test_full.config' } + test_nothing { includeConfig 'conf/test_nothing.config' } + test_preannotated { includeConfig 'conf/test_preannotated.config' } + test_preannotated_bgc { includeConfig 'conf/test_preannotated_bgc.config' } } // Set default registry for Apptainer, Docker, Podman and Singularity independent of -profile @@ -346,13 +403,6 @@ plugins { id 'nf-validation@1.1.3' // Validation of pipeline parameters and creation of an input channel from a sample sheet } -// Load igenomes.config if required -if (!params.igenomes_ignore) { 
- includeConfig 'conf/igenomes.config' -} else { - params.genomes = [:] -} - // Export these variables to prevent local Python/R libraries from conflicting with those in the container // The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container. // See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable. @@ -367,6 +417,9 @@ env { // Capture exit codes from upstream processes when piping process.shell = ['/bin/bash', '-euo', 'pipefail'] +// Disable process selector warnings by default. Use debug profile to enable warnings. +nextflow.enable.configProcessNamesValidation = false + def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') timeline { enabled = true @@ -392,7 +445,7 @@ manifest { description = """Pipeline for screening for functional components of assembled contigs""" mainScript = 'main.nf' nextflowVersion = '!>=23.04.0' - version = '1.1.5' + version = '2.0.0' doi = '10.5281/zenodo.7643099' } diff --git a/nextflow_schema.json b/nextflow_schema.json index 60f5e16f..ca01d496 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -16,11 +16,11 @@ "type": "string", "format": "file-path", "exists": true, + "schema": "assets/schema_input.json", "mimetype": "text/csv", "pattern": "^\\S+\\.csv$", - "schema": "assets/schema_input.json", - "description": "Path to comma-separated file containing information sample names and paths to corresponding FASTA files.", - "help_text": "Before running the pipeline, you will need to create a design file with information about the samples to be scanned by nf-core/funcscan, containing `sample name` and `path/to/your/contigs.fasta`. Use this parameter to specify its location. It has to be a comma-separated file with 2 columns, and a header row (`sample, fasta`). See [usage docs](https://nf-co.re/funcscan/usage#samplesheet-input).", + "description": "Path to comma-separated file containing sample names and paths to corresponding FASTA files, and optional annotation files.", + "help_text": "Before running the pipeline, you will need to create a design file with information about the samples to be scanned by nf-core/funcscan, containing at a minimum sample names and paths to contigs. Use this parameter to specify its location. It has to be a two or four column comma-separated file with a header row (`sample,fasta` or `sample,fasta,protein,gbk`). 
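For illustration, a minimal four-column samplesheet matching the header described here could look as follows; the sample names and file paths are invented:

```bash
# Hypothetical samplesheet with the optional pre-annotation columns (protein, gbk).
cat > samplesheet.csv << 'EOF'
sample,fasta,protein,gbk
sample_1,sample_1.fasta,sample_1.faa,sample_1.gbk
sample_2,sample_2.fasta,sample_2.faa,sample_2.gbk
EOF
```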
See [usage docs](https://nf-co.re/funcscan/usage#samplesheet-input).", "fa_icon": "fas fa-file-csv" }, "outdir": { @@ -45,14 +45,14 @@ "help_text": "" }, "screening_type_activation": { - "title": "Screening Type Activation", + "title": "Screening type activation", "type": "object", "description": "These parameters influence which workflow (ARG, AMP and/or BGC) to activate.", "default": "", "properties": { "run_amp_screening": { "type": "boolean", - "description": "Activate antimicrobial peptide screening tools.", + "description": "Activate antimicrobial peptide gene screening tools.", "fa_icon": "fas fa-check-circle" }, "run_arg_screening": { @@ -66,10 +66,124 @@ "fa_icon": "fas fa-check-circle" } }, - "fa_icon": "fas fa-network-wired" + "fa_icon": "fa fa-list-ol" + }, + "taxonomic_classification_general_options": { + "title": "Taxonomic classification: general options", + "type": "object", + "description": "These options influence whether to activate the taxonomic classification of the input nucleotide sequences.", + "default": "", + "properties": { + "run_taxa_classification": { + "type": "boolean", + "description": "Activates the taxonomic classification of input nucleotide sequences.", + "help_text": "This flag turns on the taxonomic classification of input nucleotide sequences. Taxonomic annotation should be turned on if the bacterial sources of the input metagenomes are unknown, as it can help identify the source organism of an AMP, BGC or ARG hit for downstream laboratory experiments. This flag should be left off (the default) if the input nucleotide sequences represent a single known genome or if *nf-core/mag* was run beforehand. Turning on this flag noticeably slows down the pipeline and requires >8GB RAM. Due to the size of the resulting table, the final summary is provided in a zipped format.", + "fa_icon": "fas fa-check-circle" + }, + "taxa_classification_tool": { + "type": "string", + "default": "mmseqs2", + "help_text": "This flag specifies which tool should be used for taxonomic classification. At the moment only 'MMseqs2' is incorporated in the pipeline.", + "description": "Specifies the tool used for taxonomic classification.", + "fa_icon": "fas fa-tools" + } + }, + "fa_icon": "fas fa-tag" + }, + "taxonomic_classification_mmseqs2_databases": { + "title": "Taxonomic classification: MMseqs2 databases", + "type": "object", + "description": "These parameters influence the database to be used in classifying the taxonomy.", + "default": "", + "properties": { + "taxa_classification_mmseqs_db": { + "type": "string", + "description": "Specify a path to an MMseqs2-formatted database.", + "help_text": "Specify a path to a database that is prepared in MMseqs2 format as detailed in the [documentation](https://mmseqs.com/latest/userguide.pdf).\n\nThe directory should contain files such as `.version` and `.taxonomy` at the top level.", + "fa_icon": "fas fa-database" + }, + "taxa_classification_mmseqs_db_id": { + "type": "string", + "default": "Kalamari", + "help_text": "Specify which MMseqs2-formatted database to use to classify the input contigs. This can be a nucleotide or amino acid database that includes taxonomic classifications. For example, both GTDB (an amino acid database) and SILVA (a nucleotide database) are supported by MMseqs2. 
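As a hedged sketch of what such a label resolves to, the default database could also be fetched manually via the `mmseqs databases` workflow named below; the output database and tmp directory names are placeholders:

```bash
# Download and format the default 'Kalamari' label as an MMseqs2 database.
mmseqs databases Kalamari kalamari_db tmp
```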
More details can be found in the [documentation](https://mmseqs.com/latest/userguide.pdf).\n\n> Modifies tool parameter(s):\n> - mmseqs databases ", + "description": "Specify the label of the database to be used.", + "fa_icon": "fas fa-address-card" + }, + "taxa_classification_mmseqs_db_savetmp": { + "type": "boolean", + "help_text": "This flag saves the temporary files from downloading the database and formatting it in the MMseqs2 format into the output folder. More details can be found in the [documentation](https://mmseqs.com/latest/userguide.pdf).\n\n> Modifies tool parameter(s):\n> - mmseqs databases: `--remove-tmp-files`", + "description": "Specify whether the temporary files should be saved.", + "fa_icon": "fas fa-save" + } + }, + "fa_icon": "fas fa-tag" + }, + "taxonomic_classification_mmseqs2_taxonomy": { + "title": "Taxonomic classification: MMseqs2 taxonomy", + "type": "object", + "description": "These parameters influence the taxonomic classification step.", + "default": "", + "properties": { + "taxa_classification_mmseqs_taxonomy_savetmp": { + "type": "boolean", + "help_text": "This flag saves the temporary files from creating the taxonomy database and the final `tsv` file into the output folder. More details can be found in the [documentation](https://mmseqs.com/latest/userguide.pdf).\n\n> Modifies tool parameter(s):\n> - mmseqs taxonomy: `--remove-tmp-files`", + "description": "Specify whether to save the temporary files.", + "fa_icon": "fas fa-save" + }, + "taxa_classification_mmseqs_taxonomy_searchtype": { + "type": "integer", + "default": 2, + "help_text": "Specify the type of alignment to be carried out between the query database and the reference MMseqs2 database. This can be set to '0' for automatic detection, '1' for amino acid alignment, '2' for translating the inputs and running the alignment on the translated sequences, '3' for nucleotide-based alignment, and '4' for alignment of the translated nucleotide sequences. More details can be found in the [documentation](https://mmseqs.com/latest/userguide.pdf).\n\n> Modifies tool parameter(s):\n> - mmseqs taxonomy: `--search-type`", + "description": "Specify the alignment type between database and query.", + "fa_icon": "fas fa-align-center" + }, + "taxa_classification_mmseqs_taxonomy_lcaranks": { + "type": "string", + "default": "kingdom,phylum,class,order,family,genus,species", + "help_text": "Specify the taxonomic ranks to include in the taxonomic lineage column in the final `.tsv` file. For example, 'kingdom,phylum,class,order,family,genus,species'. More details can be found in the [documentation](https://mmseqs.com/latest/userguide.pdf).\n\n> Modifies tool parameter(s):\n> - mmseqs taxonomy: `--lca-ranks`", + "description": "Specify the taxonomic levels to display in the result table.", + "fa_icon": "fas fa-stream" + }, + "taxa_classification_mmseqs_taxonomy_taxlineage": { + "type": "integer", + "default": 1, + "help_text": "This flag specifies whether the taxonomic lineage should be included in the output `.tsv` file. The taxonomic lineage is obtained from the internal module of `mmseqs/taxonomy` that infers the last common ancestor to classify the taxonomy. A value of '0' writes no taxonomic lineage, a value of '1' adds a column with the full lineage names prefixed with the abbreviation of the lineage level, e.g. `k_Prokaryotes;p_Bacteroidetes;c_....;o_....;f_....;g_....;s_....,` while a value of '2' adds a column with the full NCBI taxids lineage, e.g. `1324;2345;4546;5345`. 
More details can be found in the [documentation](https://mmseqs.com/latest/userguide.pdf).\n\n> Modifies tool parameter(s):\n> - mmseqs taxonomy: `--tax-lineage`", + "description": "Specify whether to include or remove the taxonomic lineage.", + "fa_icon": "fab fa-audible" + }, + "taxa_classification_mmseqs_taxonomy_sensitivity": { + "type": "string", + "default": "5.0", + "help_text": "This flag specifies the speed and sensitivity of the taxonomic search. It determines how many k-mers are produced during the preliminary seeding stage. A very fast search requires a low value, e.g. '1.0', while a very sensitive search requires a high value, e.g. '7.0'. More details can be found in the [documentation](https://mmseqs.com/latest/userguide.pdf).\n\n> Modifies tool parameter(s):\n> - mmseqs taxonomy: `-s`", + "description": "Specify the speed and sensitivity for taxonomy assignment.", + "fa_icon": "fas fa-history" + }, + "taxa_classification_mmseqs_taxonomy_orffilters": { + "type": "string", + "default": "2.0", + "help_text": "This flag specifies the sensitivity used for prefiltering the query ORF. Before the taxonomy-assigning step, MMseqs2 searches the predicted ORFs against the provided database. This value influences the speed with which the search is carried out. More details can be found in the [documentation](https://mmseqs.com/latest/userguide.pdf).\n\n> Modifies tool parameter(s):\n> - mmseqs taxonomy: `--orf-filter-s`", + "description": "Specify the ORF search sensitivity in the prefilter step.", + "fa_icon": "fas fa-history" + }, + "taxa_classification_mmseqs_taxonomy_lcamode": { + "type": "integer", + "default": 3, + "help_text": "This flag specifies the strategy used for assigning the last common ancestor (LCA). MMseqs2 assigns taxonomy based on an accelerated approximation of the 2bLCA protocol and uses the value of '3'. In this mode, the taxonomic assignment is based not only on usual alignment parameters but also considers the taxonomic classification of the LCA. When the value '4' is used the LCA is assigned based on all the equal scoring top hits. If the value '1' is used the LCA assignment is disregarded and the taxonomic assignment is based on usual alignment parameters like E-value and coverage. More details can be found in the [documentation](https://mmseqs.com/latest/userguide.pdf). \n\n> Modifies tool parameter(s):\n> - mmseqs taxonomy: `--lca-mode`", + "description": "Specify the mode to assign the taxonomy.", + "fa_icon": "fas fa-broom" + }, + "taxa_classification_mmseqs_taxonomy_votemode": { + "type": "integer", + "default": 1, + "help_text": "This flag assigns the mode value with which the weights are computed. The value of '0' stands for uniform weights of taxonomy assignments, the value of '1' uses the minus log E-value, and '2' the actual score. 
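A hedged command-line sketch of overriding a few of these MMseqs2 taxonomy defaults (the values are illustrative only, not recommendations):

```bash
nextflow run nf-core/funcscan -profile docker \
    --input samplesheet.csv \
    --outdir results \
    --run_amp_screening \
    --run_taxa_classification \
    --taxa_classification_mmseqs_taxonomy_lcaranks 'phylum,class,genus,species' \
    --taxa_classification_mmseqs_taxonomy_sensitivity '7.0'
```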
More details can be found in the [documentation](https://mmseqs.com/latest/userguide.pdf).\n\n> Modifies tool parameter(s):\n> - mmseqs taxonomy: `--vote-mode`", + "description": "Specify the weights of the taxonomic assignment.", + "fa_icon": "fas fa-balance-scale-right" + } + }, + "fa_icon": "fas fa-tag" }, - "annotation": { - "title": "Annotation", + "annotation_general_options": { + "title": "Annotation: general options", "type": "object", "description": "These options influence the generation of annotation files required for downstream steps in ARG, AMP, and BGC workflows.", "default": "", @@ -87,26 +201,34 @@ "fa_icon": "fas fa-save" } }, - "fa_icon": "fas fa-file-signature" + "fa_icon": "fas fa-file-signature", + "help_text": "" }, "annotation_bakta": { "title": "Annotation: BAKTA", "type": "object", - "description": "These parameters influence the annotation algorithm of Bacteria used by BAKTA.", + "description": "BAKTA is a tool developed to annotate bacterial genomes and plasmids from both isolates and MAGs. More info: https://github.com/oschwengers/bakta", "default": "", "properties": { - "annotation_bakta_db_localpath": { + "annotation_bakta_db": { "type": "string", "fa_icon": "fas fa-database", - "description": "Specify a path to BAKTA database.", - "help_text": "Specify a path to a database that is prepared in a BAKTA format." + "description": "Specify a path to a local copy of a BAKTA database.", + "help_text": "If a local copy of a BAKTA database exists, specify the path to that database, which must be prepared in BAKTA format. Otherwise, it will be downloaded for you.\n\nThe contents of the directory should have files such as `*.dmnd` in the top level." }, "annotation_bakta_db_downloadtype": { "type": "string", "description": "Download full or light version of the Bakta database if not supplying own database.", - "help_text": "If you want the pipeline to download the Bakta database for you, you can choose between the full (33.1 GB) and light (1.3 GB) version. The full version is generally recommended for best annotation results, because it contains all of these:\n\n- UPS: unique protein sequences identified via length and MD5 hash digests (100% coverage & 100% sequence identity)\n- IPS: identical protein sequences comprising seeds of UniProt's UniRef100 protein sequence clusters\n- PSC: protein sequences clusters comprising seeds of UniProt's UniRef90 protein sequence clusters\n- PSCC: protein sequences clusters of clusters comprising annotations of UniProt's UniRef50 protein sequence clusters\n\nIf download bandwidth, storage, memory, or run duration requirements become an issue, go for the light version (which only contains PSCCs) by modifying the `annotation_bakta_db_downloadtype` flag.\nMore details can be found in the [documentation](https://github.com/oschwengers/bakta#database)\n\n> Modifies tool parameter(s):\n> - BAKTA_DBDOWNLOAD: `--type`", + "help_text": "If you want the pipeline to download the Bakta database for you, you can choose between the full (33.1 GB) and light (1.3 GB) version. 
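If you would rather manage the Bakta database yourself than have the pipeline download it, a sketch using Bakta's own download helper could look like this (the `bakta_db download` command comes from the Bakta documentation; the path is a placeholder):

```bash
# Fetch the light database variant into a local cache directory
bakta_db download --output /path/to/bakta_cache --type light
```

The resulting database directory can then be supplied via `--annotation_bakta_db`.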
The full version is generally recommended for best annotation results, because it contains all of these:\n\n- UPS: unique protein sequences identified via length and MD5 hash digests (100% coverage & 100% sequence identity)\n- IPS: identical protein sequences comprising seeds of UniProt's UniRef100 protein sequence clusters\n- PSC: protein sequences clusters comprising seeds of UniProt's UniRef90 protein sequence clusters\n- PSCC: protein sequences clusters of clusters comprising annotations of UniProt's UniRef50 protein sequence clusters\n\nIf download bandwidth, storage, memory, or run duration requirements become an issue, go for the light version (which only contains PSCCs) by modifying the `annotation_bakta_db_downloadtype` flag.\n\nMore details can be found in the [documentation](https://github.com/oschwengers/bakta#database)\n\n> Modifies tool parameter(s):\n> - BAKTA_DBDOWNLOAD: `--type`", "fa_icon": "fas fa-database", - "enum": ["full", "light"] + "enum": ["full", "light"], + "default": "full" + }, + "annotation_bakta_singlemode": { + "type": "boolean", + "description": "Use the default genome-length optimised mode (rather than the metagenome mode).", + "help_text": "By default, Bakta's `--meta` mode is used in the pipeline to improve the gene prediction of highly fragmented metagenomes.\n\nBy specifying this parameter Bakta will instead use its default mode that is optimised for singular 'complete' genome sequences.\n\nMore details can be found in the [documentation](https://github.com/oschwengers/bakta/blob/main/README.md#usage).\n\n> Modifies tool parameter(s):\n> - BAKTA: `--meta`", + "fa_icon": "fas fa-dna" }, "annotation_bakta_mincontiglen": { "type": "integer", @@ -136,20 +258,20 @@ "annotation_bakta_complete": { "type": "boolean", "description": "Specify that all contigs are complete replicons.", - "help_text": "This flag expects contigs that make up complete chromosomes and/or plasmids. By calling it, the user ensured that the contigs are complete replicons. More details can be found in the [documentation](https://github.com/oschwengers/bakta/blob/main/README.md#usage).\n\n> Modifies tool parameter(s):\n> - BAKTA: `--complete`", + "help_text": "This flag expects contigs that make up complete chromosomes and/or plasmids. By calling it, the user ensures that the contigs are complete replicons. More details can be found in the [documentation](https://github.com/oschwengers/bakta/blob/main/README.md#usage).\n\n> Modifies tool parameter(s):\n> - BAKTA: `--complete`", "fa_icon": "far fa-circle" }, "annotation_bakta_renamecontigheaders": { "type": "boolean", "description": "Changes the original contig headers.", "help_text": "This flag specifies that the contig headers should be rewritten. More details can be found in the [documentation](https://github.com/oschwengers/bakta/blob/main/README.md#usage).\n\n> Modifies tool parameter(s):\n> - BAKTA: `--keep-contig-headers`", - "fa_icon": "far fa-list-alt" + "fa_icon": "fas fa-remove-format" }, "annotation_bakta_compliant": { "type": "boolean", "description": "Clean the result annotations to standardise them to Genbank/ENA conventions.", - "help_text": "The resulting annotations are cleaned up to standardise them to Genbank/ENA/DDJB conventions. CDS without any attributed hits and those without gene symbols or product descriptions different from hypothetical will be marked as 'hypothetical'.\nWhen activated the '--min-contig-length' will be set to 200. 
More info can be found [here](https://github.com/oschwengers/bakta).\n\n> Modifies tool parameter(s):\n> - BAKTA: `--compliant`", - "fa_icon": "fas fa-check" + "help_text": "The resulting annotations are cleaned up to standardise them to Genbank/ENA/DDBJ conventions. CDS without any attributed hits and those without gene symbols or product descriptions different from hypothetical will be marked as 'hypothetical'.\nWhen activated the `--min-contig-length` will be set to 200. More info can be found [here](https://github.com/oschwengers/bakta).\n\n> Modifies tool parameter(s):\n> - BAKTA: `--compliant`", + "fa_icon": "fas fa-check-circle" }, "annotation_bakta_trna": { "type": "boolean", @@ -172,7 +294,7 @@ "annotation_bakta_ncrna": { "type": "boolean", "description": "Activate ncRNA detection & annotation.", - "help_text": "This flag activates [Infernal vs. Rfam ncRNA covariance models](http://eddylab.org/infernal/) that predicts ncRNA genes.\nBAKTA distinguishes between ncRNA genes and (cis-regulatory) regions to enable the distinction of feature overlap detection.\nThis including distinguishing between ncRNA gene types: sRNA, antisense, ribozyme and antitoxin. More details can be found in the [documentation](https://github.com/oschwengers/bakta/blob/main/README.md#usage).\n\n> Modifies tool parameter(s):\n> - BAKTA: `--ncrna`", + "help_text": "This flag activates [Infernal vs. Rfam ncRNA covariance models](http://eddylab.org/infernal/) that predict ncRNA genes.\nBAKTA distinguishes between ncRNA genes and (cis-regulatory) regions to enable the distinction of feature overlap detection.\nThis includes distinguishing between ncRNA gene types: sRNA, antisense, ribozyme and antitoxin. More details can be found in the [documentation](https://github.com/oschwengers/bakta/blob/main/README.md#usage).\n\n> Modifies tool parameter(s):\n> - BAKTA: `--ncrna`", "fa_icon": "fas fa-forward" }, "annotation_bakta_ncrnaregion": { @@ -190,13 +312,13 @@ "annotation_bakta_skipcds": { "type": "boolean", "description": "Skip CDS detection & annotation.", - "help_text": "This flag skips CDS prediction that is done by [PYRODIGAL](https://github.com/althonos/pyrodigal) with which the distinct prediction for complete replicons and uncompleted contigs is done.\nFor more information on how BAKTA predicts CDS please refer to BAKTA [documentation](https://github.com/oschwengers/bakta).\n\n> Modifies tool parameter(s):\n> - BAKTA: `--skip-cds`", + "help_text": "This flag skips the CDS prediction carried out by [PYRODIGAL](https://github.com/althonos/pyrodigal), which predicts CDS separately for complete replicons and incomplete contigs.\nFor more information on how BAKTA predicts CDS please refer to the BAKTA [documentation](https://github.com/oschwengers/bakta).\n\n> Modifies tool parameter(s):\n> - BAKTA: `--skip-cds`", "fa_icon": "fas fa-forward" }, "annotation_bakta_pseudo": { "type": "boolean", "description": "Activate pseudogene detection & annotation.", - "help_text": "This flag activates the search for reference Phytochelatin Synthase genes (PCSs) using hypothetical CDS as seed sequences, then aligns the translated PCSs against up-/downstream-elongated CDS regions. For more info refer to BAKTA [documentation](https://github.com/oschwengers/bakta). 
\n\n> Modifies tool parameter(s):\n> - BAKTA: `--skip-pseudo`", + "help_text": "This flag activates the search for reference Phytochelatin Synthase genes (PCSs) using 'hypothetical' CDS as seed sequences, then aligns the translated PCSs against up-/downstream-elongated CDS regions. More details can be found in the BAKTA [documentation](https://github.com/oschwengers/bakta). \n\n> Modifies tool parameter(s):\n> - BAKTA: `--skip-pseudo`", "fa_icon": "fas fa-forward" }, "annotation_bakta_skipsorf": { @@ -208,7 +330,7 @@ "annotation_bakta_gap": { "type": "boolean", "description": "Activate gap detection & annotation.", - "help_text": "Activates any gene annotation found within contig assembly gaps. For more info. please refer to BAKTA [documentation](https://github.com/oschwengers/bakta). \n\n> Modifies tool parameter(s):\n> - BAKTA: `--skip-gap`", + "help_text": "Activates any gene annotation found within contig assembly gaps. More details can be found in the BAKTA [documentation](https://github.com/oschwengers/bakta). \n\n> Modifies tool parameter(s):\n> - BAKTA: `--skip-gap`", "fa_icon": "fas fa-forward" }, "annotation_bakta_ori": { @@ -224,33 +346,32 @@ "help_text": "Activate this flag to generate genome plots (might be memory-intensive).\n\n> Modifies tool parameter(s):\n> - BAKTA: `--skip-plot`" } }, - "fa_icon": "fas fa-file-signature", - "help_text": "BAKTA is a tool developed to annotate bacterial genomes and plasmids from both isolates and MAGs. \n\nDocumentation: [https://github.com/oschwengers/bakta](https://github.com/oschwengers/bakta)" + "fa_icon": "fas fa-file-signature" }, "annotation_prokka": { "title": "Annotation: Prokka", "type": "object", - "description": "These parameters influence the annotation algorithm used by Prokka.", + "description": "Prokka annotates genomic sequences belonging to bacterial, archaeal and viral genomes. More info: https://github.com/tseemann/prokka", "default": "", "properties": { "annotation_prokka_singlemode": { "type": "boolean", "description": "Use the default genome-length optimised mode (rather than the metagenome mode).", - "help_text": "By default, Prokka's --metagenome mode is used in the pipeline to improve the gene prediction of highly fragmented metagenomes.\n\nBy specifying this parameter Prokka will instead use it's default mode that is optimised for singular 'complete' genome sequences.\n\nFor more information, please check Prokka [documentation](https://github.com/tseemann/prokka).\n\n> Modifies tool parameter(s):\n> - Prokka: `--metagenome`", + "help_text": "By default, Prokka's `--metagenome` mode is used in the pipeline to improve the gene prediction of highly fragmented metagenomes.\n\nBy specifying this parameter Prokka will instead use its default mode that is optimised for singular 'complete' genome sequences.\n\nFor more information, please check the Prokka [documentation](https://github.com/tseemann/prokka).\n\n> Modifies tool parameter(s):\n> - Prokka: `--metagenome`", "fa_icon": "fas fa-braille" }, "annotation_prokka_rawproduct": { "type": "boolean", "description": "Suppress the default clean-up of the gene annotations.", - "help_text": "By default, annotation in Prokka is carried out by alignment to other proteins in its database, or the databases the user provides via the tools `--proteins` flag. 
The resulting annotations are then cleaned up to standardise them to Genbank/ENA conventions.\n'Vague names' are set to 'hypothetical proteins', 'possible/probable/predicted' are set to 'putative' and 'EC/CPG and locus tag ids' are removed.\n\nBy supplying this flag you stop such clean up leaving the original annotation names.\n\nFor more information please check Prokka [documentation](https://github.com/tseemann/prokka).\n\nThis flag suppresses this default behavior of Prokka (which is to perform the cleaning).\n\n> Modifies tool parameter(s):\n> - Prokka: `--rawproduct`", + "help_text": "By default, annotation in Prokka is carried out by alignment to other proteins in its database, or the databases the user provides via the tool's `--proteins` flag. The resulting annotations are then cleaned up to standardise them to Genbank/ENA conventions.\n'Vague names' are set to 'hypothetical proteins', 'possible/probable/predicted' are set to 'putative' and 'EC/CPG and locus tag ids' are removed.\n\nBy supplying this flag you stop such clean-up, leaving the original annotation names.\n\nFor more information please check the Prokka [documentation](https://github.com/tseemann/prokka).\n\nThis flag suppresses this default behavior of Prokka (which is to perform the cleaning).\n\n> Modifies tool parameter(s):\n> - Prokka: `--rawproduct`", "fa_icon": "fab fa-product-hunt" }, "annotation_prokka_kingdom": { "type": "string", "default": "Bacteria", - "fa_icon": "fab fa-accusoft", + "fa_icon": "fas fa-crown", "description": "Specify the kingdom that the input represents.", - "help_text": "Specifies the kingdom that the input sample is derived from and/or you wish to screen for\n\n> ⚠️ Prokka cannot annotate Eukaryotes.\n\nFor more information please check Prokka [documentation](https://github.com/tseemann/prokka).\n\n> Modifies tool parameter(s):\n> - Prokka: `--kingdom`", + "help_text": "Specifies the kingdom that the input sample is derived from and/or you wish to screen for.\n\n> \u26a0\ufe0f Prokka cannot annotate Eukaryotes.\n\nFor more information please check the Prokka [documentation](https://github.com/tseemann/prokka).\n\n> Modifies tool parameter(s):\n> - Prokka: `--kingdom`", "enum": ["Archaea", "Bacteria", "Mitochondria", "Viruses"] }, "annotation_prokka_gcode": { @@ -259,28 +380,28 @@ "minimum": 0, "maximum": 25, "description": "Specify the translation table used to annotate the sequences.", - "help_text": "Specify the translation table used to annotate the sequences. All possible genetic codes (1-25) used for gene annotation can be found [here](https://en.wikipedia.org/wiki/List_of_genetic_codes). This flag is required if the flag `--kingdom` is assigned.\n\nFor more information please check Prokka [documentation](https://github.com/tseemann/prokka).\n\n> Modifies tool parameter(s):\n> - Prokka: `--gcode`", - "fa_icon": "fas fa-border-none" + "help_text": "Specify the translation table used to annotate the sequences. All possible genetic codes (1-25) used for gene annotation can be found [here](https://en.wikipedia.org/wiki/List_of_genetic_codes). 
This flag is required if the flag `--kingdom` is assigned.\n\nFor more information please check the Prokka [documentation](https://github.com/tseemann/prokka).\n\n> Modifies tool parameter(s):\n> - Prokka: `--gcode`", + "fa_icon": "fas fa-border-all" }, "annotation_prokka_mincontiglen": { "type": "integer", "default": 1, "description": "Minimum contig size required for annotation (bp).", - "help_text": "Specify the minimum contig lengths to carry out annotations on. The Prokka developers recommend that this should be >= 200 bp, if you plan to submit such annotations to NCBI.\n\nFor more information please check Prokka [documentation](https://github.com/tseemann/prokka).\n\n> Modifies tool parameter(s):\n> - Prokka: `--mincontiglen`", + "help_text": "Specify the minimum contig lengths to carry out annotations on. The Prokka developers recommend that this should be \u2265 200 bp if you plan to submit such annotations to NCBI.\n\nFor more information please check the Prokka [documentation](https://github.com/tseemann/prokka).\n\n> Modifies tool parameter(s):\n> - Prokka: `--mincontiglen`", "fa_icon": "fas fa-ruler-horizontal" }, "annotation_prokka_evalue": { "type": "number", - "default": 0.000001, - "description": "Minimum e-value cut-off.", - "help_text": "Specifiy the minimum e-value used for filtering the alignment hits.\n\nFor more information please check Prokka [documentation](https://github.com/tseemann/prokka).\n\n> Modifies tool parameter(s):\n> - Prokka: `--evalue`", + "default": 1e-6, + "description": "E-value cut-off.", + "help_text": "Specify the maximum E-value used for filtering the alignment hits.\n\nFor more information please check the Prokka [documentation](https://github.com/tseemann/prokka).\n\n> Modifies tool parameter(s):\n> - Prokka: `--evalue`", "fa_icon": "fas fa-sort-amount-down" }, "annotation_prokka_coverage": { "type": "integer", "default": 80, "description": "Set the assigned minimum coverage.", - "help_text": "Specify the minimum coverage percent of the annotated genome. This must be set between 0-100.\n\nFor more information please check Prokka [documentation](https://github.com/tseemann/prokka).\n\n> Modifies tool parameter(s):\n> - Prokka: `--coverage`", + "help_text": "Specify the minimum coverage percent of the annotated genome. This must be set between 0-100.\n\nFor more information please check the Prokka [documentation](https://github.com/tseemann/prokka).\n\n> Modifies tool parameter(s):\n> - Prokka: `--coverage`", "fa_icon": "fas fa-align-right", "minimum": 0, "maximum": 100 @@ -288,118 +409,116 @@ "annotation_prokka_cdsrnaolap": { "type": "boolean", "description": "Allow transfer RNA (tRNA) to overlap coding sequences (CDS).", - "help_text": "Allow transfer RNA (tRNA) to overlap coding sequences (CDS). Transfer RNAs are short stretches of nucleotide sequences that link mRNA and the amino acid sequence of proteins. 
Their presence helps in the annotation of the sequences, because each tRNA can only be attached to one type of amino acid.\n\nFor more information please check the Prokka [documentation](https://github.com/tseemann/prokka).\n\n> Modifies tool parameter(s):\n> - Prokka: `--cdsrnaolap`", "fa_icon": "fas fa-align-justify" }, "annotation_prokka_rnammer": { "type": "boolean", "description": "Use RNAmmer for rRNA prediction.", - "help_text": "Activates [RNAmmer](https://services.healthtech.dtu.dk/service.php?RNAmmer-1.2) instead of the Prokka default [Barrnap](https://github.com/tseemann/barrnap) for rRNA prediction during the annotation process. RNAmmer classifies ribosomal RNA genes in genome sequences by using two levels of Hidden Markov Models. Barrnap uses the nhmmer tool that includes HMMER 3.1 for HMM searching in RNA:DNA style.\n\nFor more information please check Prokka [documentation](https://github.com/tseemann/prokka).\n\n> Modifies tool parameter(s):\n> - Prokka: `--rnammer`", - "fa_icon": "fas fa-adjust" + "help_text": "Activates [RNAmmer](https://services.healthtech.dtu.dk/service.php?RNAmmer-1.2) instead of the Prokka default [Barrnap](https://github.com/tseemann/barrnap) for rRNA prediction during the annotation process. RNAmmer classifies ribosomal RNA genes in genome sequences by using two levels of Hidden Markov Models. Barrnap uses the nhmmer tool that includes HMMER 3.1 for HMM searching in RNA:DNA style.\n\nFor more information please check the Prokka [documentation](https://github.com/tseemann/prokka).\n\n> Modifies tool parameter(s):\n> - Prokka: `--rnammer`", + "fa_icon": "fas fa-cogs" }, "annotation_prokka_compliant": { "type": "boolean", "fa_icon": "far fa-check-circle", "description": "Force contig name to Genbank/ENA/DDBJ naming rules.", - "help_text": "Force the contig headers to conform to the Genbank/ENA/DDJB contig header standards. This is activated in combination with `--centre [X]` when contig headers supplied by the user are non-conforming and therefore need to be renamed before Prokka can start annotation. This flag activates `--genes --mincontiglen 200`. For more information please check Prokka [documentation](https://github.com/tseemann/prokka). \n\n> Modifies tool parameter(s):\n> - Prokka: `--compliant`" + "help_text": "Force the contig headers to conform to the Genbank/ENA/DDBJ contig header standards. This is activated in combination with `--centre [X]` when contig headers supplied by the user are non-conforming and therefore need to be renamed before Prokka can start annotation. This flag activates `--genes --mincontiglen 200`. For more information please check the Prokka [documentation](https://github.com/tseemann/prokka). \n\n> Modifies tool parameter(s):\n> - Prokka: `--compliant`", + "default": true }, "annotation_prokka_addgenes": { "type": "boolean", "fa_icon": "fas fa-dna", "description": "Add the gene features for each CDS hit.", - "help_text": "For every CDS annotated, this flag adds the gene that encodes for that CDS region. For more information please check the Prokka [documentation](https://github.com/tseemann/prokka). 
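Pulling several of these Prokka options together, a hedged invocation might look like the following (assuming Prokka is selected with the pipeline's `--annotation_tool` parameter; values are illustrative):

```bash
nextflow run nf-core/funcscan -profile docker \
    --input samplesheet.csv \
    --outdir results \
    --run_arg_screening \
    --annotation_tool prokka \
    --annotation_prokka_kingdom Bacteria \
    --annotation_prokka_gcode 11 \
    --annotation_prokka_mincontiglen 200
```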
\n\n> Modifies tool parameter(s):\n> - Prokka: `--addgenes`" }, "annotation_prokka_retaincontigheaders": { "type": "boolean", - "fa_icon": "fas fa-remove-format", + "fa_icon": "fas fa-font", "help_text": "This parameter allows prokka to retain the original contig names by activating `PROKKA`'s `--force` flag. If this parameter is set to `false` it activates `PROKKA`'s flags `--locus-tag PROKKA --centre CENTER` so the locus tags (contig names) will be PROKKA_# and the center tag will be CENTER. By default `PROKKA` changes contig headers to avoid errors that might arise due to long contig headers, so this must be turned on if the user has short contig names that should be retained by `PROKKA`. \n\n> Modifies tool parameter(s):\n> - Prokka: `--locus-tag PROKKA --centre CENTER`\n> - Prokka: `--force`", "description": "Retains contig names." } }, - "fa_icon": "fas fa-tools", - "help_text": "Prokka annotates genomic sequences belonging to bacterial, archaeal and viral genomes.\n\nDocumentation: https://github.com/tseemann/prokka" + "fa_icon": "fas fa-file-signature" }, "annotation_prodigal": { "title": "Annotation: Prodigal", "type": "object", - "description": "These parameters influence the annotation algorithm used by Prodigal.", + "description": "Prodigal is a protein-coding gene prediction tool developed to run on bacterial and archaeal genomes. More info: https://github.com/hyattpd/prodigal/wiki", "default": "", "properties": { "annotation_prodigal_singlemode": { "type": "boolean", "description": "Specify whether to use Prodigal's single-genome mode for long sequences.", - "help_text": "By default Prodigal runs in 'single genome' mode that requires sequence lengths to be equal or longer than 20000 characters.\n\nHowever, more fragmented reads from MAGs often result in contigs shorter than this. Therefore, nf-core/funcscan will run with the `meta` mode by default, but providing this parameter allows to override this and run in single genome mode again.\n\nFor more information check Prodigal [documentation](https://github.com/hyattpd/prodigal/wiki).\n\n> Modifies tool parameter(s): \n> -PRODIGAL: `-p`", + "help_text": "By default Prodigal runs in 'single genome' mode that requires sequence lengths to be equal to or longer than 20000 characters.\n\nHowever, more fragmented reads from MAGs often result in contigs shorter than this. Therefore, nf-core/funcscan will run with the `meta` mode by default. Providing this parameter allows you to override this and run in single genome mode again.\n\nFor more information check the Prodigal [documentation](https://github.com/hyattpd/prodigal/wiki).\n\n> Modifies tool parameter(s): \n> - PRODIGAL: `-p`", "fa_icon": "far fa-circle" }, "annotation_prodigal_closed": { "type": "boolean", "description": "Does not allow partial genes on contig edges.", - "help_text": "Suppresses partial genes from being on contig edge, resulting in closed ends. Should only be activated for genomes where it is certain that the first and last bases of the sequence(s) do not fall inside a gene. 
Run together with `-p normal` (formerly `-p single`).\n\nFor more information check the Prodigal [documentation](https://github.com/hyattpd/prodigal/wiki).\n\n> Modifies tool parameter(s):\n> - PRODIGAL: `-c`", + "fa_icon": "fas fa-arrows-alt-h" }, "annotation_prodigal_transtable": { "type": "integer", "default": 11, "description": "Specifies the translation table used for gene annotation.", - "help_text": "Specifies which translation table should be used for seqeunce annotation. All possible genetic code translation tables can be found [here](https://en.wikipedia.org/wiki/List_of_genetic_codes). The default is set at 11, which is used for standard Bacteria/Archeae.\n\nFor more information check Prodigal [documentation](https://github.com/hyattpd/prodigal/wiki).\n\n> Modifies tool parameter(s):\n> - PRODIGAL: `-g`", + "help_text": "Specifies which translation table should be used for sequence annotation. All possible genetic code translation tables can be found [here](https://en.wikipedia.org/wiki/List_of_genetic_codes). The default is set at 11, which is used for standard Bacteria/Archaea.\n\nFor more information check the Prodigal [documentation](https://github.com/hyattpd/prodigal/wiki).\n\n> Modifies tool parameter(s):\n> - PRODIGAL: `-g`", "fa_icon": "fas fa-border-all" }, "annotation_prodigal_forcenonsd": { "type": "boolean", "description": "Forces Prodigal to scan for motifs.", - "help_text": "Forces PRODIGAL to a full scan for motifs rather than activating the Shine-Dalgarno RBS finder, the default scanner for PRODIGAL to train for motifs.\n\nFor more information check Prodigal [documentation](https://github.com/hyattpd/prodigal/wiki).\n\n> Modifies tool parameter(s):\n> - PRODIGAL: `-n`", + "help_text": "Forces PRODIGAL to perform a full scan for motifs rather than activating the Shine-Dalgarno RBS finder, the default scanner with which PRODIGAL trains for motifs.\n\nFor more information check the Prodigal [documentation](https://github.com/hyattpd/prodigal/wiki).\n\n> Modifies tool parameter(s):\n> - PRODIGAL: `-n`", "fa_icon": "fas fa-barcode" } }, - "fa_icon": "fas fa-tools", - "help_text": "Prodigal is a protein-coding gene prediction tool developed to run on bacterial and archaeal genomes.\n\nDocumentation: https://github.com/hyattpd/prodigal/wiki" + "fa_icon": "fas fa-file-signature" }, "annotation_pyrodigal": { "title": "Annotation: Pyrodigal", "type": "object", - "description": "These parameters influence the annotation algorithm used by Pyrodigal.", + "description": "Pyrodigal is a resource-optimized wrapper around Prodigal, producing protein-coding gene predictions of bacterial and archaeal genomes. Read more at the Pyrodigal GitHub repository (https://github.com/althonos/pyrodigal) or its documentation (https://pyrodigal.readthedocs.io).", "default": "", "properties": { "annotation_pyrodigal_singlemode": { "type": "boolean", "fa_icon": "far fa-circle", "description": "Specify whether to use Pyrodigal's single-genome mode for long sequences.", - "help_text": "By default Pyrodigal runs in 'single genome' mode that requires sequence lengths to be equal or longer than 20000 characters.\n\nHowever, more fragmented reads from MAGs often result in contigs shorter than this. 
Therefore, nf-core/funcscan will run with the `meta` mode by default, but providing this parameter allows to override this and run in single genome mode again.\n\nFor more information check Pyrodigal [documentation](https://pyrodigal.readthedocs.io).\n\n> Modifies tool parameter(s): \n> -PYRODIGAL: `-p`" + "help_text": "By default Pyrodigal runs in 'single genome' mode that requires sequence lengths to be equal to or longer than 20000 characters.\n\nHowever, more fragmented reads from MAGs often result in contigs shorter than this. Therefore, nf-core/funcscan will run with the `meta` mode by default, but providing this parameter allows you to override this and run in single genome mode again.\n\nFor more information check the Pyrodigal [documentation](https://pyrodigal.readthedocs.io).\n\n> Modifies tool parameter(s): \n> - PYRODIGAL: `-p`" }, "annotation_pyrodigal_closed": { "type": "boolean", - "fa_icon": "fas fa-circle", + "fa_icon": "fas fa-arrows-alt-h", "description": "Does not allow partial genes on contig edges.", - "help_text": "Suppresses partial genes from being on contig edge, resulting in closed ends. Should only be activated for genomes where it is sure the first and last bases of the sequence(s) do not fall inside a gene. Run together with `-p single` .\n\nFor more information check Pyrodigal [documentation](https://pyrodigal.readthedocs.io).\n\n> Modifies tool parameter(s):\n> - PYRODIGAL: `-c`" + "help_text": "Suppresses partial genes from being on contig edge, resulting in closed ends. Should only be activated for genomes where it is certain that the first and last bases of the sequence(s) do not fall inside a gene. Run together with `-p single`.\n\nFor more information check the Pyrodigal [documentation](https://pyrodigal.readthedocs.io).\n\n> Modifies tool parameter(s):\n> - PYRODIGAL: `-c`" }, "annotation_pyrodigal_transtable": { "type": "integer", "default": 11, "fa_icon": "fas fa-border-all", "description": "Specifies the translation table used for gene annotation.", - "help_text": "Specifies which translation table should be used for seqeunce annotation. All possible genetic code translation tables can be found [here](https://en.wikipedia.org/wiki/List_of_genetic_codes). The default is set at 11, which is used for standard Bacteria/Archeae.\n\nFor more information check Pyrodigal [documentation](https://pyrodigal.readthedocs.io).\n\n> Modifies tool parameter(s):\n> - PYRODIGAL: `-g`" + "help_text": "Specifies which translation table should be used for sequence annotation. All possible genetic code translation tables can be found [here](https://en.wikipedia.org/wiki/List_of_genetic_codes). 
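For example, annotating complete single genomes rather than fragmented metagenomic contigs could look like this sketch (again assuming `--annotation_tool` selects the annotation tool; values are illustrative):

```bash
nextflow run nf-core/funcscan -profile docker \
    --input samplesheet.csv \
    --outdir results \
    --run_amp_screening \
    --annotation_tool pyrodigal \
    --annotation_pyrodigal_singlemode \
    --annotation_pyrodigal_closed
```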
The default is set at 11, which is used for standard Bacteria/Archaea.\n\nFor more information check the Pyrodigal [documentation](https://pyrodigal.readthedocs.io).\n\n> Modifies tool parameter(s):\n> - PYRODIGAL: `-g`" }, "annotation_pyrodigal_forcenonsd": { "type": "boolean", "fa_icon": "fas fa-barcode", "description": "Forces Pyrodigal to scan for motifs.", - "help_text": "Forces Pyrodigal to a full scan for motifs rather than activating the Shine-Dalgarno RBS finder, the default scanner for Pyrodigal to train for motifs.\n\nFor more information check Pyrodigal [documentation](https://pyrodigal.readthedocs.io).\n\n> Modifies tool parameter(s):\n> - PYRODIGAL: `-n`" + "help_text": "Forces Pyrodigal to perform a full scan for motifs rather than activating the Shine-Dalgarno RBS finder, the default scanner with which Pyrodigal trains for motifs.\n\nFor more information check the Pyrodigal [documentation](https://pyrodigal.readthedocs.io).\n\n> Modifies tool parameter(s):\n> - PYRODIGAL: `-n`" } }, - "fa_icon": "fas fa-tools", - "help_text": "Pyrodigal produces protein-coding gene predictions of bacterial and archaeal genomes, based on the tool Prodigal being resource-optimized. Read more at the [Pyrodigal GitHub](https://github.com/althonos/pyrodigal)\n\nDocumentation: https://pyrodigal.readthedocs.io" + "fa_icon": "fas fa-file-signature" }, "database_downloading_options": { "title": "Database downloading options", "type": "object", - "description": "Generic options for database downloading", + "description": "General options for database downloading", "default": "", "properties": { - "save_databases": { + "save_db": { "type": "boolean", "fa_icon": "fas fa-save", "description": "Specify whether to save pipeline-downloaded databases in your results directory.", - "help_text": "While nf-core/funcscan can download databases for you, often these are very large and can significantly slow-down pipeline runtime if the databases have to be downloaded every run.\n\nSpecifying `--save_databases` while save the pipeline-downloaded databases in your results directory. This applies to: BAKTA, DeepBGC, DeepARG, AMRFinderPlus, antiSMASH, and DRAMP.\n\nYou can then move the resulting directories/files to a central cache directory of your choice for re-use in the future.\n\nIf you do not specify these flags, the database files will remain in your `work/` directory and will be deleted if `cleanup = true` is specified in your config, or if you run `nextflow clean`.\n" + "help_text": "While nf-core/funcscan can download databases for you, often these are very large and can significantly slow down pipeline runtime if the databases have to be downloaded every run.\n\nSpecifying `--save_db` will save the pipeline-downloaded databases in your results directory. This applies to: AMRFinderPlus, antiSMASH, Bakta, CARD (for RGI), DeepARG, DeepBGC, and DRAMP (for AMPcombi2).\n\nYou can then move the resulting directories/files to a central cache directory of your choice for re-use in the future.\n\nIf you do not specify this flag, the database files will remain in your `work/` directory and will be deleted if `cleanup = true` is specified in your config, or if you run `nextflow clean`.\n" } }, "fa_icon": "fas fa-database" @@ -407,34 +526,33 @@ "amp_amplify": { "title": "AMP: AMPlify", "type": "object", - "description": "Antimicrobial Peptide detection using a deep learning model.", + "description": "Antimicrobial Peptide detection using a deep learning model. 
More info: https://github.com/bcgsc/AMPlify", "default": "", "properties": { "amp_skip_amplify": { "type": "boolean", - "description": "Skip AMPlify during AMP-screening.", + "description": "Skip AMPlify during AMP screening.", "fa_icon": "fas fa-ban" } }, - "fa_icon": "fas fa-tools", - "help_text": "AMPlify is an attentive deep learning model for antimicrobial peptide prediction. It takes in annotated contigs (.faa) and classifies them as either AMP or non-AMP.\n\nDocumentation: https://github.com/bcgsc/AMPlify" + "fa_icon": "fa fa-plus-square" }, "amp_ampir": { "title": "AMP: ampir", "type": "object", - "description": "Antimicrobial Peptide detection using machine learning", + "description": "Antimicrobial Peptide detection using machine learning. ampir uses a supervised statistical machine learning approach to predict AMPs. It incorporates two support vector machine classification models, 'precursor' and 'mature' that have been trained on publicly available antimicrobial peptide data. More info: https://github.com/Legana/ampir", "default": "", "properties": { "amp_skip_ampir": { "type": "boolean", - "description": "Skip AMPir during AMP-screening.", + "description": "Skip ampir during AMP screening.", "fa_icon": "fas fa-ban" }, "amp_ampir_model": { "type": "string", "default": "precursor", "description": "Specify which machine learning classification model to use.", - "help_text": "AMPir uses a supervised statistical machine learning approach to predict AMPs. It incorporates two support vector machine classification models, \"precursor\" and \"mature\". \n\nThe precursor module is better for predicted proteins from a translated transcriptome or translated gene models. The alternative model (mature) is best suited for AMP sequences after post-translational processing, typically from direct proteomic sequencing.\n\nMore information can be found in the AMPir [documentation](https://ampir.marine-omics.net/).\n\n> Modifies tool parameter(s):\n> - AMPir: `model =`", + "help_text": "Ampir uses a supervised statistical machine learning approach to predict AMPs. It incorporates two support vector machine classification models, \"precursor\" and \"mature\". \n\nThe precursor module is better for predicted proteins from a translated transcriptome or translated gene models. 
The alternative model (mature) is best suited for AMP sequences after post-translational processing, typically from direct proteomic sequencing.\n\nMore information can be found in the ampir [documentation](https://ampir.marine-omics.net/).\n\n> Modifies tool parameter(s):\n> - ampir: `model =`", "enum": ["precursor", "mature"], "fa_icon": "fas fa-layer-group" }, @@ -442,113 +560,236 @@ "type": "integer", "default": 10, "description": "Specify minimum protein length for prediction calculation.", - "help_text": "Filters result for minimum protein length.\nNote that amino acid sequences that are shorter than 10 amino acids long and/or contain anything other than the standard 20 amino acids are not evaluated and will contain an NA as their prob_AMP value\n\nMore information can be found in the AMPir [documentation](https://ampir.marine-omics.net/).\n\n> Modifies tool parameter(s):\n> - AMPir parameter: `min_length` in the `calculate_features()` function", + "help_text": "Filters result for minimum protein length.\nNote that amino acid sequences that are shorter than 10 amino acids long and/or contain anything other than the standard 20 amino acids are not evaluated and will contain an NA as their \"prob_AMP\" value.\n\nMore information can be found in the ampir [documentation](https://ampir.marine-omics.net/).\n\n> Modifies tool parameter(s):\n> - ampir parameter: `min_length` in the `calculate_features()` function", "fa_icon": "fas fa-ruler-horizontal" } }, - "fa_icon": "fas fa-tools", - "help_text": "ampir (antimicrobial peptide prediction in r) is an r package designed to predict antimicrobial peptides (AMPs) from any given size protein dataset. ampir uses a supervised statistical machine learning approach to predict AMPs. It incorporates two support vector machine classification models, 'precursor' and 'mature' that have been trained on publicly available antimicrobial peptide data.\n\nDocumentation: https://github.com/Legana/ampir" + "fa_icon": "fa fa-plus-square" }, "amp_hmmsearch": { - "title": "AMP: HMMSearch", + "title": "AMP: hmmsearch", "type": "object", - "description": "Antimicrobial Peptide detection based on predefined HMM models", + "description": "Antimicrobial Peptide detection based on predefined HMM models. This tool implements methods using probabilistic models called profile hidden Markov models (profile HMMs) to search against a sequence database. More info: http://eddylab.org/software/hmmer/Userguide.pdf", "default": "", "properties": { - "amp_skip_hmmsearch": { + "amp_run_hmmsearch": { "type": "boolean", - "description": "Skip HMMsearch during AMP-screening.", + "description": "Run hmmsearch during AMP screening.", + "help_text": "hmmsearch is not run by default because HMM model files must be provided by the user with the flag `amp_hmmsearch_models`.", "fa_icon": "fas fa-ban" }, "amp_hmmsearch_models": { "type": "string", "description": "Specify path to the AMP hmm model file(s) to search against. Must have quotes if wildcard used.", - "help_text": "HMMSearch performs biosequence analysis using profile hidden Markov Models.\nThe models are specified in`.hmm` files that are specified with this parameter\n\ne.g. 
\n\n```\n--amp_hmmsearch_models '////*.hmm'\n```\n\nYou must wrap the path in quotes if you use a wildcard, to ensure Nextflow expansion _not_ bash!\n\nFor more information check HMMER [documentation](http://hmmer.org/).", + "help_text": "hmmsearch performs biosequence analysis using profile hidden Markov Models.\nThe models are specified in `.hmm` files that are passed with this parameter,\n\ne.g. \n\n```\n--amp_hmmsearch_models '////*.hmm'\n```\n\nYou must wrap the path in quotes if you use a wildcard, to ensure Nextflow expansion, _not_ bash! When using quotes, the absolute path to the HMM file(s) has to be given.\n\nFor more information check the HMMER [documentation](http://hmmer.org/).", "fa_icon": "fas fa-layer-group" }, "amp_hmmsearch_savealignments": { "type": "boolean", - "help_text": "Save a multiple alignment of all significant hits (those satisfying inclusion thresholds) to a file\n\nFor more information check HMMER [documentation](http://hmmer.org/).\n\n> Modifies tool parameter(s):\n> - HMMsearch: `-A`", + "help_text": "Save a multiple alignment of all significant hits (those satisfying inclusion thresholds) to a file.\n\nFor more information check the HMMER [documentation](http://hmmer.org/).\n\n> Modifies tool parameter(s):\n> - hmmsearch: `-A`", "description": "Saves a multiple alignment of all significant hits to a file.", - "fa_icon": "far fa-save" + "fa_icon": "fas fa-save" }, "amp_hmmsearch_savetargets": { "type": "boolean", - "help_text": "Save a simple tabular (space-delimited) file summarizing the per-target output, with one data line per homologous target sequence found.\n\nFor more information check HMMER [documentation](http://hmmer.org/).\n\n> Modifies tool parameter(s)\n> - HMMsearch: `--tblout`", + "help_text": "Save a simple tabular (space-delimited) file summarizing the per-target output, with one data line per homologous target sequence found.\n\nFor more information check the HMMER [documentation](http://hmmer.org/).\n\n> Modifies tool parameter(s):\n> - hmmsearch: `--tblout`", "description": "Save a simple tabular file summarising the per-target output.", - "fa_icon": "far fa-save" + "fa_icon": "fas fa-save" }, "amp_hmmsearch_savedomains": { "type": "boolean", - "help_text": "Save a simple tabular (space-delimited) file summarizing the per-domain output, with one data line per homologous domain detected in a query sequence for each homologous model.\n\nFor more information check HMMER [documentation](http://hmmer.org/).\n\n> Modifies tool parameter(s):\n> - HMMsearch: `--domtblout`", + "help_text": "Save a simple tabular (space-delimited) file summarizing the per-domain output, with one data line per homologous domain detected in a query sequence for each homologous model.\n\nFor more information check the HMMER [documentation](http://hmmer.org/).\n\n> Modifies tool parameter(s):\n> - hmmsearch: `--domtblout`", "description": "Save a simple tabular file summarising the per-domain output.", - "fa_icon": "far fa-save" + "fa_icon": "fas fa-save" } }, - "fa_icon": "fas fa-tools", - "help_text": "HMMER/hmmsearch is used for searching sequence databases for sequence homologs, and for making sequence alignments. It implements methods using probabilistic models called profile hidden Markov models (profile HMMs). 
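Since hmmsearch only runs when models are supplied, a hedged example combining the flags above (the models path is a placeholder):

```bash
nextflow run nf-core/funcscan -profile docker \
    --input samplesheet.csv \
    --outdir results \
    --run_amp_screening \
    --amp_run_hmmsearch \
    --amp_hmmsearch_models '/path/to/models/*.hmm' \
    --amp_hmmsearch_savetargets \
    --amp_hmmsearch_savedomains
```

Note the single quotes around the wildcard path, so that Nextflow rather than bash expands it.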
`hmmsearch` is used to search one or more profiles against a sequence database.\n\nFor more information check HMMER [documentation](http://hmmer.org/).\n\n" + "fa_icon": "fa fa-plus-square", + "help_text": "HMMER/hmmsearch is used for searching sequence databases for sequence homologs, and for making sequence alignments. It implements methods using probabilistic models called profile hidden Markov models (profile HMMs). `hmmsearch` is used to search one or more profiles against a sequence database.\n\nFor more information check the HMMER [documentation](http://hmmer.org/).\n\n" }, "amp_macrel": { "title": "AMP: Macrel", "type": "object", - "description": "Antimicrobial Peptide detection mining from metagenomes", + "description": "Antimicrobial peptide detection from metagenomes. More info: https://github.com/BigDataBiology/macrel", "default": "", "properties": { "amp_skip_macrel": { "type": "boolean", - "description": "Skip Macrel during AMP-screening.", + "description": "Skip Macrel during AMP screening.", "fa_icon": "fas fa-ban" } }, - "fa_icon": "fas fa-tools", - "help_text": "Macrel is a tool that mines antimicrobial peptides (AMPs) from (meta)genomes by predicting peptides from genomes (provided as contigs) and outputs all the predicted anti-microbial peptides found.\n\nDocumentation: https://github.com/BigDataBiology/macrel" + "fa_icon": "fa fa-plus-square" }, - "amp_ampcombi": { - "title": "AMP: AMPcombi", + "amp_ampcombi2_parsetables": { + "title": "AMP: ampcombi2 parsetables", "type": "object", - "description": "AntiMicrobial Peptides parsing and functional classification tool", + "description": "Antimicrobial peptides parsing, filtering, and annotating submodule of AMPcombi2. More info: https://github.com/Darcy220606/AMPcombi", "default": "", - "fa_icon": "fas fa-filter", "properties": { "amp_ampcombi_db": { "type": "string", "description": "Path to AMPcombi reference database directory (DRAMP).", - "help_text": "AMPcombi uses the 'general AMPs' dataset of the (DRAMP database)[http://dramp.cpu-bioinfor.org/downloads/] for taxonomic classification. If you have a local version of it, you can provide the path to the folder containing the reference database files:\n1. a fasta file with a `.hmm` file extension\n2. the corresponding table with with functional and taxonomic classifications in `.tsv` file extension.\n\nFor more information check AMPcombi [documentation](https://github.com/Darcy220606/AMPcombi).", + "help_text": "AMPcombi uses the 'general AMPs' dataset of the [DRAMP database](http://dramp.cpu-bioinfor.org/downloads/) for taxonomic classification. If you have a local version of it, you can provide the path to the directory(!) that contains the following reference database files:\n1. a fasta file with a `.fasta` file extension\n2. the corresponding table with functional and taxonomic classifications in `.tsv` file extension.\n\nThe contents of the directory should have files such as `*.dmnd` and `*.fasta` in the top level.\n\nFor more information check the AMPcombi [documentation](https://github.com/Darcy220606/AMPcombi).", "fa_icon": "fas fa-address-book" }, - "amp_ampcombi_cutoff": { + "amp_ampcombi_parsetables_cutoff": { + "type": "number", + "default": 0.6, + "description": "Specifies the prediction tools' cut-offs.", + "help_text": "This converts any prediction score below this cut-off to '0'. By doing so, only values above this cut-off will be used in the final AMPcombi2 summary table. This applies to all prediction tools except for hmmsearch, which uses e-value. 
To change the e-value cut-off, use `--amp_ampcombi_parsetables_hmmevalue` instead.\n\n> Modifies tool parameter(s):\n> - AMPCOMBI: `--amp_cutoff`", + "fa_icon": "fas fa-address-card" + }, + "amp_ampcombi_parsetables_aalength": { + "type": "integer", + "default": 100, + "description": "Filter out all amino acid fragments shorter than this number.", + "help_text": "Any AMP hit that does not satisfy this length cut-off will be removed from the final AMPcombi2 summary table.\n\n> Modifies tool parameter(s):\n> - AMPCOMBI: `--aminoacid_length`", + "fa_icon": "fas fa-ruler-horizontal" + }, + "amp_ampcombi_parsetables_dbevalue": { + "type": "number", + "default": 5.0, + "description": "Remove all DRAMP annotations that have an e-value greater than this value.", + "help_text": "This e-value is used as a cut-off for the annotations from the internal Diamond alignment step (against the DRAMP database by default). Annotations exceeding this e-value only lose their DRAMP classification; the hit itself is not removed.\n\n> Modifies tool parameter(s):\n> - AMPCOMBI: `--db_evalue`", + "fa_icon": "fas fa-sort-numeric-down" + }, + "amp_ampcombi_parsetables_hmmevalue": { + "type": "number", + "default": 0.06, + "description": "Retain HMM hits that have an e-value lower than this.", + "help_text": "This removes any hmmsearch hit with an e-value above this cut-off from the final AMPcombi2 summary table. To change the prediction score cut-off for all other AMP prediction tools, use `--amp_ampcombi_parsetables_cutoff` instead.\n\n> Modifies tool parameter(s):\n> - AMPCOMBI: `--hmm_evalue`", + "fa_icon": "fas fa-sort-numeric-up" + }, + "amp_ampcombi_parsetables_windowstopcodon": { + "type": "integer", + "default": 60, + "description": "Assign the number of codons used to look for stop codons, upstream and downstream of the AMP hit.", + "help_text": "This assigns the length of the window size required to look for stop codons downstream and upstream of the CDS hits. In the default case, it looks 60 codons downstream and upstream of the AMP hit and reports whether a stop codon was found.\n\n> Modifies tool parameter(s):\n> - AMPCOMBI: `--window_size_stop_codon`", + "fa_icon": "fas fa-stop-circle" + }, + "amp_ampcombi_parsetables_windowtransport": { + "type": "integer", + "default": 11, + "description": "Assign the number of CDSs upstream and downstream of the AMP to look for a transport protein.", + "help_text": "This assigns the length of the window size required to look for a 'transporter' (e.g. ABC transporter) downstream and upstream of the CDS hits. This is done on CDS classification level.\n\n> Modifies tool parameter(s):\n> - AMPCOMBI: `--window_size_transporter`", + "fa_icon": "fas fa-car-side" + }, + "amp_ampcombi_parsetables_removehitswostopcodons": { + "type": "boolean", + "description": "Remove hits that have no stop codon upstream and downstream of the AMP.", + "help_text": "Removes any hits/CDSs that don't have a stop codon found in the window downstream or upstream of the CDS assigned by `--amp_ampcombi_parsetables_windowstopcodon`. 
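As a sketch, tightening the AMPcombi2 filters described in this section might look like the following (the thresholds are illustrative, not recommendations):

```bash
nextflow run nf-core/funcscan -profile docker \
    --input samplesheet.csv \
    --outdir results \
    --run_amp_screening \
    --amp_ampcombi_parsetables_cutoff 0.7 \
    --amp_ampcombi_parsetables_aalength 80 \
    --amp_ampcombi_parsetables_removehitswostopcodons
```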
We recommend turning this on if the results will be used experimentally downstream.\n\n> Modifies tool parameter(s):\n> - AMPCOMBI: `--remove_stop_codons`", + "fa_icon": "fas fa-ban" + }, + "amp_ampcombi_parsetables_ampir": { + "type": "string", + "default": ".ampir.tsv", + "description": "Assigns the file extension used to identify ampir output.", + "help_text": "Assigns the file extension of the input files to allow AMPcombi2 to identify the tool output from the list of input files.\n\n> Modifies tool parameter(s):\n> - AMPCOMBI: `--ampir_file`", + "fa_icon": "fas fa-address-card" + }, + "amp_ampcombi_parsetables_amplify": { + "type": "string", + "default": ".amplify.tsv", + "description": "Assigns the file extension used to identify AMPlify output.", + "help_text": "Assigns the file extension of the input files to allow AMPcombi2 to identify the tool output from the list of input files.\n\n> Modifies tool parameter(s):\n> - AMPCOMBI: `--amplify_file`", + "fa_icon": "fas fa-address-card" + }, + "amp_ampcombi_parsetables_macrel": { + "type": "string", + "default": ".macrel.prediction", + "description": "Assigns the file extension used to identify Macrel output.", + "help_text": "Assigns the file extension of the input files to allow AMPcombi2 to identify the tool output from the list of input files.\n\n> Modifies tool parameter(s):\n> - AMPCOMBI: `--macrel_file`", + "fa_icon": "fas fa-address-card" + }, + "amp_ampcombi_parsetables_hmmsearch": { + "type": "string", + "default": ".hmmer_hmmsearch.txt", + "description": "Assigns the file extension used to identify HMMER/hmmsearch output.", + "help_text": "Assigns the file extension of the input files to allow AMPcombi2 to identify the tool output from the list of input files.\n\n> Modifies tool parameter(s):\n> - AMPCOMBI: `--hmmsearch_file`", + "fa_icon": "fas fa-address-card" + } + }, + "fa_icon": "fa fa-plus-square" + }, + "amp_ampcombi2_cluster": { + "title": "AMP: ampcombi2 cluster", + "type": "object", + "description": "Clusters the AMP candidates identified with AMPcombi. More info: https://github.com/Darcy220606/AMPcombi", + "default": "", + "properties": { + "amp_ampcombi_cluster_covmode": { + "type": "number", + "default": 0.0, + "description": "MMseqs2 coverage mode.", + "help_text": "This assigns the coverage mode to the MMseqs2 cluster module. This determines how AMPs are grouped into clusters. More details can be found in the [MMseqs2 documentation](https://mmseqs.com/latest/userguide.pdf).\n\n> Modifies tool parameter(s):\n> - AMPCOMBI: `--cluster_cov_mode`", + "fa_icon": "far fa-circle" + }, + "amp_ampcombi_cluster_sensitivity": { + "type": "number", + "default": 4.0, + "description": "MMseqs2 alignment sensitivity.", + "help_text": "This assigns the sensitivity of alignment to the MMseqs2 cluster module. This determines how AMPs are grouped into clusters.
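To make the interplay of these parsetables filters concrete, a hypothetical invocation tightening the thresholds described above; the samplesheet name, `<OUTDIR>`, and the `--run_amp_screening` switch are assumptions for illustration, not taken from this diff:

```bash
# Sketch only: stricter AMP filtering in the AMPcombi2 summary table.
nextflow run nf-core/funcscan -profile docker \
    --input samplesheet.csv \
    --outdir <OUTDIR> \
    --run_amp_screening \
    --amp_ampcombi_parsetables_cutoff 0.7 \
    --amp_ampcombi_parsetables_aalength 120 \
    --amp_ampcombi_parsetables_hmmevalue 0.01
```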
More information can be obtained in the [MMseqs2 documentation](https://mmseqs.com/latest/userguide.pdf).\n\n> Modifies tool parameter(s):\n> - AMPCOMBI: `--cluster_sensitivity`", + "fa_icon": "fas fa-arrows-alt-h" + }, + "amp_ampcombi_cluster_minmembers": { + "type": "integer", + "default": 0, + "description": "Remove clusters that don't have more AMP hits than this number.", + "help_text": "Removes all clusters with this number of AMP hits or fewer.\n\n> Modifies tool parameter(s):\n> - AMPCOMBI: `--cluster_min_member`", + "fa_icon": "fas fa-book-dead" + }, + "amp_ampcombi_cluster_mode": { + "type": "number", + "default": 1.0, + "description": "MMseqs2 clustering mode.", + "help_text": "This assigns the cluster mode to the MMseqs2 cluster module. This determines how AMPs are grouped into clusters. More information can be obtained in the [MMseqs2 documentation](https://mmseqs.com/latest/userguide.pdf).\n\n> Modifies tool parameter(s):\n> - AMPCOMBI: `--cluster_mode`", + "fa_icon": "fas fa-circle" + }, + "amp_ampcombi_cluster_coverage": { + "type": "number", + "default": 0.8, + "description": "MMseqs2 alignment coverage.", + "help_text": "This assigns the coverage to the MMseqs2 cluster module. This determines how AMPs are grouped into clusters. More information can be obtained in the [MMseqs2 documentation](https://mmseqs.com/latest/userguide.pdf).\n\n> Modifies tool parameter(s):\n> - AMPCOMBI: `--cluster_coverage`", + "fa_icon": "far fa-arrow-alt-circle-right" + }, + "amp_ampcombi_cluster_seqid": { "type": "number", "default": 0.4, - "description": "Specify probability cutoff to filter AMPs", - "help_text": "Specify the minimum probability an AMP hit must have to be retained in the final output file. Anything below this threshold will be removed.\n\nFor more information check AMPcombi [documentation](https://github.com/Darcy220606/AMPcombi).\n\n> Modifies tool parameter(s):\n> - AMPCOMBI: `--cutoff`", - "fa_icon": "fas fa-sort-amount-up" + "description": "MMseqs2 sequence identity.", + "help_text": "This assigns the cluster sequence identity to the MMseqs2 cluster module. This determines how AMPs are grouped into clusters. More information can be obtained in the [MMseqs2 documentation](https://mmseqs.com/latest/userguide.pdf).\n\n> Modifies tool parameter(s):\n> - AMPCOMBI: `--cluster_seq_id`", + "fa_icon": "far fa-address-card" + }, + "amp_ampcombi_cluster_removesingletons": { + "type": "boolean", + "description": "Remove any hits that form a single member cluster.", + "help_text": "Removes any AMP hits that form a single-member cluster.\n\n> Modifies tool parameter(s):\n> - AMPCOMBI: `--cluster_remove_singletons`", + "fa_icon": "fas fa-book-dead" } }, - "help_text": "AMPcombi : AntiMicrobial Peptides parsing and functional classification tool.\n\nDocumentation: https://github.com/Darcy220606/AMPcombi" + "fa_icon": "fa fa-plus-square" }, "arg_amrfinderplus": { "title": "ARG: AMRFinderPlus", "type": "object", - "description": "Antimicrobial resistance gene detection based on NCBI's curated Reference Gene Database and curated collection of Hidden Markov Models", + "description": "Antimicrobial resistance gene detection based on NCBI's curated Reference Gene Database and curated collection of Hidden Markov Models. It identifies AMR genes, resistance-associated point mutations, and select other classes of genes using protein annotations and/or assembled nucleotide sequences.
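The MMseqs2-backed clustering parameters above combine as in this sketch; input, output directory, and the `--run_amp_screening` switch are again assumed placeholders:

```bash
# Sketch only: stricter AMP clustering via the AMPcombi2 cluster submodule,
# keeping only multi-member clusters of near-identical peptides.
nextflow run nf-core/funcscan -profile docker \
    --input samplesheet.csv \
    --outdir <OUTDIR> \
    --run_amp_screening \
    --amp_ampcombi_cluster_seqid 0.8 \
    --amp_ampcombi_cluster_coverage 0.9 \
    --amp_ampcombi_cluster_removesingletons
```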
More info: https://github.com/ncbi/amr/wiki", "default": "", - "help_text": "NCBI has developed AMRFinderPlus, a tool that identifies AMR genes, resistance-associated point mutations, and select other classes of genes using protein annotations and/or assembled nucleotide sequence. AMRFinderPlus is used in the Pathogen Detection pipeline, and these data are displayed in NCBI's Isolate Browser. AMRFinderPlus relies on NCBI's curated Reference Gene Database and curated collection of Hidden Markov Models.\n\nDocumentation: https://github.com/ncbi/amr/wiki", - "fa_icon": "fas fa-tools", + "fa_icon": "fas fa-bacteria", "properties": { "arg_skip_amrfinderplus": { "type": "boolean", - "description": "Skip AMRFinderPlus during the ARG-screening.", + "description": "Skip AMRFinderPlus during the ARG screening.", "fa_icon": "fas fa-ban" }, "arg_amrfinderplus_db": { "type": "string", "fa_icon": "fas fa-layer-group", - "help_text": "Specify the path to a local version of the ARMFinderPlus database. If no input is given, the pipeline will download the database for you.\n\n See the nf-core/funcscan usage [documentation](https://nf-co.re/funcscan/usage) for more information.", - "description": "Specify the path to a local version of the ARMfinderPlus database." + "help_text": "Specify the path to a local version of the AMRFinderPlus database.\n\nYou must give the `latest` directory to the pipeline, and the contents of the directory should include files such as `*.nbd`, `*.nhr`, `versions.txt` etc. in the top level.\n\nIf no input is given, the pipeline will download the database for you.\n\nSee the nf-core/funcscan usage [documentation](https://nf-co.re/funcscan/usage) for more information.\n\n> Modifies tool parameter(s):\n> - AMRFinderPlus: `--database`", + "description": "Specify the path to a local version of the AMRFinderPlus database." }, "arg_amrfinderplus_identmin": { "type": "number", - "default": -1, - "help_text": "Specify the minimum percentage amino-acid identity to reference protein or nucleotide identity for nucleotide reference must have if a BLAST alignment (based on methods: BLAST or PARTIAL) was detected, otherwise NA.\n\n If you specify `-1`, this means use a curated threshold if it exists and `0.9` otherwise.\n\nSetting this value to something other than `-1` will override any curated similarity cutoffs. For BLAST: alignment is > 90% of length and > 90% identity to a protein in the AMRFinderPlus database. For PARTIAL: alignment is > 50% of length, but < 90% of length and > 90% identity to the reference, and does not end at a contig boundary.\n\nFor more information check AMRFinderPlus [documentation](https://github.com/ncbi/amr/wiki/Running-AMRFinderPlus#--organism-option).\n\n> Modifies tool parameter(s):\n> - AMRFinderPlus: `--ident_min`", + "default": -1.0, + "help_text": "Specify the minimum percentage amino-acid identity a hit must have to the reference protein (or nucleotide identity to a nucleotide reference) if a BLAST alignment (based on methods: BLAST or PARTIAL) was detected, otherwise NA.\n\nIf you specify `-1`, this means use a curated threshold if it exists and `0.9` otherwise.\n\nSetting this value to something other than `-1` will override any curated similarity cutoffs. For BLAST: alignment is > 90% of length and > 90% identity to a protein in the AMRFinderPlus database.
For PARTIAL: alignment is > 50% of length, but < 90% of length and > 90% identity to the reference, and does not end at a contig boundary.\n\nFor more information check the AMRFinderPlus [documentation](https://github.com/ncbi/amr/wiki/Running-AMRFinderPlus#--organism-option).\n\n> Modifies tool parameter(s):\n> - AMRFinderPlus: `--ident_min`", "description": "Minimum percent identity to reference sequence.", "fa_icon": "fas fa-angle-left" }, @@ -556,7 +797,7 @@ "type": "number", "default": 0.5, "description": "Minimum coverage of the reference protein.", - "help_text": "Minimum proportion of reference gene covered for a BLAST-based hit analysis if a BLAST alignment was detected, otherwise NA.\n\nFor BLAST-based hit analysis: alignment is > 90% of length and > 90% identity to a protein in the AMRFinderPlus database or for PARTIAL: alignment is > 50% of length, but < 90% of length and > 90% identity to the reference, and does not end at a contig boundary.\n\nFor more information check AMRFinderPlus [documentation](https://github.com/ncbi/amr/wiki/Running-AMRFinderPlus#--organism-option).\n\n> Modifies tool parameter(s):\n> - AMRFinderPlus: `--coverage_min`", + "help_text": "Minimum proportion of reference gene covered for a BLAST-based hit analysis if a BLAST alignment was detected, otherwise NA.\n\nFor BLAST-based hit analysis: alignment is > 90% of length and > 90% identity to a protein in the AMRFinderPlus database or for PARTIAL: alignment is > 50% of length, but < 90% of length and > 90% identity to the reference, and does not end at a contig boundary.\n\nFor more information check the AMRFinderPlus [documentation](https://github.com/ncbi/amr/wiki/Running-AMRFinderPlus#--organism-option).\n\n> Modifies tool parameter(s):\n> - AMRFinderPlus: `--coverage_min`", "fa_icon": "fas fa-arrow-alt-circle-down", "minimum": 0, "maximum": 1 @@ -565,7 +806,7 @@ "type": "integer", "default": 11, "description": "Specify which NCBI genetic code to use for translated BLAST.", - "help_text": "NCBI genetic code for translated BLAST. Number from 1 to 33 to represent the translation table used for BLASTX.\n\nSee [translation table](https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi) for more details on which table to use. \n\nFor more information check AMRFinderPlus [documentation](https://github.com/ncbi/amr/wiki/Running-AMRFinderPlus#--organism-option).\n\n> Modifies tool parameter(s):\n> - AMRFinderPlus: `--translation_table`", + "help_text": "NCBI genetic code for translated BLAST. Number from 1 to 33 to represent the translation table used for BLASTX.\n\nSee [translation table](https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi) for more details on which table to use.\n\nFor more information check the AMRFinderPlus [documentation](https://github.com/ncbi/amr/wiki/Running-AMRFinderPlus#--organism-option).\n\n> Modifies tool parameter(s):\n> - AMRFinderPlus: `--translation_table`", "fa_icon": "fas fa-border-all", "minimum": 1, "maximum": 33 @@ -587,91 +828,90 @@ "arg_deeparg": { "title": "ARG: DeepARG", "type": "object", - "description": "Antimicrobial resistance gene detection using a deep learning model", + "description": "Antimicrobial resistance gene detection using a deep learning model. DeepARG is composed of two models for two types of input: short sequence reads and gene-like sequences. In this pipeline we use the `ls` model, which is suitable for annotating full-length genes and for discovering novel antibiotic resistance genes from assembled samples.
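For illustration, a hypothetical run overriding AMRFinderPlus' curated cut-offs with fixed thresholds; the `--run_arg_screening` switch and all paths are assumptions, not taken from this diff:

```bash
# Sketch only: fixed identity/coverage thresholds override curated cut-offs.
nextflow run nf-core/funcscan -profile docker \
    --input samplesheet.csv \
    --outdir <OUTDIR> \
    --run_arg_screening \
    --arg_amrfinderplus_db '/<path>/<to>/amrfinderplus/latest' \
    --arg_amrfinderplus_identmin 0.9 \
    --arg_amrfinderplus_coveragemin 0.8
```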
The tool `Diamond` is used as an aligner. More info: https://bitbucket.org/gusphdproj/deeparg-ss/src/master", "default": "", "properties": { "arg_skip_deeparg": { "type": "boolean", - "description": "Skip DeepARG during the ARG-screening.", + "description": "Skip DeepARG during the ARG screening.", "fa_icon": "fas fa-ban" }, - "arg_deeparg_data": { + "arg_deeparg_db": { "type": "string", - "fa_icon": "fab fa-deezer", + "fa_icon": "fas fa-database", "description": "Specify the path to the DeepARG database.", - "help_text": "Specify the path to a local version of the DeepARG database (see the pipelines' usage [documentation](https://nf-co.re/funcscan/usage)). If no input is given, the module will download the database for you, however this is not recommended, as the database is large and this will take time." + "help_text": "Specify the path to a local version of the DeepARG database (see the pipeline's usage [documentation](https://nf-co.re/funcscan/dev/docs/usage#databases-and-reference-files)).\n\nThe contents of the directory should include directories such as `database`, `model`, and files such as `deeparg.gz` etc. in the top level.\n\nIf no input is given, the module will download the database for you, however this is not recommended, as the database is large and this will take time.\n\n> Modifies tool parameter(s):\n> - DeepARG: `--data-path`" }, - "arg_deeparg_data_version": { + "arg_deeparg_db_version": { "type": "integer", "default": 2, "description": "Specify the numeric version number of a user supplied DeepaRG database.", "fa_icon": "fas fa-code-branch", - "help_text": "The DeepARG tool itself does not report explicit the database version it uses. We assume the latest version (as downloaded by the tool's database download module), however if you supply a different database, you must supply the version with this parameter for use with the downstream hAMRonization tool.\n\nThe version number must be without any leading `v` etc." + "help_text": "The DeepARG tool itself does not explicitly report the database version it uses. We assume the latest version (as downloaded by the tool's database download module), however if you supply a different database, you must supply the version with this parameter for use with the downstream hAMRonization tool.\n\nThe version number must be without any leading `v` etc." }, "arg_deeparg_model": { "type": "string", "default": "LS", "enum": ["LS", "SS"], "description": "Specify which model to use (short or long sequences).", - "help_text": "Specify which model to use: short sequences for reads (`SS`), or long sequences for genes (`LS`). In the vast majority of cases we recommend using the `LS` model when using funcscan\n\nFor more information check DeepARG [documentation](https://bitbucket.org/gusphdproj/deeparg-ss/src/master/deeparg/).\n\n> Modifies tool parameter(s):\n> - DeepARG: `--model`", + "help_text": "Specify which model to use: short sequences for reads (`SS`), or long sequences for genes (`LS`).
In the vast majority of cases we recommend using the `LS` model when using funcscan.\n\nFor more information check the DeepARG [documentation](https://bitbucket.org/gusphdproj/deeparg-ss/src/master/deeparg/).\n\n> Modifies tool parameter(s):\n> - DeepARG: `--model`", "fa_icon": "fas fa-layer-group" }, "arg_deeparg_minprob": { "type": "number", "default": 0.8, "description": "Specify minimum probability cutoff under which hits are discarded.", - "help_text": "Sets the minimum probability cutoff below which hits are discarded.\n\nFor more information check DeepARG [documentation](https://bitbucket.org/gusphdproj/deeparg-ss/src/master/deeparg/).\n\n> Modifies tool parameter(s):\n> - DeepARG: `--min-prob`", + "help_text": "Sets the minimum probability cutoff below which hits are discarded.\n\nFor more information check the DeepARG [documentation](https://bitbucket.org/gusphdproj/deeparg-ss/src/master/deeparg/).\n\n> Modifies tool parameter(s):\n> - DeepARG: `--min-prob`", "fa_icon": "fas fa-dice" }, "arg_deeparg_alignmentevalue": { "type": "number", "default": 1e-10, "description": "Specify E-value cutoff under which hits are discarded.", - "help_text": "Sets the cutoff value for Evalue below which hits are discarded\n\nFor more information check DeepARG [documentation](https://bitbucket.org/gusphdproj/deeparg-ss/src/master/deeparg/).\n\n> Modifies tool parameter(s):\n> - DeepARG: `--arg-alignment-evalue`", + "help_text": "Sets the cut-off value for the E-value below which hits are discarded.\n\nFor more information check the DeepARG [documentation](https://bitbucket.org/gusphdproj/deeparg-ss/src/master/deeparg/).\n\n> Modifies tool parameter(s):\n> - DeepARG: `--arg-alignment-evalue`", "fa_icon": "fas fa-align-center" }, "arg_deeparg_alignmentidentity": { "type": "integer", "default": 50, "description": "Specify percent identity cutoff for sequence alignment under which hits are discarded.", - "help_text": "Sets the value for Identity cutoff for sequence alignment\n\nFor more information check DeepARG [documentation](https://bitbucket.org/gusphdproj/deeparg-ss/src/master/deeparg/).\n\n> Modifies tool parameter(s):\n> - DeepARG: `--arg-alignment-identity`", + "help_text": "Sets the identity cut-off value for sequence alignment.\n\nFor more information check the DeepARG [documentation](https://bitbucket.org/gusphdproj/deeparg-ss/src/master/deeparg/).\n\n> Modifies tool parameter(s):\n> - DeepARG: `--arg-alignment-identity`", "fa_icon": "fas fa-align-center" }, "arg_deeparg_alignmentoverlap": { "type": "number", "default": 0.8, "description": "Specify alignment read overlap.", - "help_text": "Sets the value for the allowed alignment read overlap.\n\nFor more information check DeepARG [documentation](https://bitbucket.org/gusphdproj/deeparg-ss/src/master/deeparg/).\n\n> Modifies tool parameter(s):\n> - DeepARG: `--arg-alignment-overlap`", + "help_text": "Sets the value for the allowed alignment read overlap.\n\nFor more information check the DeepARG [documentation](https://bitbucket.org/gusphdproj/deeparg-ss/src/master/deeparg/).\n\n> Modifies tool parameter(s):\n> - DeepARG: `--arg-alignment-overlap`", "fa_icon": "fas fa-align-center" }, "arg_deeparg_numalignmentsperentry": { "type": "integer", "default": 1000, "description": "Specify minimum number of alignments per entry for DIAMOND step of DeepARG.", - "help_text": "Sets the value of minimum number of alignments per entry for DIAMOND.\n\nFor more information check DeepARG
[documentation](https://bitbucket.org/gusphdproj/deeparg-ss/src/master/deeparg/).\n\n> Modifies tool parameter(s):\n> - DeepARG: `--arg-num-alignments-per-entry`", + "help_text": "Sets the value of minimum number of alignments per entry for DIAMOND.\n\nFor more information check the DeepARG [documentation](https://bitbucket.org/gusphdproj/deeparg-ss/src/master/deeparg/).\n\n> Modifies tool parameter(s):\n> - DeepARG: `--arg-num-alignments-per-entry`", "fa_icon": "far fa-gem" } }, - "fa_icon": "fas fa-tools", - "help_text": "deepARG uses deep learning to characterize and annotate antibiotic resistance genes in metagenomes. It is composed of two models for two types of input: short sequence reads and gene-like sequences. In this pipeline we use the `ls` model, which is suitable for annotating full sequence genes and to discover novel antibiotic resistance genes from assembled samples. The tool `Diamond` is used as an aligner.\n\nDocumentation: https://bitbucket.org/gusphdproj/deeparg-ss/src/master/" + "fa_icon": "fas fa-bacteria" }, "arg_fargene": { "title": "ARG: fARGene", "type": "object", - "description": "Antimicrobial resistance gene detection using a deep learning model", + "description": "Antimicrobial resistance gene detection based on optimised HMM models. The tool includes developed and optimised models for a number of resistance gene types, and the functionality to create and optimise models of your own choice of resistance genes. More info: https://github.com/fannyhb/fargene", "default": "", - "help_text": "fARGene (Fragmented Antibiotic Resistance Gene Identifier) is a tool that takes either fragmented metagenomic data or longer sequences as input and predicts and delivers full-length antiobiotic resistance genes as output. The tool includes developed and optimised models for a number or resistance gene types, and the functionality to create and optimize models of your own choice of resistance genes.
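A hypothetical DeepARG run with a pre-downloaded database and stricter filtering, combining the parameters above; the `--run_arg_screening` switch and all paths are illustrative assumptions:

```bash
# Sketch only: local DeepARG database plus a higher probability cut-off.
nextflow run nf-core/funcscan -profile docker \
    --input samplesheet.csv \
    --outdir <OUTDIR> \
    --run_arg_screening \
    --arg_deeparg_db '/<path>/<to>/deeparg/' \
    --arg_deeparg_db_version 2 \
    --arg_deeparg_minprob 0.9
```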
\n\nDocumentation: [https://github.com/fannyhb/fargene](https://github.com/fannyhb/fargene)", "properties": { "arg_skip_fargene": { "type": "boolean", - "description": "Skip fARGene during the ARG-screening.", + "description": "Skip fARGene during the ARG screening.", "fa_icon": "fas fa-ban" }, "arg_fargene_hmmmodel": { "type": "string", "default": "class_a,class_b_1_2,class_b_3,class_c,class_d_1,class_d_2,qnr,tet_efflux,tet_rpg,tet_enzyme", + "pattern": "^(class_a|class_b_1_2|class_b_3|class_c|class_d_1|class_d_2|qnr|tet_efflux|tet_rpg|tet_enzyme)(,(class_a|class_b_1_2|class_b_3|class_c|class_d_1|class_d_2|qnr|tet_efflux|tet_rpg|tet_enzyme))*$", "description": "Specify comma-separated list of which pre-defined HMM models to screen against", - "help_text": "Specify via a comma separated list any of the hmm-models of the pre-defined models:\n - Class A beta-lactamases: `class_a`\n - Subclass B1 and B2 beta-lactamases: `class_b_1_2`\n - Subclass B3 beta-lactamases: `class_b_3`\n - Class C beta-lactamases: `class_c\n - Class D beta-lactamases: `class_d_1`, `class_d_2`\n - qnr: `qnr`\n - Tetracycline resistance genes `tet_efflux`, `tet_rpg`, `tet_enzyme`\n\nFor more information check fARGene [documentation](https://github.com/fannyhb/fargene).\n\n For example: `--arg_fargenemodel 'class_a,qnr,tet_enzyme'`\n\n> Modifies tool parameter(s):\n\n> - fARGene: `--hmm-model`", + "help_text": "Specify via a comma separated list any of the hmm-models of the pre-defined models:\n- Class A beta-lactamases: `class_a`\n- Subclass B1 and B2 beta-lactamases: `class_b_1_2`\n- Subclass B3 beta-lactamases: `class_b_3`\n- Class C beta-lactamases: `class_c`\n- Class D beta-lactamases: `class_d_1`, `class_d_2`\n- qnr: `qnr`\n- Tetracycline resistance genes `tet_efflux`, `tet_rpg`, `tet_enzyme`\n\nFor more information check the fARGene [documentation](https://github.com/fannyhb/fargene).\n\nFor example: `--arg_fargene_hmmmodel 'class_a,qnr,tet_enzyme'`\n\n> Modifies tool parameter(s):\n> - fARGene: `--hmm-model`", "fa_icon": "fas fa-layer-group" }, "arg_fargene_savetmpfiles": { @@ -689,128 +929,131 @@ "arg_fargene_minorflength": { "type": "integer", "default": 90, - "help_text": "The minimum length of a predicted ORF retrieved from annotating the nucleotide sequences. By default the pipeline assigns this to 90% of the assigned hmm_model sequence length. \n\nFor more information check fARGene [documentation](https://github.com/fannyhb/fargene).\n\n> Modifies tool parameter(s):\n> - fARGene: `--min-orf-length`", + "help_text": "The minimum length of a predicted ORF retrieved from annotating the nucleotide sequences. By default the pipeline assigns this to 90% of the assigned hmm_model sequence length.\n\nFor more information check the fARGene [documentation](https://github.com/fannyhb/fargene).\n\n> Modifies tool parameter(s):\n> - fARGene: `--min-orf-length`", "description": "The minimum length of a predicted ORF retrieved from annotating the nucleotide sequences.", - "fa_icon": "fas fa-caret-down", + "fa_icon": "fas fa-ruler-horizontal", "minimum": 1, "maximum": 100 }, "arg_fargene_orffinder": { "type": "boolean", "description": "Defines which ORF finding algorithm to use.", - "help_text": "By default, pipeline uses prodigal/prokka for the prediction of ORFs from nucleotide sequences.
Another option is the NCBI ORFfinder tool that is built into fARGene, the use of which is activated by this flag.\n\nFor more information check fARGene [documentation](https://github.com/fannyhb/fargene).\n\n> Modifies tool parameter(s):\n> - fARGene: `--orf-finder`", - "fa_icon": "fab fa-adn" + "help_text": "By default, the pipeline uses prodigal/prokka for the prediction of ORFs from nucleotide sequences. Another option is the NCBI ORFfinder tool that is built into fARGene, the use of which is activated by this flag.\n\nFor more information check the fARGene [documentation](https://github.com/fannyhb/fargene).\n\n> Modifies tool parameter(s):\n> - fARGene: `--orf-finder`", + "fa_icon": "fas fa-project-diagram" }, "arg_fargene_translationformat": { "type": "string", "default": "pearson", "description": "The translation table/format to use for sequence annotation.", - "help_text": "The translation format that transeq should use for amino acid annotation from the nucleotide sequences. More sequence formats can be found in [transeq 'input sequence formats'](https://emboss.sourceforge.net/docs/themes/SequenceFormats.html).\n\nFor more information check fARGene [documentation](https://github.com/fannyhb/fargene).\n\n> Modifies tool parameter(s):\n> - fARGene: `--translation-format`", + "help_text": "The translation format that transeq should use for amino acid annotation from the nucleotide sequences. More sequence formats can be found in [transeq 'input sequence formats'](https://emboss.sourceforge.net/docs/themes/SequenceFormats.html).\n\nFor more information check the fARGene [documentation](https://github.com/fannyhb/fargene).\n\n> Modifies tool parameter(s):\n> - fARGene: `--translation-format`", "fa_icon": "fas fa-border-none" } }, - "fa_icon": "fas fa-tools" + "fa_icon": "fas fa-bacteria" }, "arg_rgi": { "title": "ARG: RGI", "type": "object", - "description": "Antimicrobial resistance gene detection, based on alignment to the CARD database", + "description": "Antimicrobial resistance gene detection, based on alignment to the CARD database using homology and SNP models. More info: https://github.com/arpcard/rgi", "default": "", - "help_text": "RGI (Resistance Gene Identifier) predicts resistome(s) from protein or nucleotide data based on homology and SNP models. It uses reference data from the Comprehensive Antibiotic Resistance Database (CARD).\n\nDocumentation: https://github.com/arpcard/rgi", "properties": { "arg_skip_rgi": { "type": "boolean", - "description": "Skip RGI during the ARG-screening.", + "description": "Skip RGI during the ARG screening.", "fa_icon": "fas fa-ban" }, + "arg_rgi_db": { + "type": "string", + "description": "Path to user-defined local CARD database.", + "fa_icon": "fas fa-database", + "help_text": "You can pre-download the CARD database to your machine and pass the path of it to this parameter.\n\nThe contents of the directory should include files such as `card.json`, `aro_index.tsv`, `snps.txt` etc. in the top level.\n\nSee the pipeline [documentation](https://nf-co.re/funcscan/usage#rgi) for details on how to download this.\n\n> Modifies tool parameter(s):\n> - RGI_CARDANNOTATION: `--input`" + }, "arg_rgi_savejson": { "type": "boolean", "description": "Save RGI output .json file.", "help_text": "When activated, this flag saves the `.json` file in the RGI output directory. The `.json` file contains the ARG predictions in a format that can be can be uploaded to the CARD website for visualization.
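For illustration, a hypothetical fARGene run restricted to a subset of the pre-defined HMM models listed above (`--run_arg_screening` and the input/outdir values are assumed placeholders):

```bash
# Sketch only: screen only for class A beta-lactamases, qnr, and
# tetracycline-inactivating enzymes.
nextflow run nf-core/funcscan -profile docker \
    --input samplesheet.csv \
    --outdir <OUTDIR> \
    --run_arg_screening \
    --arg_fargene_hmmmodel 'class_a,qnr,tet_enzyme'
```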
See [RGI documentation](https://github.com/arpcard/rgi) for more details. By default, the `.json` file is generated in the working directory but not saved in the results directory to save disk space (`.json` file is quite large and not required downstream in the pipeline). ", - "fa_icon": "fas fa-ad" + "fa_icon": "fas fa-save" }, "arg_rgi_savetmpfiles": { "type": "boolean", "fa_icon": "fas fa-save", - "description": "Specify to save intermediate temporary files the results directory.", - "help_text": "RGI generates many additional temporary files which in most cases won't be useful so by default are not saved.\n\nBy specifying this parameter, the files including `temp` in the name will be also saved in the output directory for closer inspection by the user, if necessary." + "description": "Specify to save intermediate temporary files in the results directory.", + "help_text": "RGI generates many additional temporary files which in most cases won't be useful, thus are not saved by default.\n\nBy specifying this parameter, files including `temp` in their name will also be saved in the output directory for closer inspection by the user." }, "arg_rgi_alignmenttool": { "type": "string", "default": "BLAST", "description": "Specify the alignment tool to be used.", - "help_text": "Specifies the alignment tool to be used. By default RGI runs BLAST and this is also set as default in the nf-core/funcscan pipeline. Using this flag the user can activate the alignment by DIAMOND again.\n\nFor more information check RGI [documentation](https://github.com/arpcard/rgi).\n\n> Modifies tool parameter(s):\n> - RGI: `--alignment_tool`", + "help_text": "Specifies the alignment tool to be used. By default RGI runs BLAST and this is also set as default in the nf-core/funcscan pipeline. With this flag the user can choose between BLAST and DIAMOND for the alignment step.\n\nFor more information check the RGI [documentation](https://github.com/arpcard/rgi).\n\n> Modifies tool parameter(s):\n> - RGI_MAIN: `--alignment_tool`", "enum": ["BLAST", "DIAMOND"], "fa_icon": "fas fa-align-justify" }, "arg_rgi_includeloose": { "type": "boolean", - "description": "Include all of loose, strict and perfect hits (i.e. >=95% identity) found by RGI.", - "help_text": "When activated it includes 'Loose' hits (a.k.a. Discovery) in addition to strict and perfect hits. All 'Loose' matches of 95% identity or better are automatically listed as 'Strict', regardless of alignment length (RGI v. <6.0.0). This behaviour can be overrun by using the --exclude_nudge flag. The 'Loose' algorithm works outside of the detection model cut-offs to provide detection of new, emergent threats and more distant homologs of AMR genes, but will also catalog homologous sequences and spurious partial matches that may not have a role in AMR.\n\nFor more information check RGI [documentation](https://github.com/arpcard/rgi).\n\n> Modifies tool parameter(s):\n> - RGI: `--include_loose`", - "fa_icon": "far fa-hand-scissors", - "default": true + "description": "Include all of 'Loose', 'Strict' and 'Perfect' hits found by RGI.", + "help_text": "When activated, RGI output will include 'Loose' hits in addition to 'Strict' and 'Perfect' hits.
The 'Loose' algorithm works outside of the detection model cut-offs to provide detection of new, emergent threats and more distant homologs of AMR genes, but will also catalog homologous sequences and spurious partial matches that may not have a role in AMR.\n\nFor more information check the RGI [documentation](https://github.com/arpcard/rgi).\n\n> Modifies tool parameter(s):\n> - RGI_MAIN: `--include_loose`", + "fa_icon": "far fa-hand-scissors" }, - "arg_rgi_excludenudge": { + "arg_rgi_includenudge": { "type": "boolean", "description": "Suppresses the default behaviour of RGI with `--arg_rgi_includeloose`.", - "help_text": "This flag suppresses the default behaviour of RGI with `--include_loose`, which lists all 'Loose' matches of >= 95% identity as 'Strict', regardless of alignment length. With this strict and perfect labels are added. This is discontinued in future versions of RGI.\n\nFor more information check RGI [documentation](https://github.com/arpcard/rgi).\n\n> Modifies tool parameter(s):\n> - RGI: `--exclude_nudge`", - "fa_icon": "fas fa-hand-scissors", - "default": true + "help_text": "This flag suppresses the default behaviour of RGI by listing all 'Loose' matches of \u2265 95% identity as 'Strict' or 'Perfect', regardless of alignment length.\n\nFor more information check the RGI [documentation](https://github.com/arpcard/rgi).\n\n> Modifies tool parameter(s):\n> - RGI_MAIN: `--include_nudge`", + "fa_icon": "fas fa-hand-scissors" }, "arg_rgi_lowquality": { "type": "boolean", "description": "Include screening of low quality contigs for partial genes.", - "help_text": "This flag should be used only when the contigs are of poor quality (e.g. short) to predict partial genes.\n\nFor more information check RGI [documentation](https://github.com/arpcard/rgi).\n\n> Modifies tool parameter(s):\n> - RGI: `--low_quality`", + "help_text": "This flag should be used only when the contigs are of poor quality (e.g. short) to predict partial genes.\n\nFor more information check the RGI [documentation](https://github.com/arpcard/rgi).\n\n> Modifies tool parameter(s):\n> - RGI_MAIN: `--low_quality`", "fa_icon": "fas fa-angle-double-down" }, "arg_rgi_data": { "type": "string", "default": "NA", - "description": "Specify a more specific data-type of input (e.g. plasmid, chromosome)", - "help_text": "This flag is used to specify the data type used as input to RGI. By default this is set as 'NA', which makes no assumptions on input data.\n\nFor more information check RGI [documentation](https://github.com/arpcard/rgi).\n\n> Modifies tool parameter(s):\n> - RGI: `--data`", + "description": "Specify a more specific data-type of input (e.g. plasmid, chromosome).", + "help_text": "This flag is used to specify the data type used as input to RGI.
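The loose/nudge interplay described above can be exercised as in this hypothetical 'Discovery'-style run (`--run_arg_screening` and the input/outdir values are assumptions):

```bash
# Sketch only: keep Loose hits, and nudge >=95% identity Loose matches
# up to Strict/Perfect, for exploratory screening of distant homologs.
nextflow run nf-core/funcscan -profile docker \
    --input samplesheet.csv \
    --outdir <OUTDIR> \
    --run_arg_screening \
    --arg_rgi_includeloose \
    --arg_rgi_includenudge
```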
By default this is set as 'NA', which makes no assumptions on input data.\n\nFor more information check the RGI [documentation](https://github.com/arpcard/rgi).\n\n> Modifies tool parameter(s):\n> - RGI_MAIN: `--data`", "enum": ["NA", "wgs", "plasmid", "chromosome"], - "fa_icon": "fas fa-database" + "fa_icon": "fas fa-dna" + }, + "arg_rgi_split_prodigal_jobs": { + "type": "boolean", + "description": "Run multiple prodigal jobs simultaneously for contigs in a fasta file.", + "help_text": "For more information check the RGI [documentation](https://github.com/arpcard/rgi).\n\n> Modifies tool parameter(s):\n> - RGI_MAIN: `--split_prodigal_jobs`", + "fa_icon": "fas fa-angle-double-down", + "default": true } }, - "fa_icon": "fas fa-tools" + "fa_icon": "fas fa-bacteria" }, "arg_abricate": { "title": "ARG: ABRicate", "type": "object", - "description": "Antimicrobial resistance gene detection, based on alignment to CBI, CARD, ARG-ANNOT, Resfinder, MEGARES, EcOH, PlasmidFinder, Ecoli_VF and VFDB.", + "description": "Antimicrobial resistance gene detection based on alignment to NCBI, CARD, ARG-ANNOT, ResFinder, MEGARES, EcOH, PlasmidFinder, Ecoli_VF and VFDB. More info: https://github.com/tseemann/abricate", "default": "", - "fa_icon": "fas fa-tools", + "fa_icon": "fas fa-bacteria", "properties": { "arg_skip_abricate": { "type": "boolean", "fa_icon": "fas fa-ban", - "description": "Skip ABRicate during the ARG-screening." + "description": "Skip ABRicate during the ARG screening." }, - "arg_abricate_db": { + "arg_abricate_db_id": { "type": "string", "default": "ncbi", - "fa_icon": "fas fa-layer-group", - "description": "Specify which of the provided public databases to use by ABRicate.", - "enum": [ - "argannot", - "card", - "ecoh", - "ecoli_vf", - "megares", - "ncbi", - "plasmidfinder", - "resfinder", - "vfdb", - "bacmet2", - "victors" - ], - "help_text": "Specifies which database to use from dedicated list of databases available by ABRicate. \n\nFor more information check ABRicate [documentation](https://github.com/tseemann/abricate).\n\n> Modifies tool parameter(s):\n> - ABRicate: `--db`" + "fa_icon": "fas fa-database", + "description": "Specify the name of the ABRicate database to use. Names of non-default databases can be supplied if `--arg_abricate_db` is provided.", + "help_text": "Specifies which database to use from the dedicated list of databases available in ABRicate.\n\nThe databases supported by default are: `argannot`, `card`, `ecoh`, `ecoli_vf`, `megares`, `ncbi`, `plasmidfinder`, `resfinder`, `vfdb`. Other options can be supplied if you have installed a custom one within the directory you have supplied to `--arg_abricate_db`.\n\nFor more information check the ABRicate [documentation](https://github.com/tseemann/abricate).\n\n> Modifies tool parameter(s):\n> - ABRicate: `--db`" + }, + "arg_abricate_db": { + "type": "string", + "description": "Path to user-defined local ABRicate database directory for using custom databases.", + "fa_icon": "far fa-folder-open", + "help_text": "Supply this only if you want to use additional custom databases you yourself have added to your ABRicate installation following the instructions [here](https://github.com/tseemann/abricate?tab=readme-ov-file#making-your-own-database).\n\nThe contents of the directory should have a directory named with the database name in the top level (e.g.
`bacmet2/`).\n\nYou must also specify the name of the custom database with `--arg_abricate_db_id`.\n\n> Modifies tool parameter(s):\n> - ABRicate: `--datadir`" }, "arg_abricate_minid": { "type": "integer", "default": 80, "description": "Minimum percent identity of alignment required for a hit to be considered.", - "help_text": "Specifies the minimum percent identity used to classify an ARG hit using BLAST alignment.\n\nFor more information check ABRicate [documentation](https://github.com/tseemann/abricate).\n\n> Modifies tool parameter(s):\n> - ABRicate: `--minid`", + "help_text": "Specifies the minimum percent identity used to classify an ARG hit using BLAST alignment.\n\nFor more information check the ABRicate [documentation](https://github.com/tseemann/abricate).\n\n> Modifies tool parameter(s):\n> - ABRicate: `--minid`", "fa_icon": "far fa-arrow-alt-circle-down", "minimum": 1, "maximum": 100 @@ -819,50 +1062,95 @@ "type": "integer", "default": 80, "description": "Minimum percent coverage of alignment required for a hit to be considered.", - "help_text": "Specifies the minimum coverage of the nucleotide sequence to be assigned an ARG hit using BLAST alignment. In the ABRicate matrix, an absent gene is assigned (`.`) and if present, it is assigned the estimated coverage (`#`).\n\nFor more information check ABRicate [documentation](https://github.com/tseemann/abricate).\n\n> Modifies tool parameter(s):\n> - ABRicate: `--mincov`", + "help_text": "Specifies the minimum coverage of the nucleotide sequence to be assigned an ARG hit using BLAST alignment. In the ABRicate matrix, an absent gene is assigned (`.`) and if present, it is assigned the estimated coverage (`#`).\n\nFor more information check the ABRicate [documentation](https://github.com/tseemann/abricate).\n\n> Modifies tool parameter(s):\n> - ABRicate: `--mincov`", "fa_icon": "far fa-arrow-alt-circle-down", "minimum": 1, "maximum": 100 } + } + }, + "arg_hamronization": { + "title": "ARG: hAMRonization", + "type": "object", + "description": "Influences parameters required for the ARG summary by hAMRonization.", + "default": "", + "properties": { + "arg_hamronization_summarizeformat": { + "type": "string", + "default": "tsv", + "enum": ["interactive", "tsv", "json"], + "help_text": "Specifies which summary report format to apply with `hamronize summarize`: tsv, json or interactive (html).\n\n> Modifies tool parameter(s):\n> - hamronize summarize: `-t`, `--summary_type`", + "description": "Specifies summary output format.", + "fa_icon": "far fa-file-code" + } + }, + "fa_icon": "fas fa-bacteria", + "help_text": "" + }, + "arg_argnorm": { + "title": "ARG: argNorm", + "type": "object", + "description": "Influences parameters required for the normalization of ARG annotations by argNorm. More info: https://github.com/BigDataBiology/argNorm", + "default": "", + "properties": { + "arg_skip_argnorm": { + "type": "boolean", + "fa_icon": "fas fa-ban", + "description": "Skip argNorm during ARG screening."
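The custom-database mechanism above can be combined with the hAMRonization summary format as in this sketch; `bacmet2` is the hypothetical database name from the help text, and `--run_arg_screening` plus all paths are assumptions:

```bash
# Sketch only: custom ABRicate database plus an interactive (HTML) ARG summary.
nextflow run nf-core/funcscan -profile docker \
    --input samplesheet.csv \
    --outdir <OUTDIR> \
    --run_arg_screening \
    --arg_abricate_db '/<path>/<to>/abricate_dbs/' \
    --arg_abricate_db_id 'bacmet2' \
    --arg_hamronization_summarizeformat 'interactive'
```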
+ } }, - "help_text": "ABRicate screens for antimicrobial resistance or virulence genes based on NCBI, CARD, ARG-ANNOT, Resfinder, MEGARES, EcOH, PlasmidFinder, Ecoli_VF and VFDB databases.\n\nDocumentation: https://github.com/tseemann/abricate" + "fa_icon": "fas fa-bacteria" + }, + "bgc_general_options": { + "title": "BGC: general options", + "type": "object", + "description": "These parameters influence general BGC settings like minimum input sequence length.", + "default": "", + "fa_icon": "fa fa-sliders", + "properties": { + "bgc_mincontiglength": { + "type": "integer", + "default": 3000, + "fa_icon": "fas fa-ruler-horizontal", + "description": "Specify the minimum length of contigs that go into BGC screening.", + "help_text": "Specify the minimum length of contigs that go into BGC screening.\n\nIf BGC screening is turned on, nf-core/funcscan will generate for each input sample a second FASTA file of only contigs that are longer than the specified minimum length.\nThis is due to an (approximate) 'biological' minimum length that nucleotide sequences would need to have to code for a valid BGC (e.g. not on the edge of a contig), as well as to speed up the BGC screening sections of the pipeline by screening only meaningful contigs.\n\nNote this only affects BGCs. For ARGs and AMPs no filtering is performed and all contigs are screened." + }, + "bgc_savefilteredcontigs": { + "type": "boolean", + "fa_icon": "fas fa-save", + "description": "Specify to save the length-filtered (unannotated) FASTAs used for BGC screening." + } + } }, "bgc_antismash": { "title": "BGC: antiSMASH", "type": "object", - "description": "Biosynthetic gene cluster detection", + "description": "Biosynthetic gene cluster detection. More info: https://docs.antismash.secondarymetabolites.org", "default": "", "properties": { "bgc_skip_antismash": { "type": "boolean", - "description": "Skip antiSMASH during the BGC screening", + "description": "Skip antiSMASH during the BGC screening.", "fa_icon": "fas fa-ban" }, - "bgc_antismash_databases": { + "bgc_antismash_db": { "type": "string", "description": "Path to user-defined local antiSMASH database.", - "fa_icon": "fas fa-layer-group", - "help_text": "It is recommend to pre-download the antiSMASH databases to your machine and pass the path of it to this parameter, as this can take a long time to download - particularly when running lots of pipeline runs. \n\nSee the pipeline [documentation](https://nf-co.re/funcscan/usage#antismash) for details on how to download this. If running with docker or singularity, please also check `--bgc_antismash_installationdirectory` for important information." + "fa_icon": "fas fa-database", + "help_text": "It is recommended to pre-download the antiSMASH databases to your machine and pass the path of it to this parameter, as this can take a long time to download - particularly when running lots of pipeline runs.\n\nThe contents of the database directory should include directories such as `as-js/`, `clusterblast/`, `clustercompare/` etc. in the top level.\n\nSee the pipeline [documentation](https://nf-co.re/funcscan/usage#antismash-1) for details on how to download this. If running with docker or singularity, please also check `--bgc_antismash_installdir` for important information." }, - "bgc_antismash_installationdirectory": { + "bgc_antismash_installdir": { "type": "string", "description": "Path to user-defined local antiSMASH directory.
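A hypothetical run exercising the general BGC options above; the `--run_bgc_screening` switch and the input/outdir values are illustrative assumptions:

```bash
# Sketch only: raise the BGC contig-length floor and keep the filtered FASTAs.
nextflow run nf-core/funcscan -profile docker \
    --input samplesheet.csv \
    --outdir <OUTDIR> \
    --run_bgc_screening \
    --bgc_mincontiglength 5000 \
    --bgc_savefilteredcontigs
```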
Only required when running with docker/singularity.", "fa_icon": "far fa-folder-open", - "help_text": "This is required when running with **docker and singularity** (not required for conda), due to attempted 'modifications' of files during database checks in the installation directory, something that cannot be done in immutable docker/singularity containers.\n\nTherefore, a local installation directory needs to be mounted (including all modified files from the downloading step) to the container as a workaround." - }, - "bgc_antismash_sampleminlength": { - "type": "integer", - "default": 1000, - "description": "Minimum longest-contig length a sample must have to be screened with antiSMASH.", - "fa_icon": "fas fa-ruler-horizontal", - "help_text": "This specifies the minimum length that the longest contig must have for the entire sample to be screened by antiSMASH.\n\nAny samples that do not reach this length will be not be sent to antiSMASH, therefore you will not receive output for these samples in your `--outdir`.\n\n> ⚠️ This is not the same as `--bgc_antismash_contigminlength`, which specifies to only analyse contigs above that threshold but _within_ a sample that has already passed `--bgc_antismash_sampleminlength` sample filter!" + "help_text": "This is required when running with **docker and singularity** (not required for conda), due to attempted 'modifications' of files during database checks in the installation directory, something that cannot be done in immutable docker/singularity containers.\n\nTherefore, a local installation directory needs to be mounted (including all modified files from the downloading step) to the container as a workaround.\n\nThe contents of the installation directory should include directories such as `common/` and `config/`, and files such as `custom_typing.py` and `custom_typing.pyi`, etc. in the top level.\n\nSee the pipeline [documentation](https://nf-co.re/funcscan/usage#antismash-1) for details on how to download this." }, "bgc_antismash_contigminlength": { "type": "integer", - "default": 1000, + "default": 3000, "description": "Minimum length a contig must have to be screened with antiSMASH.", "fa_icon": "fas fa-align-center", - "help_text": "This specifies the minimum length that a contig must have for the contig to be screened by antiSMASH.\n\nFor more information see the antiSMASH [documentation](https://docs.antismash.secondarymetabolites.org/).\n\nThis will only apply to samples that are screened with antiSMASH (i.e., those samples that have not been removed by `--bgc_antismash_sampleminlength`).\n\nYou may wish to increase this value compared to that of `--bgc_antismash_sampleminlength`, in cases where you wish to screen higher-quality (i.e., longer) contigs, or speed up runs by not screening lower quality/less informative contigs.\n\n> Modifies tool parameter(s):\n> - antiSMASH: `--minlength`" + "help_text": "This specifies the minimum length that a contig must have for the contig to be screened by antiSMASH.\n\nFor more information see the antiSMASH [documentation](https://docs.antismash.secondarymetabolites.org/).\n\nThis will only apply to contigs that have already passed the `--bgc_mincontiglength` filter.\n\nYou may wish to increase this value compared to that of `--bgc_mincontiglength`, in cases where you wish to screen higher-quality (i.e.
longer) contigs, or speed up runs by not screening lower quality/less informative contigs.\n\n> Modifies tool parameter(s):\n> - antiSMASH: `--minlength`" }, "bgc_antismash_cbgeneral": { "type": "boolean", @@ -874,7 +1162,7 @@ "type": "boolean", "description": "Turn on clusterblast comparison against known gene clusters from the MIBiG database.", "fa_icon": "fas fa-puzzle-piece", - "help_text": "This will turn on comparing identified clusters against known gene clusters from the MIBiG database using the clusterblast algorithm.\n\n[MIBiG](https://mibig.secondarymetabolites.org/) is a curated datbase of experimentally characterised gene clusters and with rich associated metadata.\n\nFor more information see the antiSMASH [documentation](https://docs.antismash.secondarymetabolites.org/).\n\n> Modifies tool parameter(s):\n> - antiSMASH: `--cb-knownclusters`" + "help_text": "This will turn on comparing identified clusters against known gene clusters from the MIBiG database using the clusterblast algorithm.\n\n[MIBiG](https://mibig.secondarymetabolites.org/) is a curated database of experimentally characterised gene clusters with rich associated metadata.\n\nFor more information see the antiSMASH [documentation](https://docs.antismash.secondarymetabolites.org/).\n\n> Modifies tool parameter(s):\n> - antiSMASH: `--cb-knownclusters`" }, "bgc_antismash_cbsubclusters": { "type": "boolean", @@ -892,55 +1180,76 @@ "type": "boolean", "description": "Generate phylogenetic trees of secondary metabolite group orthologs.", "fa_icon": "fas fa-tree", - "help_text": "Turning this on will activate the generation of additional functional and phyogenetic analysis of genes, via comparison against databases of protein orthologs.\n\nFor more information see the antiSMASH [documentation](https://docs.antismash.secondarymetabolites.org/).\n\n> Modifies tool parameter(s):\n> - antiSMASH: `--cb-smcog-trees`" + "help_text": "Turning this on will activate the generation of additional functional and phylogenetic analysis of genes, via comparison against databases of protein orthologs.\n\nFor more information see the antiSMASH [documentation](https://docs.antismash.secondarymetabolites.org/).\n\n> Modifies tool parameter(s):\n> - antiSMASH: `--cb-smcog-trees`" }, "bgc_antismash_hmmdetectionstrictness": { "type": "string", "default": "relaxed", - "description": "Defines which level of strictness to use for HMM-based cluster detection", - "help_text": "Defines which level of strictness to use for HMM-based cluster detection. \n\nThese correspond to screening of different groups of 'how well-defined' clusters are. For example, `loose` will include screening for 'poorly defined' clusters (e.g. saccharides), `relaxed` for partially present clusters (e.g. certain types of NRPS), whereas `strict` will screen for well-defined clusters such as Ketosynthases.\n\nYou can see the rules for the levels of strictness [here](https://github.com/antismash/antismash/tree/master/antismash/detection/hmm_detection/cluster_rules).\n\nFor more information see the antiSMASH [documentation](https://docs.antismash.secondarymetabolites.org/).\n\n> Modifies tool parameter(s):\n> - antiSMASH: `--hmmdetection-strictness`", + "description": "Defines which level of strictness to use for HMM-based cluster detection.", + "help_text": "Levels of strictness correspond to screening groups of clusters that differ in how well-defined they are. For example, `loose` will include screening for 'poorly defined' clusters (e.g.
saccharides), `relaxed` for partially present clusters (e.g. certain types of NRPS), whereas `strict` will screen for well-defined clusters such as Ketosynthases.\n\nYou can see the rules for the levels of strictness [here](https://github.com/antismash/antismash/tree/master/antismash/detection/hmm_detection/cluster_rules).\n\nFor more information see the antiSMASH [documentation](https://docs.antismash.secondarymetabolites.org/).\n\n> Modifies tool parameter(s):\n> - antiSMASH: `--hmmdetection-strictness`", "fa_icon": "fas fa-search", "enum": ["relaxed", "strict", "loose"] }, + "bgc_antismash_pfam2go": { + "type": "boolean", + "default": false, + "description": "Run Pfam to Gene Ontology mapping module.", + "help_text": "This maps the proteins against the Pfam database to annotate BGC modules with functional information based on the protein families they contain. For more information see the antiSMASH [documentation](https://docs.antismash.secondarymetabolites.org/).\n\n> Modifies tool parameter(s):\n> - antiSMASH: `--pfam2go`", + "fa_icon": "fas fa-search" + }, + "bgc_antismash_rre": { + "type": "boolean", + "default": false, + "description": "Run RREFinder precision mode on all RiPP gene clusters.", + "help_text": "This enables the detection of RiPP Recognition Elements (RREs), i.e. domains in RiPP-modifying enzymes that recognise the precursor peptide, within RiPP gene clusters. For more information see the antiSMASH [documentation](https://docs.antismash.secondarymetabolites.org/).\n\n> Modifies tool parameter(s):\n> - antiSMASH: `--rre`", + "fa_icon": "fas fa-search" + }, "bgc_antismash_taxon": { "type": "string", "default": "bacteria", - "description": "Specify which taxonomic classification of input sequence to use", + "description": "Specify which taxonomic classification of input sequence to use.", "help_text": "This specifies which set of secondary metabolites to screen for, based on the taxon type the secondary metabolites are from.\n\nThis will run different pipelines depending on whether the input sequences are from bacteria or fungi.\n\nFor more information see the antiSMASH [documentation](https://docs.antismash.secondarymetabolites.org/).\n\n> Modifies tool parameter(s):\n> - antiSMASH: `--taxon`", "fa_icon": "fas fa-bacteria", "enum": ["bacteria", "fungi"] + }, + "bgc_antismash_tfbs": { + "type": "boolean", + "default": false, + "description": "Run TFBS finder on all gene clusters.", + "help_text": "This enables the prediction of transcription factor binding sites, which control gene expression. For more information see the antiSMASH [documentation](https://docs.antismash.secondarymetabolites.org/).\n\n> Modifies tool parameter(s):\n> - antiSMASH: `--tfbs`", + "fa_icon": "fas fa-search" } }, - "fa_icon": "fas fa-tools", - "help_text": "The antibiotics and Secondary Metabolite Analysis SHell (antiSMASH) carries out a genome-wide screening, annotation and analysis of secondary metabolite biosynthesis gene clusters in bacterial and fungal genomes. \n\nDocumentation: https://antismash.secondarymetabolites.org/#!/about" + "fa_icon": "fa fa-sliders" }, "bgc_deepbgc": { - "title": "BGC: deepBGC", + "title": "BGC: DeepBGC", "type": "object", - "description": "A deep learning genome-mining strategy for biosynthetic gene cluster prediction", + "description": "A deep learning genome-mining strategy for biosynthetic gene cluster prediction.
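Putting the antiSMASH parameters above together, a hypothetical invocation with pre-downloaded resources and the optional annotation modules enabled; `--run_bgc_screening` and all paths are assumptions for illustration:

```bash
# Sketch only: local antiSMASH database/installation plus extra annotation modules.
nextflow run nf-core/funcscan -profile docker \
    --input samplesheet.csv \
    --outdir <OUTDIR> \
    --run_bgc_screening \
    --bgc_antismash_db '/<path>/<to>/antismash_db/' \
    --bgc_antismash_installdir '/<path>/<to>/antismash_install/' \
    --bgc_antismash_pfam2go \
    --bgc_antismash_rre \
    --bgc_antismash_tfbs
```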
More info: https://github.com/Merck/deepbgc/tree/master/deepbgc", "default": "", "properties": { "bgc_skip_deepbgc": { "type": "boolean", "fa_icon": "fas fa-ban", - "description": "Skip deepBGC during the BGC screening." + "description": "Skip DeepBGC during the BGC screening." }, - "bgc_deepbgc_database": { + "bgc_deepbgc_db": { "type": "string", - "fa_icon": "fas fa-layer-group", - "description": "Path to local deepBGC database folder." + "fa_icon": "fas fa-database", + "description": "Path to local DeepBGC database folder.", + "help_text": "The contents of the database directory should include directories such as `common` and `0.1.0` in the top level.\n\nFor more information see the DeepBGC [documentation](https://github.com/Merck/deepbgc).\n\n> Modifies tool parameter(s)\n> - DeepBGC: environment variable `DEEPBGC_DOWNLOADS_DIR`" }, "bgc_deepbgc_score": { "type": "number", "default": 0.5, "description": "Average protein-wise DeepBGC score threshold for extracting BGC regions from Pfam sequences.", "fa_icon": "fas fa-list-ol", - "help_text": "The DeepBGC score threshold for extracting BGC regions from Pfam sequences based on average protein-wise value. This is a prediction score that the domain is a part of a BGC.\n\nFor more information see the DeepBGC [documentation](https://github.com/Merck/deepbgc).\n\n> Modifies tool parameter(s)\n> - DeepBGC: `--score`" + "help_text": "The DeepBGC score threshold for extracting BGC regions from Pfam sequences based on average protein-wise value.\n\nFor more information see the DeepBGC [documentation](https://github.com/Merck/deepbgc).\n\n> Modifies tool parameter(s)\n> - DeepBGC: `--score`" }, "bgc_deepbgc_prodigalsinglemode": { "type": "boolean", "description": "Run DeepBGC's internal Prodigal step in `single` mode to restrict detecting genes to long contigs", - "help_text": "By default DeepBGC's Prodigal runs in 'single genome' mode that requires sequence lengths to be equal or longer than 20000 characters.\n\nHowever, more fragmented reads from MAGs often result in contigs shorter than this. Therefore, nf-core/funcscan will run with the `meta` mode by default, but providing this parameter allows to override this and run in single genome mode again.\n\nFor more information check Prodigal [documentation](https://github.com/hyattpd/prodigal/wiki).\n\n> Modifies tool parameter(s)\n> - DeepBGC: `--prodigal-meta-mode`", + "help_text": "By default DeepBGC's Prodigal runs in 'single genome' mode that requires sequence lengths to be equal or longer than 20000 characters.\n\nHowever, more fragmented reads from MAGs often result in contigs shorter than this.
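For illustration, a hypothetical DeepBGC run using a local database and a stricter detection score (`--run_bgc_screening` and the paths are assumptions):

```bash
# Sketch only: local DeepBGC database plus a higher BGC detection score.
nextflow run nf-core/funcscan -profile docker \
    --input samplesheet.csv \
    --outdir <OUTDIR> \
    --run_bgc_screening \
    --bgc_deepbgc_db '/<path>/<to>/deepbgc_db/' \
    --bgc_deepbgc_score 0.7
```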
Therefore, nf-core/funcscan will run with the `meta` mode by default, but providing this parameter allows you to override this and run in single genome mode again.\n\nFor more information check the Prodigal [documentation](https://github.com/hyattpd/prodigal/wiki).\n\n> Modifies tool parameter(s)\n> - DeepBGC: `--prodigal-meta-mode`", "fa_icon": "fas fa-compress-alt" }, "bgc_deepbgc_mergemaxproteingap": { @@ -955,7 +1264,7 @@ "default": 0, "description": "Merge detected BGCs within given number of nucleotides.", "fa_icon": "fas fa-angle-double-up", - "help_text": "Merge detected BGCs within given number of proteins.\n\nFor more information see the DeepBGC [documentation](https://github.com/Merck/deepbgc).\n\n> Modifies tool parameter(s)\n> - DeepBGC: `--merge-max-nucl-gap`" + "help_text": "Merge detected BGCs within given number of nucleotides.\n\nFor more information see the DeepBGC [documentation](https://github.com/Merck/deepbgc).\n\n> Modifies tool parameter(s)\n> - DeepBGC: `--merge-max-nucl-gap`" }, "bgc_deepbgc_minnucl": { "type": "integer", @@ -993,12 +1302,12 @@ "help_text": "DeepBGC classification score threshold for assigning classes to BGCs.\n\nFor more information see the DeepBGC [documentation](https://github.com/Merck/deepbgc).\n\n> Modifies tool parameter(s)\n> - DeepBGC: `--classifier-score`" } }, - "help_text": "DeepBGC screens for BGCs in bacterial and fungal genomes using deep learning.\n\nDocumentation: https://github.com/Merck/deepbgc/tree/master/deepbgc" + "fa_icon": "fa fa-sliders" }, "bgc_gecco": { - "title": "BGC: gecco", + "title": "BGC: GECCO", "type": "object", - "description": "Biosynthetic gene cluster detection", + "description": "Biosynthetic gene cluster detection using Conditional Random Fields (CRFs). More info: https://gecco.embl.de", "default": "", "properties": { "bgc_skip_gecco": { @@ -1017,7 +1326,7 @@ "default": 3, "description": "The minimum number of coding sequences a valid cluster must contain.", "fa_icon": "fas fa-align-right", - "help_text": "Specify the number of consecutive genes a hit must have to be considered a part of a possible BGC region during BGC extraction.\n\nFor more information see the GECCO [documentation](https://github.com/zellerlab/GECCO).\n\n> Modifies tool parameter(s):\n> - GECCO: `--cds`" + "help_text": "Specify the number of consecutive genes a hit must have to be considered as part of a possible BGC region during BGC extraction.\n\nFor more information see the GECCO [documentation](https://github.com/zellerlab/GECCO).\n\n> Modifies tool parameter(s):\n> - GECCO: `--cds`" }, "bgc_gecco_pfilter": { "type": "number", @@ -1031,7 +1340,7 @@ "default": 0.8, "description": "The probability threshold for cluster detection.", "fa_icon": "fas fa-angle-double-up", - "help_text": "Specify the minimum probability a predicted gene must have to be considered a part of a BGC during BGC extraction.\n\nReducing this value may increase number and length of hits, but will reduce the accuracy of the predictions.\n\nFor more information see the GECCO [documentation](https://github.com/zellerlab/GECCO).\n\n> Modifies tool parameter(s):\n> - GECCO: `--threshold`" + "help_text": "Specify the minimum probability a predicted gene must have to be considered as part of a BGC during BGC extraction.\n\nReducing this value may increase the number and length of hits, but will reduce the accuracy of the predictions.\n\nFor more information see the GECCO [documentation](https://github.com/zellerlab/GECCO).\n\n> Modifies tool parameter(s):\n> - GECCO:
`--threshold`" }, "bgc_gecco_edgedistance": { "type": "integer", @@ -1040,96 +1349,47 @@ "help_text": "The minimum number of annotated genes that must separate a possible BGC cluster from the edge. Edge clusters will still be included if they are longer. A lower number will increase the number of false positives on small contigs. Used during BGC extraction.\n\nFor more information see the GECCO [documentation](https://github.com/zellerlab/GECCO).\n\n> Modifies tool parameter(s):\n> - GECCO: `--edge-distance`", "fa_icon": "fas fa-ruler-horizontal" } - } + }, + "fa_icon": "fa fa-sliders" }, "bgc_hmmsearch": { - "title": "BGC: HMMSearch", + "title": "BGC: hmmsearch", "type": "object", - "description": "Biosynthetic Gene Cluster detection based on predefined HMM models", + "description": "Biosynthetic Gene Cluster detection based on predefined HMM models. This tool implements methods using probabilistic models called profile hidden Markov models (profile HMMs) to search against a sequence database. More info: http://eddylab.org/software/hmmer/Userguide.pdf", "default": "", "properties": { - "bgc_skip_hmmsearch": { + "bgc_run_hmmsearch": { "type": "boolean", - "description": "Skip HMMsearch during BGC-screening.", + "description": "Run hmmsearch during BGC screening.", + "help_text": "hmmsearch is not run by default because HMM model files must be provided by the user with the flag `bgc_hmmsearch_models`.", "fa_icon": "fas fa-ban" }, "bgc_hmmsearch_models": { "type": "string", "description": "Specify path to the BGC hmm model file(s) to search against. Must have quotes if wildcard used.", - "help_text": "HMMSearch performs biosequence analysis using profile hidden Markov Models.\nThe models are specified in`.hmm` files that are specified with this parameter\n\ne.g. \n\n```\n--bgc_hmmsearch_models '////*.hmm'\n```\n\nYou must wrap the path in quotes if you use a wildcard, to ensure Nextflow expansion _not_ bash!\n\nFor more information check HMMER [documentation](http://hmmer.org/).", + "help_text": "hmmsearch performs biosequence analysis using profile hidden Markov Models.\nThe models are specified in`.hmm` files that are specified with this parameter, e.g.:\n\n```\n--bgc_hmmsearch_models '////*.hmm'\n```\n\nYou must wrap the path in quotes if you use a wildcard, to ensure Nextflow expansion _not_ bash! 
When using quotes, the absolute path to the HMM file(s) has to be given.\n\nFor more information check the HMMER [documentation](http://hmmer.org/).", "fa_icon": "fas fa-layer-group" }, "bgc_hmmsearch_savealignments": { "type": "boolean", - "help_text": "Save a multiple alignment of all significant hits (those satisfying inclusion thresholds) to a file\n\nFor more information check HMMER [documentation](http://hmmer.org/).\n\n> Modifies tool parameter(s):\n> - HMMsearch: `-A`", + "help_text": "Save a multiple alignment of all significant hits (those satisfying inclusion thresholds) to a file.\n\nFor more information check the HMMER [documentation](http://hmmer.org/).\n\n> Modifies tool parameter(s):\n> - hmmsearch: `-A`", "description": "Saves a multiple alignment of all significant hits to a file.", - "fa_icon": "far fa-save" + "fa_icon": "fas fa-save" }, "bgc_hmmsearch_savetargets": { "type": "boolean", - "help_text": "Save a simple tabular (space-delimited) file summarizing the per-target output, with one data line per homologous target sequence found.\n\nFor more information check HMMER [documentation](http://hmmer.org/).\n\n> Modifies tool parameter(s)\n> - HMMsearch: `--tblout`", + "help_text": "Save a simple tabular (space-delimited) file summarizing the per-target output, with one data line per homologous target sequence found.\n\nFor more information check the HMMER [documentation](http://hmmer.org/).\n\n> Modifies tool parameter(s)\n> - hmmsearch: `--tblout`", "description": "Save a simple tabular file summarising the per-target output.", - "fa_icon": "far fa-save" + "fa_icon": "fas fa-save" }, "bgc_hmmsearch_savedomains": { "type": "boolean", - "help_text": "Save a simple tabular (space-delimited) file summarizing the per-domain output, with one data line per homologous domain detected in a query sequence for each homologous model.\n\nFor more information check HMMER [documentation](http://hmmer.org/).\n\n> Modifies tool parameter(s)\n> - HMMsearch:`--domtblout`", + "help_text": "Save a simple tabular (space-delimited) file summarizing the per-domain output, with one data line per homologous domain detected in a query sequence for each homologous model.\n\nFor more information check the HMMER [documentation](http://hmmer.org/).\n\n> Modifies tool parameter(s)\n> - hmmsearch:`--domtblout`", "description": "Save a simple tabular file summarising the per-domain output.", - "fa_icon": "far fa-save" + "fa_icon": "fas fa-save" } }, - "help_text": "HMMER/hmmsearch is used for searching sequence databases for sequence homologs, and for making sequence alignments. It implements methods using probabilistic models called profile hidden Markov models (profile HMMs). `hmmsearch` is used to search one or more profiles against a sequence database.\n\nFor more information check HMMER [documentation](http://hmmer.org/)." 
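
A note on the quoting requirement above: the glob must reach Nextflow unexpanded, because it is resolved by `Channel.fromPath`, not by the shell. A minimal, self-contained sketch of the consuming pattern (the models path is an illustrative placeholder, not a real location):

```groovy
// Sketch only: '/data/hmms/*.hmm' is an illustrative placeholder path.
// Invoked on the CLI as: --bgc_hmmsearch_models '/data/hmms/*.hmm' (note the quotes)
params.bgc_hmmsearch_models = '/data/hmms/*.hmm'

workflow {
    // checkIfExists fails the run early if the glob matches no files
    ch_models = Channel.fromPath( params.bgc_hmmsearch_models, checkIfExists: true )
    ch_models.view() // emits one path per matching .hmm file
}
```

Without the quotes, bash expands the wildcard first, so only the first matching file would reach the parameter.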
- }, - "reporting": { - "title": "Reporting", - "type": "object", - "description": "Influences parameters required for the reporting workflow.", - "default": "", - "properties": { - "arg_hamronization_summarizeformat": { - "type": "string", - "default": "tsv", - "enum": ["interactive", "tsv", "json"], - "help_text": "Specifies which summary report format to generate with `hamronize summarize`: tsv, json or interactive (html)\n\n> Modifies tool parameter(s)\n> - HMMsearch: `-t`, `--summary_type`", - "description": "Specifies summary output format", - "fa_icon": "far fa-file-code" - } - }, - "fa_icon": "fas fa-file-import", - "help_text": "" - }, - "reference_genome_options": { - "title": "Reference genome options", - "type": "object", - "fa_icon": "fas fa-dna", - "description": "Reference genome related files and options required for the workflow.", - "properties": { - "genome": { - "type": "string", - "description": "Name of iGenomes reference.", - "fa_icon": "fas fa-book", - "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`. \n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details.", - "hidden": true - }, - "fasta": { - "type": "string", - "format": "file-path", - "exists": false, - "mimetype": "text/plain", - "pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$", - "description": "Path to FASTA genome file.", - "help_text": "This parameter is *mandatory* if `--genome` is not specified. If you don't have a BWA index available this will be generated for you automatically. Combine with `--save_reference` to save BWA index for future runs.", - "fa_icon": "far fa-file-code", - "hidden": true - }, - "igenomes_ignore": { - "type": "boolean", - "description": "Do not load the iGenomes reference config.", - "fa_icon": "fas fa-ban", - "help_text": "Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`.", - "hidden": true - } - } + "fa_icon": "fa fa-sliders" }, "institutional_config_options": { "title": "Institutional config options", @@ -1322,14 +1582,18 @@ "description": "Validation of parameters in lenient more.", "hidden": true, "help_text": "Allows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode)." 
+ }, + "pipelines_testdata_base_path": { + "type": "string", + "fa_icon": "far fa-check-circle", + "description": "Base URL or local path to location of pipeline test dataset files", + "default": "https://raw.githubusercontent.com/nf-core/test-datasets/", + "hidden": true } } } }, "allOf": [ - { - "$ref": "#/definitions/annotation_pyrodigal" - }, { "$ref": "#/definitions/input_output_options" }, @@ -1337,7 +1601,16 @@ "$ref": "#/definitions/screening_type_activation" }, { - "$ref": "#/definitions/annotation" + "$ref": "#/definitions/taxonomic_classification_general_options" + }, + { + "$ref": "#/definitions/taxonomic_classification_mmseqs2_databases" + }, + { + "$ref": "#/definitions/taxonomic_classification_mmseqs2_taxonomy" + }, + { + "$ref": "#/definitions/annotation_general_options" }, { "$ref": "#/definitions/annotation_bakta" @@ -1348,6 +1621,9 @@ { "$ref": "#/definitions/annotation_prodigal" }, + { + "$ref": "#/definitions/annotation_pyrodigal" + }, { "$ref": "#/definitions/database_downloading_options" }, @@ -1364,7 +1640,10 @@ "$ref": "#/definitions/amp_macrel" }, { - "$ref": "#/definitions/amp_ampcombi" + "$ref": "#/definitions/amp_ampcombi2_parsetables" + }, + { + "$ref": "#/definitions/amp_ampcombi2_cluster" }, { "$ref": "#/definitions/arg_amrfinderplus" @@ -1382,22 +1661,25 @@ "$ref": "#/definitions/arg_abricate" }, { - "$ref": "#/definitions/bgc_antismash" + "$ref": "#/definitions/arg_hamronization" }, { - "$ref": "#/definitions/bgc_deepbgc" + "$ref": "#/definitions/arg_argnorm" }, { - "$ref": "#/definitions/bgc_gecco" + "$ref": "#/definitions/bgc_general_options" }, { - "$ref": "#/definitions/bgc_hmmsearch" + "$ref": "#/definitions/bgc_antismash" }, { - "$ref": "#/definitions/reporting" + "$ref": "#/definitions/bgc_deepbgc" }, { - "$ref": "#/definitions/reference_genome_options" + "$ref": "#/definitions/bgc_gecco" + }, + { + "$ref": "#/definitions/bgc_hmmsearch" }, { "$ref": "#/definitions/institutional_config_options" diff --git a/nf-test.config b/nf-test.config new file mode 100644 index 00000000..03cfcfa8 --- /dev/null +++ b/nf-test.config @@ -0,0 +1,19 @@ +config { + // location for all nf-tests + testsDir "tests/" + + // nf-test directory including temporary files for each test + workDir ".nf-test" + + // location of library folder that is added automatically to the classpath + libDir "tests/pipeline/lib/" + + // location of an optional nextflow.config file specific for executing tests + configFile "nextflow.config" + + // run all tests with the defined docker profile from the main nextflow.config + profile "" + + // add Nextflow options + //options "-resume" +} diff --git a/pyproject.toml b/pyproject.toml deleted file mode 100644 index 0d62beb6..00000000 --- a/pyproject.toml +++ /dev/null @@ -1,10 +0,0 @@ -# Config file for Python. Mostly used to configure linting of bin/check_samplesheet.py with Black. -# Should be kept the same as nf-core/tools to avoid fighting with template synchronisation.
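
For orientation, the `nf-test.config` added above expects test definitions under `tests/`. A hypothetical minimal pipeline-level test file that this configuration would discover (the file name `tests/pipeline/main.nf.test`, the test names, and the assertions are invented for illustration and are not part of this PR):

```groovy
// Hypothetical tests/pipeline/main.nf.test, shown only to illustrate what
// the testsDir/configFile settings above point at.
nextflow_pipeline {

    name   "Minimal funcscan smoke test"
    script "main.nf"

    test("runs to completion with default parameters") {
        when {
            params {
                outdir = "$outputDir" // outputDir is provided by nf-test
            }
        }
        then {
            assert workflow.success
        }
    }
}
```

Such a test would then be picked up by `nf-test test`, optionally restricted to specific profiles with `--profile`.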
-[tool.black] -line-length = 120 -target_version = ["py37", "py38", "py39", "py310"] - -[tool.isort] -profile = "black" -known_first_party = ["nf_core"] -multi_line_output = 3 diff --git a/subworkflows/local/amp.nf b/subworkflows/local/amp.nf index 6add2e7f..88f75393 100644 --- a/subworkflows/local/amp.nf +++ b/subworkflows/local/amp.nf @@ -2,120 +2,155 @@ Run AMP screening tools */ -include { MACREL_CONTIGS } from '../../modules/nf-core/macrel/contigs/main' -include { HMMER_HMMSEARCH as AMP_HMMER_HMMSEARCH } from '../../modules/nf-core/hmmer/hmmsearch/main' -include { AMPLIFY_PREDICT } from '../../modules/nf-core/amplify/predict/main' -include { AMPIR } from '../../modules/nf-core/ampir/main' -include { DRAMP_DOWNLOAD } from '../../modules/local/dramp_download' -include { AMPCOMBI } from '../../modules/nf-core/ampcombi/main' -include { GUNZIP as GUNZIP_MACREL_PRED ; GUNZIP as GUNZIP_HMMER ; GUNZIP as GUNZIP_MACREL_ORFS } from '../../modules/nf-core/gunzip/main' -include { TABIX_BGZIP } from '../../modules/nf-core/tabix/bgzip/main' +include { MACREL_CONTIGS } from '../../modules/nf-core/macrel/contigs/main' +include { HMMER_HMMSEARCH as AMP_HMMER_HMMSEARCH } from '../../modules/nf-core/hmmer/hmmsearch/main' +include { AMPLIFY_PREDICT } from '../../modules/nf-core/amplify/predict/main' +include { AMPIR } from '../../modules/nf-core/ampir/main' +include { DRAMP_DOWNLOAD } from '../../modules/local/dramp_download' +include { AMPCOMBI2_PARSETABLES } from '../../modules/nf-core/ampcombi2/parsetables' +include { AMPCOMBI2_COMPLETE } from '../../modules/nf-core/ampcombi2/complete' +include { AMPCOMBI2_CLUSTER } from '../../modules/nf-core/ampcombi2/cluster' +include { GUNZIP as GUNZIP_MACREL_PRED ; GUNZIP as GUNZIP_MACREL_ORFS } from '../../modules/nf-core/gunzip/main' +include { GUNZIP as AMP_GUNZIP_HMMER_HMMSEARCH } from '../../modules/nf-core/gunzip/main' +include { TABIX_BGZIP as AMP_TABIX_BGZIP } from '../../modules/nf-core/tabix/bgzip/main' +include { MERGE_TAXONOMY_AMPCOMBI } from '../../modules/local/merge_taxonomy_ampcombi' workflow AMP { take: - contigs // tuple val(meta), path(contigs) - faa // tuple val(meta), path(PROKKA/PRODIGAL.out.faa) + fastas // tuple val(meta), path(contigs) + faas // tuple val(meta), path(PROKKA/PRODIGAL.out.faa) + tsvs // tuple val(meta), path(MMSEQS_CREATETSV.out.tsv) + gbks // tuple val(meta), path(ANNOTATION_ANNOTATION_TOOL.out.gbk) main: ch_versions = Channel.empty() ch_ampresults_for_ampcombi = Channel.empty() ch_ampcombi_summaries = Channel.empty() ch_macrel_faa = Channel.empty() + ch_ampcombi_complete = Channel.empty() + ch_ampcombi_for_cluster = Channel.empty() // When adding a new tool that requires FAA, make sure to update conditions // in funcscan.nf around annotation and AMP subworkflow execution // to ensure annotation is executed!
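
To illustrate the comment above with a concrete sketch: a new FAA-consuming AMP tool would be wired in the same way as AMPlify, so that its predictions also reach AMPcombi. `NEWAMPTOOL`, its module path, and `amp_skip_newamptool` are invented names for illustration and are not part of this PR:

```groovy
// Hypothetical only: NEWAMPTOOL does not exist in nf-core/modules.
// include { NEWAMPTOOL } from '../../modules/local/newamptool/main'

if ( !params.amp_skip_newamptool ) {
    NEWAMPTOOL ( faas ) // consumes tuple val(meta), path(faa)
    ch_versions                = ch_versions.mix( NEWAMPTOOL.out.versions )
    ch_ampresults_for_ampcombi = ch_ampresults_for_ampcombi.mix( NEWAMPTOOL.out.tsv )
}
```

The matching condition in `funcscan.nf` that decides whether annotation runs would also need extending, which is exactly what the comment warns about.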
- ch_faa_for_amplify = faa - ch_faa_for_amp_hmmsearch = faa - ch_faa_for_ampir = faa - ch_faa_for_ampcombi = faa + ch_faa_for_amplify = faas + ch_faa_for_amp_hmmsearch = faas + ch_faa_for_ampir = faas + ch_faa_for_ampcombi = faas + ch_gbk_for_ampcombi = gbks // AMPLIFY if ( !params.amp_skip_amplify ) { AMPLIFY_PREDICT ( ch_faa_for_amplify, [] ) - ch_versions = ch_versions.mix(AMPLIFY_PREDICT.out.versions) - ch_ampresults_for_ampcombi = ch_ampresults_for_ampcombi.mix(AMPLIFY_PREDICT.out.tsv) + ch_versions = ch_versions.mix( AMPLIFY_PREDICT.out.versions ) + ch_ampresults_for_ampcombi = ch_ampresults_for_ampcombi.mix( AMPLIFY_PREDICT.out.tsv ) } // MACREL if ( !params.amp_skip_macrel ) { - MACREL_CONTIGS ( contigs ) - ch_versions = ch_versions.mix(MACREL_CONTIGS.out.versions) + MACREL_CONTIGS ( fastas ) + ch_versions = ch_versions.mix( MACREL_CONTIGS.out.versions ) GUNZIP_MACREL_PRED ( MACREL_CONTIGS.out.amp_prediction ) GUNZIP_MACREL_ORFS ( MACREL_CONTIGS.out.all_orfs ) - ch_versions = ch_versions.mix(GUNZIP_MACREL_PRED.out.versions) - ch_versions = ch_versions.mix(GUNZIP_MACREL_ORFS.out.versions) - ch_ampresults_for_ampcombi = ch_ampresults_for_ampcombi.mix(GUNZIP_MACREL_PRED.out.gunzip) - ch_macrel_faa = ch_macrel_faa.mix(GUNZIP_MACREL_ORFS.out.gunzip) - ch_faa_for_ampcombi = ch_faa_for_ampcombi.mix(ch_macrel_faa) + ch_versions = ch_versions.mix( GUNZIP_MACREL_PRED.out.versions ) + ch_versions = ch_versions.mix( GUNZIP_MACREL_ORFS.out.versions ) + ch_ampresults_for_ampcombi = ch_ampresults_for_ampcombi.mix( GUNZIP_MACREL_PRED.out.gunzip ) + ch_macrel_faa = ch_macrel_faa.mix( GUNZIP_MACREL_ORFS.out.gunzip ) + ch_faa_for_ampcombi = ch_faa_for_ampcombi.mix( ch_macrel_faa ) } // AMPIR if ( !params.amp_skip_ampir ) { AMPIR ( ch_faa_for_ampir, params.amp_ampir_model, params.amp_ampir_minlength, 0.0 ) - ch_versions = ch_versions.mix(AMPIR.out.versions) - ch_ampresults_for_ampcombi = ch_ampresults_for_ampcombi.mix(AMPIR.out.amps_tsv) + ch_versions = ch_versions.mix( AMPIR.out.versions ) + ch_ampresults_for_ampcombi = ch_ampresults_for_ampcombi.mix( AMPIR.out.amps_tsv ) } // HMMSEARCH - if ( !params.amp_skip_hmmsearch ) { - if ( params.amp_hmmsearch_models ) { ch_amp_hmm_models = Channel.fromPath( params.amp_hmmsearch_models, checkIfExists: true ) } else { error('[nf-core/funcscan] error: hmm model files not found for --amp_hmmsearch_models! Please check input.') } + if ( params.amp_run_hmmsearch ) { + if ( params.amp_hmmsearch_models ) { ch_amp_hmm_models = Channel.fromPath( params.amp_hmmsearch_models, checkIfExists: true ) } else { error('[nf-core/funcscan] error: HMM model files not found for --amp_hmmsearch_models! Please check input.') } ch_amp_hmm_models_meta = ch_amp_hmm_models .map { file -> - def meta = [:] + def meta = [:] meta['id'] = file.extension == 'gz' ? 
file.name - '.hmm.gz' : file.name - '.hmm' - [ meta, file ] } - ch_in_for_amp_hmmsearch = ch_faa_for_amp_hmmsearch.combine(ch_amp_hmm_models_meta) - .map { - meta_faa, faa, meta_hmm, hmm -> - def meta_new = [:] - meta_new['id'] = meta_faa['id'] - meta_new['hmm_id'] = meta_hmm['id'] - [ meta_new, hmm, faa, params.amp_hmmsearch_savealignments, params.amp_hmmsearch_savetargets, params.amp_hmmsearch_savedomains ] - } + ch_in_for_amp_hmmsearch = ch_faa_for_amp_hmmsearch + .combine( ch_amp_hmm_models_meta ) + .map { + meta_faa, faa, meta_hmm, hmm -> + def meta_new = [:] + meta_new['id'] = meta_faa['id'] + meta_new['hmm_id'] = meta_hmm['id'] + [ meta_new, hmm, faa, params.amp_hmmsearch_savealignments, params.amp_hmmsearch_savetargets, params.amp_hmmsearch_savedomains ] + } AMP_HMMER_HMMSEARCH ( ch_in_for_amp_hmmsearch ) - ch_versions = ch_versions.mix(AMP_HMMER_HMMSEARCH.out.versions) + ch_versions = ch_versions.mix( AMP_HMMER_HMMSEARCH.out.versions ) + AMP_GUNZIP_HMMER_HMMSEARCH ( AMP_HMMER_HMMSEARCH.out.output ) + ch_versions = ch_versions.mix( AMP_GUNZIP_HMMER_HMMSEARCH.out.versions ) + ch_AMP_GUNZIP_HMMER_HMMSEARCH = AMP_GUNZIP_HMMER_HMMSEARCH.out.gunzip + .map { meta, file -> + [ [id: meta.id], file ] + } + ch_ampresults_for_ampcombi = ch_ampresults_for_ampcombi.mix( ch_AMP_GUNZIP_HMMER_HMMSEARCH ) } - //AMPCOMBI + // AMPCOMBI2 ch_input_for_ampcombi = ch_ampresults_for_ampcombi .groupTuple() .join( ch_faa_for_ampcombi ) + .join( ch_gbk_for_ampcombi ) .multiMap{ input: [ it[0], it[1] ] faa: it[2] + gbk: it[3] } - // Checks if `--amp_database` is a user supplied path and if the path does not exist it goes to default, which downloads the DRAMP database once. - if ( params.amp_ampcombi_db ) { - ch_ampcombi_input_db = Channel - .fromPath( params.amp_ampcombi_db, checkIfExists: true ) } - else { - DRAMP_DOWNLOAD() - ch_ampcombi_input_db = DRAMP_DOWNLOAD.out.db + + if ( params.amp_ampcombi_db != null ) { + AMPCOMBI2_PARSETABLES ( ch_input_for_ampcombi.input, ch_input_for_ampcombi.faa, ch_input_for_ampcombi.gbk, params.amp_ampcombi_db ) + } else { + DRAMP_DOWNLOAD() + ch_versions = ch_versions.mix( DRAMP_DOWNLOAD.out.versions ) + ch_ampcombi_input_db = DRAMP_DOWNLOAD.out.db + AMPCOMBI2_PARSETABLES ( ch_input_for_ampcombi.input, ch_input_for_ampcombi.faa, ch_input_for_ampcombi.gbk, ch_ampcombi_input_db ) + } + ch_versions = ch_versions.mix( AMPCOMBI2_PARSETABLES.out.versions ) + + ch_ampcombi_summaries = AMPCOMBI2_PARSETABLES.out.tsv.map{ it[1] }.collect() + + AMPCOMBI2_COMPLETE ( ch_ampcombi_summaries ) + ch_versions = ch_versions.mix( AMPCOMBI2_COMPLETE.out.versions ) + + ch_ampcombi_complete = AMPCOMBI2_COMPLETE.out.tsv + .filter { file -> file.countLines() > 1 } + + if ( ch_ampcombi_complete != null ) { + AMPCOMBI2_CLUSTER ( ch_ampcombi_complete ) + ch_versions = ch_versions.mix( AMPCOMBI2_CLUSTER.out.versions ) + } else { + log.warn("[nf-core/funcscan] No AMP hits were found in the samples and so no clustering will be applied.") } - AMPCOMBI( ch_input_for_ampcombi.input, ch_input_for_ampcombi.faa, ch_ampcombi_input_db ) - ch_ampcombi_summaries = ch_ampcombi_summaries.mix(AMPCOMBI.out.csv) + // MERGE_TAXONOMY + if ( params.run_taxa_classification && ch_ampcombi_complete == null ) { + log.warn("[nf-core/funcscan] No AMP hits were found in the samples, therefore no taxonomy will be merged.") + } else if ( params.run_taxa_classification && ch_ampcombi_complete != null ) { + ch_mmseqs_taxonomy_list = tsvs.map{ it[1] }.collect() - //AMPCOMBI concatenation - ch_ampcombi_summaries_out =
ch_ampcombi_summaries - .multiMap{ - input: [ it[0] ] - summary: it[1] - } - - ch_tabix_input = Channel.of(['id':'ampcombi_complete_summary']) - .combine(ch_ampcombi_summaries_out.summary.collectFile(name: 'ampcombi_complete_summary.csv', keepHeader:true)) - - TABIX_BGZIP(ch_tabix_input) + MERGE_TAXONOMY_AMPCOMBI( AMPCOMBI2_CLUSTER.out.cluster_tsv, ch_mmseqs_taxonomy_list ) + ch_versions = ch_versions.mix( MERGE_TAXONOMY_AMPCOMBI.out.versions ) + + ch_tabix_input = Channel.of( [ 'id':'ampcombi_complete_summary_taxonomy' ] ) + .combine( MERGE_TAXONOMY_AMPCOMBI.out.tsv ) + + AMP_TABIX_BGZIP( ch_tabix_input ) + ch_versions = ch_versions.mix( AMP_TABIX_BGZIP.out.versions ) + } emit: versions = ch_versions - } diff --git a/subworkflows/local/annotation.nf b/subworkflows/local/annotation.nf new file mode 100644 index 00000000..c1c8e332 --- /dev/null +++ b/subworkflows/local/annotation.nf @@ -0,0 +1,95 @@ +/* + Run annotation tools +*/ + +include { PROKKA } from '../../modules/nf-core/prokka/main' +include { PRODIGAL } from '../../modules/nf-core/prodigal/main' +include { PYRODIGAL } from '../../modules/nf-core/pyrodigal/main' +include { BAKTA_BAKTADBDOWNLOAD } from '../../modules/nf-core/bakta/baktadbdownload/main' +include { BAKTA_BAKTA } from '../../modules/nf-core/bakta/bakta/main' +include { GUNZIP as GUNZIP_PRODIGAL_FNA } from '../../modules/nf-core/gunzip/main' +include { GUNZIP as GUNZIP_PRODIGAL_FAA } from '../../modules/nf-core/gunzip/main' +include { GUNZIP as GUNZIP_PRODIGAL_GBK } from '../../modules/nf-core/gunzip/main' +include { GUNZIP as GUNZIP_PYRODIGAL_FNA } from '../../modules/nf-core/gunzip/main' +include { GUNZIP as GUNZIP_PYRODIGAL_FAA } from '../../modules/nf-core/gunzip/main' +include { GUNZIP as GUNZIP_PYRODIGAL_GBK } from '../../modules/nf-core/gunzip/main' + +workflow ANNOTATION { + take: + fasta // tuple val(meta), path(contigs) + + main: + ch_versions = Channel.empty() + ch_multiqc_files = Channel.empty() + + if ( params.annotation_tool == "pyrodigal" || ( params.annotation_tool == "prodigal" && params.run_bgc_screening == true && ( !params.bgc_skip_antismash || !params.bgc_skip_deepbgc || !params.bgc_skip_gecco ) ) || ( params.annotation_tool == "prodigal" && params.run_amp_screening == true ) ) { // Need to use Pyrodigal for most BGC tools and AMPcombi because Prodigal GBK annotation format is incompatible with them. + + if ( params.annotation_tool == "prodigal" && params.run_bgc_screening == true && ( !params.bgc_skip_antismash || !params.bgc_skip_deepbgc || !params.bgc_skip_gecco ) ) { + log.warn("[nf-core/funcscan] Switching annotation tool to: Pyrodigal. This is because Prodigal annotations (in GBK format) are incompatible with antiSMASH, DeepBGC, and GECCO. If you specifically wish to run Prodigal instead, please skip antiSMASH, DeepBGC, and GECCO or provide a pre-annotated GBK file in the samplesheet.") + } else if ( params.annotation_tool == "prodigal" && params.run_amp_screening == true ) { + log.warn("[nf-core/funcscan] Switching annotation tool to: Pyrodigal. This is because Prodigal annotations (in GBK format) are incompatible with AMPcombi. 
If you specifically wish to run Prodigal instead, please skip AMP workflow or provide a pre-annotated GBK file in the samplesheet.") + } + + PYRODIGAL ( fasta, "gbk" ) + GUNZIP_PYRODIGAL_FAA ( PYRODIGAL.out.faa ) + GUNZIP_PYRODIGAL_FNA ( PYRODIGAL.out.fna) + GUNZIP_PYRODIGAL_GBK ( PYRODIGAL.out.annotations ) + ch_versions = ch_versions.mix(PYRODIGAL.out.versions) + ch_versions = ch_versions.mix(GUNZIP_PYRODIGAL_FAA.out.versions) + ch_versions = ch_versions.mix(GUNZIP_PYRODIGAL_FNA.out.versions) + ch_versions = ch_versions.mix(GUNZIP_PYRODIGAL_GBK.out.versions) + ch_annotation_faa = GUNZIP_PYRODIGAL_FAA.out.gunzip + ch_annotation_fna = GUNZIP_PYRODIGAL_FNA.out.gunzip + ch_annotation_gbk = GUNZIP_PYRODIGAL_GBK.out.gunzip + + } else if ( params.annotation_tool == "prodigal" ) { + + PRODIGAL ( fasta, "gbk" ) + GUNZIP_PRODIGAL_FAA ( PRODIGAL.out.amino_acid_fasta ) + GUNZIP_PRODIGAL_FNA ( PRODIGAL.out.nucleotide_fasta) + GUNZIP_PRODIGAL_GBK ( PRODIGAL.out.gene_annotations ) + ch_versions = ch_versions.mix(PRODIGAL.out.versions) + ch_versions = ch_versions.mix(GUNZIP_PRODIGAL_FAA.out.versions) + ch_versions = ch_versions.mix(GUNZIP_PRODIGAL_FNA.out.versions) + ch_versions = ch_versions.mix(GUNZIP_PRODIGAL_GBK.out.versions) + ch_annotation_faa = GUNZIP_PRODIGAL_FAA.out.gunzip + ch_annotation_fna = GUNZIP_PRODIGAL_FNA.out.gunzip + ch_annotation_gbk = GUNZIP_PRODIGAL_GBK.out.gunzip + + } else if ( params.annotation_tool == "prokka" ) { + + PROKKA ( fasta, [], [] ) + ch_versions = ch_versions.mix(PROKKA.out.versions) + ch_multiqc_files = PROKKA.out.txt.collect{it[1]}.ifEmpty([]) + ch_annotation_faa = PROKKA.out.faa + ch_annotation_fna = PROKKA.out.fna + ch_annotation_gbk = PROKKA.out.gbk + + } else if ( params.annotation_tool == "bakta" ) { + + // BAKTA prepare download + if ( params.annotation_bakta_db ) { + ch_bakta_db = Channel + .fromPath( params.annotation_bakta_db ) + .first() + } else { + BAKTA_BAKTADBDOWNLOAD ( ) + ch_versions = ch_versions.mix( BAKTA_BAKTADBDOWNLOAD.out.versions ) + ch_bakta_db = ( BAKTA_BAKTADBDOWNLOAD.out.db ) + } + + BAKTA_BAKTA ( fasta, ch_bakta_db, [], [] ) + ch_versions = ch_versions.mix(BAKTA_BAKTA.out.versions) + ch_multiqc_files = BAKTA_BAKTA.out.txt.collect{it[1]}.ifEmpty([]) + ch_annotation_faa = BAKTA_BAKTA.out.faa + ch_annotation_fna = BAKTA_BAKTA.out.fna + ch_annotation_gbk = BAKTA_BAKTA.out.gbff + } + + emit: + versions = ch_versions + multiqc_files = ch_multiqc_files + faa = ch_annotation_faa // [ [meta], path(faa) ] + fna = ch_annotation_fna // [ [meta], path(fna) ] + gbk = ch_annotation_gbk // [ [meta], path(gbk) ] +} diff --git a/subworkflows/local/arg.nf b/subworkflows/local/arg.nf index 2a63025e..81dffb72 100644 --- a/subworkflows/local/arg.nf +++ b/subworkflows/local/arg.nf @@ -2,29 +2,37 @@ Run ARG screening tools */ -include { ABRICATE_RUN } from '../../modules/nf-core/abricate/run/main' -include { AMRFINDERPLUS_UPDATE } from '../../modules/nf-core/amrfinderplus/update/main' -include { AMRFINDERPLUS_RUN } from '../../modules/nf-core/amrfinderplus/run/main' -include { FARGENE } from '../../modules/nf-core/fargene/main' -include { DEEPARG_DOWNLOADDATA } from '../../modules/nf-core/deeparg/downloaddata/main' -include { DEEPARG_PREDICT } from '../../modules/nf-core/deeparg/predict/main' -include { RGI_MAIN } from '../../modules/nf-core/rgi/main/main' -include { HAMRONIZATION_ABRICATE } from '../../modules/nf-core/hamronization/abricate/main' -include { HAMRONIZATION_RGI } from '../../modules/nf-core/hamronization/rgi/main' -include { HAMRONIZATION_DEEPARG 
} from '../../modules/nf-core/hamronization/deeparg/main' -include { HAMRONIZATION_AMRFINDERPLUS } from '../../modules/nf-core/hamronization/amrfinderplus/main' -include { HAMRONIZATION_FARGENE } from '../../modules/nf-core/hamronization/fargene/main' -include { HAMRONIZATION_SUMMARIZE } from '../../modules/nf-core/hamronization/summarize/main' +include { ABRICATE_RUN } from '../../modules/nf-core/abricate/run/main' +include { AMRFINDERPLUS_UPDATE } from '../../modules/nf-core/amrfinderplus/update/main' +include { AMRFINDERPLUS_RUN } from '../../modules/nf-core/amrfinderplus/run/main' +include { DEEPARG_DOWNLOADDATA } from '../../modules/nf-core/deeparg/downloaddata/main' +include { DEEPARG_PREDICT } from '../../modules/nf-core/deeparg/predict/main' +include { FARGENE } from '../../modules/nf-core/fargene/main' +include { RGI_CARDANNOTATION } from '../../modules/nf-core/rgi/cardannotation/main' +include { RGI_MAIN } from '../../modules/nf-core/rgi/main/main' +include { UNTAR as UNTAR_CARD } from '../../modules/nf-core/untar/main' +include { TABIX_BGZIP as ARG_TABIX_BGZIP } from '../../modules/nf-core/tabix/bgzip/main' +include { MERGE_TAXONOMY_HAMRONIZATION } from '../../modules/local/merge_taxonomy_hamronization' +include { HAMRONIZATION_RGI } from '../../modules/nf-core/hamronization/rgi/main' +include { HAMRONIZATION_FARGENE } from '../../modules/nf-core/hamronization/fargene/main' +include { HAMRONIZATION_SUMMARIZE } from '../../modules/nf-core/hamronization/summarize/main' +include { HAMRONIZATION_ABRICATE } from '../../modules/nf-core/hamronization/abricate/main' +include { HAMRONIZATION_DEEPARG } from '../../modules/nf-core/hamronization/deeparg/main' +include { HAMRONIZATION_AMRFINDERPLUS } from '../../modules/nf-core/hamronization/amrfinderplus/main' +include { ARGNORM as ARGNORM_DEEPARG } from '../../modules/nf-core/argnorm/main' +include { ARGNORM as ARGNORM_ABRICATE } from '../../modules/nf-core/argnorm/main' +include { ARGNORM as ARGNORM_AMRFINDERPLUS } from '../../modules/nf-core/argnorm/main' workflow ARG { take: - contigs // tuple val(meta), path(contigs) - annotations // output from prokka + fastas // tuple val(meta), path(contigs) + annotations + tsvs // tuple val(meta), path(MMSEQS_CREATETSV.out.tsv) main: ch_versions = Channel.empty() - // Prepare HAMRONIZATION reporting channel + // Prepare HAMRONIZATION reporting channel ch_input_to_hamronization_summarize = Channel.empty() // AMRfinderplus run @@ -35,68 +43,90 @@ workflow ARG { .first() } else if ( !params.arg_skip_amrfinderplus && !params.arg_amrfinderplus_db ) { AMRFINDERPLUS_UPDATE( ) - ch_versions = ch_versions.mix(AMRFINDERPLUS_UPDATE.out.versions) + ch_versions = ch_versions.mix( AMRFINDERPLUS_UPDATE.out.versions ) ch_amrfinderplus_db = AMRFINDERPLUS_UPDATE.out.db } if ( !params.arg_skip_amrfinderplus ) { - AMRFINDERPLUS_RUN ( contigs, ch_amrfinderplus_db ) - ch_versions = ch_versions.mix(AMRFINDERPLUS_RUN.out.versions) + AMRFINDERPLUS_RUN ( fastas, ch_amrfinderplus_db ) + ch_versions = ch_versions.mix( AMRFINDERPLUS_RUN.out.versions ) // Reporting - HAMRONIZATION_AMRFINDERPLUS ( AMRFINDERPLUS_RUN.out.report, 'json', AMRFINDERPLUS_RUN.out.tool_version, AMRFINDERPLUS_RUN.out.db_version ) - ch_versions = ch_versions.mix(HAMRONIZATION_AMRFINDERPLUS.out.versions) - ch_input_to_hamronization_summarize = ch_input_to_hamronization_summarize.mix(HAMRONIZATION_AMRFINDERPLUS.out.json) + HAMRONIZATION_AMRFINDERPLUS ( AMRFINDERPLUS_RUN.out.report, 'tsv', AMRFINDERPLUS_RUN.out.tool_version, AMRFINDERPLUS_RUN.out.db_version 
) + ch_versions = ch_versions.mix( HAMRONIZATION_AMRFINDERPLUS.out.versions ) + ch_input_to_hamronization_summarize = ch_input_to_hamronization_summarize.mix( HAMRONIZATION_AMRFINDERPLUS.out.tsv ) + + if ( !params.arg_skip_argnorm ) { + ch_input_to_argnorm_amrfinderplus = HAMRONIZATION_AMRFINDERPLUS.out.tsv.filter{ meta, file -> !file.isEmpty() } + ARGNORM_AMRFINDERPLUS ( ch_input_to_argnorm_amrfinderplus, 'amrfinderplus', 'ncbi' ) + ch_versions = ch_versions.mix( ARGNORM_AMRFINDERPLUS.out.versions ) + } } // fARGene run if ( !params.arg_skip_fargene ) { - ch_fargene_classes = Channel.fromList( params.arg_fargene_hmmmodel.tokenize(',') ) - ch_fargene_input = contigs - .combine(ch_fargene_classes) + ch_fargene_input = fastas + .combine( ch_fargene_classes ) .map { - meta, contigs, hmm_class -> + meta, fastas, hmm_class -> def meta_new = meta.clone() meta_new['hmm_class'] = hmm_class - [ meta_new, contigs, hmm_class ] + [ meta_new, fastas, hmm_class ] } .multiMap { - contigs: [ it[0], it[1] ] + fastas: [ it[0], it[1] ] hmmclass: it[2] } - FARGENE ( ch_fargene_input.contigs, ch_fargene_input.hmmclass ) - ch_versions = ch_versions.mix(FARGENE.out.versions) + FARGENE ( ch_fargene_input.fastas, ch_fargene_input.hmmclass ) + ch_versions = ch_versions.mix( FARGENE.out.versions ) // Reporting // Note: currently hardcoding versions, has to be updated with every fARGene-update - HAMRONIZATION_FARGENE ( FARGENE.out.hmm.transpose(), 'json', '0.1', '0.1' ) - ch_versions = ch_versions.mix(HAMRONIZATION_FARGENE.out.versions) - ch_input_to_hamronization_summarize = ch_input_to_hamronization_summarize.mix(HAMRONIZATION_FARGENE.out.json) + HAMRONIZATION_FARGENE( FARGENE.out.hmm_genes.transpose(), 'tsv', '0.1', '0.1' ) + ch_versions = ch_versions.mix( HAMRONIZATION_FARGENE.out.versions ) + ch_input_to_hamronization_summarize = ch_input_to_hamronization_summarize.mix( HAMRONIZATION_FARGENE.out.tsv ) } // RGI run if ( !params.arg_skip_rgi ) { - RGI_MAIN ( contigs ) - ch_versions = ch_versions.mix(RGI_MAIN.out.versions) + if ( !params.arg_rgi_db ) { + + // Download and untar CARD + UNTAR_CARD ( [ [], file('https://card.mcmaster.ca/latest/data', checkIfExists: true) ] ) + ch_versions = ch_versions.mix( UNTAR_CARD.out.versions ) + rgi_db = UNTAR_CARD.out.untar.map{ it[1] } + + } else { + + // Use user-supplied database + rgi_db = params.arg_rgi_db + + } + + RGI_CARDANNOTATION ( rgi_db ) + ch_versions = ch_versions.mix( RGI_CARDANNOTATION.out.versions ) + + RGI_MAIN ( fastas, RGI_CARDANNOTATION.out.db, [] ) + ch_versions = ch_versions.mix( RGI_MAIN.out.versions ) // Reporting - HAMRONIZATION_RGI ( RGI_MAIN.out.tsv, 'json', RGI_MAIN.out.tool_version, RGI_MAIN.out.db_version ) - ch_versions = ch_versions.mix(HAMRONIZATION_RGI.out.versions) - ch_input_to_hamronization_summarize = ch_input_to_hamronization_summarize.mix(HAMRONIZATION_RGI.out.json) + HAMRONIZATION_RGI ( RGI_MAIN.out.tsv, 'tsv', RGI_MAIN.out.tool_version, RGI_MAIN.out.db_version ) + ch_versions = ch_versions.mix( HAMRONIZATION_RGI.out.versions ) + ch_input_to_hamronization_summarize = ch_input_to_hamronization_summarize.mix( HAMRONIZATION_RGI.out.tsv ) } // DeepARG prepare download - if ( !params.arg_skip_deeparg && params.arg_deeparg_data ) { + if ( !params.arg_skip_deeparg && params.arg_deeparg_db ) { ch_deeparg_db = Channel - .fromPath( params.arg_deeparg_data ) + .fromPath( params.arg_deeparg_db ) .first() - } else if ( !params.arg_skip_deeparg && !params.arg_deeparg_data ) { + } else if ( !params.arg_skip_deeparg && !params.arg_deeparg_db ) { 
DEEPARG_DOWNLOADDATA( ) - ch_versions = ch_versions.mix(DEEPARG_DOWNLOADDATA.out.versions) + ch_versions = ch_versions.mix( DEEPARG_DOWNLOADDATA.out.versions ) ch_deeparg_db = DEEPARG_DOWNLOADDATA.out.db } @@ -115,24 +145,41 @@ workflow ARG { .set { ch_input_for_deeparg } DEEPARG_PREDICT ( ch_input_for_deeparg, ch_deeparg_db ) - ch_versions = ch_versions.mix(DEEPARG_PREDICT.out.versions) + ch_versions = ch_versions.mix( DEEPARG_PREDICT.out.versions ) // Reporting // Note: currently hardcoding versions as unreported by DeepARG // Make sure to update on version bump. - HAMRONIZATION_DEEPARG ( DEEPARG_PREDICT.out.arg.mix(DEEPARG_PREDICT.out.potential_arg), 'json', '1.0.2', params.arg_deeparg_data_version ) - ch_versions = ch_versions.mix(HAMRONIZATION_DEEPARG.out.versions) - ch_input_to_hamronization_summarize = ch_input_to_hamronization_summarize.mix(HAMRONIZATION_DEEPARG.out.json) + ch_input_to_hamronization_deeparg = DEEPARG_PREDICT.out.arg.mix( DEEPARG_PREDICT.out.potential_arg ) + HAMRONIZATION_DEEPARG ( ch_input_to_hamronization_deeparg, 'tsv', '1.0.4', params.arg_deeparg_db_version ) + ch_versions = ch_versions.mix( HAMRONIZATION_DEEPARG.out.versions ) + ch_input_to_hamronization_summarize = ch_input_to_hamronization_summarize.mix( HAMRONIZATION_DEEPARG.out.tsv ) + + if ( !params.arg_skip_argnorm ) { + ch_input_to_argnorm_deeparg = HAMRONIZATION_DEEPARG.out.tsv.filter{ meta, file -> !file.isEmpty() } + ARGNORM_DEEPARG ( ch_input_to_argnorm_deeparg, 'deeparg', 'deeparg' ) + ch_versions = ch_versions.mix( ARGNORM_DEEPARG.out.versions ) + } } // ABRicate run if ( !params.arg_skip_abricate ) { - ABRICATE_RUN ( contigs ) - ch_versions = ch_versions.mix(ABRICATE_RUN.out.versions) - - HAMRONIZATION_ABRICATE ( ABRICATE_RUN.out.report, 'json', '1.0.1', '2021-Mar-27' ) - ch_versions = ch_versions.mix(HAMRONIZATION_ABRICATE.out.versions) - ch_input_to_hamronization_summarize = ch_input_to_hamronization_summarize.mix(HAMRONIZATION_ABRICATE.out.json) + abricate_dbdir = params.arg_abricate_db ? 
file(params.arg_abricate_db, checkIfExists: true) : [] + ABRICATE_RUN ( fastas, abricate_dbdir ) + ch_versions = ch_versions.mix( ABRICATE_RUN.out.versions ) + + HAMRONIZATION_ABRICATE ( ABRICATE_RUN.out.report, 'tsv', '1.0.1', '2021-Mar-27' ) + ch_versions = ch_versions.mix( HAMRONIZATION_ABRICATE.out.versions ) + ch_input_to_hamronization_summarize = ch_input_to_hamronization_summarize.mix( HAMRONIZATION_ABRICATE.out.tsv ) + + if ( ( params.arg_abricate_db_id == 'ncbi' || + params.arg_abricate_db_id == 'resfinder' || + params.arg_abricate_db_id == 'argannot' || + params.arg_abricate_db_id == 'megares') && !params.arg_skip_argnorm ) { + ch_input_to_argnorm_abricate = HAMRONIZATION_ABRICATE.out.tsv.filter{ meta, file -> !file.isEmpty() } + ARGNORM_ABRICATE ( ch_input_to_argnorm_abricate, 'abricate', params.arg_abricate_db_id ) + ch_versions = ch_versions.mix( ARGNORM_ABRICATE.out.versions ) + } } ch_input_to_hamronization_summarize @@ -143,7 +190,21 @@ workflow ARG { .set { ch_input_for_hamronization_summarize } HAMRONIZATION_SUMMARIZE( ch_input_for_hamronization_summarize, params.arg_hamronization_summarizeformat ) - ch_versions = ch_versions.mix(HAMRONIZATION_SUMMARIZE.out.versions) + ch_versions = ch_versions.mix( HAMRONIZATION_SUMMARIZE.out.versions ) + + // MERGE_TAXONOMY + if ( params.run_taxa_classification ) { + + ch_mmseqs_taxonomy_list = tsvs.map{ it[1] }.collect() + MERGE_TAXONOMY_HAMRONIZATION( HAMRONIZATION_SUMMARIZE.out.tsv, ch_mmseqs_taxonomy_list ) + ch_versions = ch_versions.mix( MERGE_TAXONOMY_HAMRONIZATION.out.versions ) + + ch_tabix_input = Channel.of( [ 'id':'hamronization_combined_report' ] ) + .combine(MERGE_TAXONOMY_HAMRONIZATION.out.tsv) + + ARG_TABIX_BGZIP( ch_tabix_input ) + ch_versions = ch_versions.mix( ARG_TABIX_BGZIP.out.versions ) + } emit: versions = ch_versions diff --git a/subworkflows/local/bgc.nf b/subworkflows/local/bgc.nf index 22074d16..0130205d 100644 --- a/subworkflows/local/bgc.nf +++ b/subworkflows/local/bgc.nf @@ -12,14 +12,16 @@ include { HMMER_HMMSEARCH as BGC_HMMER_HMMSEARCH } from '../../modules/nf-core include { DEEPBGC_DOWNLOAD } from '../../modules/nf-core/deepbgc/download/main' include { DEEPBGC_PIPELINE } from '../../modules/nf-core/deepbgc/pipeline/main' include { COMBGC } from '../../modules/local/combgc' +include { TABIX_BGZIP as BGC_TABIX_BGZIP } from '../../modules/nf-core/tabix/bgzip/main' +include { MERGE_TAXONOMY_COMBGC } from '../../modules/local/merge_taxonomy_combgc' workflow BGC { take: - fna // tuple val(meta), path(PREPPED_INPUT.out.fna) - gff // tuple val(meta), path(.out.gff) - faa // tuple val(meta), path(.out.faa) - gbk // tuple val(meta), path(.out.gbk) + fastas // tuple val(meta), path(PREPPED_INPUT.out.fna) + faas // tuple val(meta), path(.out.faa) + gbks // tuple val(meta), path(.out.gbk) + tsvs // tuple val(meta), path(MMSEQS_CREATETSV.out.tsv) main: ch_versions = Channel.empty() @@ -28,134 +30,115 @@ workflow BGC { // When adding a new tool that requires FAA, make sure to update conditions // in funcscan.nf around annotation and BGC subworkflow execution // to ensure annotation is executed! - ch_faa_for_bgc_hmmsearch = faa + ch_faa_for_bgc_hmmsearch = faas // ANTISMASH if ( !params.bgc_skip_antismash ) { // Check whether user supplies database and/or antismash directory. If not, obtain them via the module antismashlite/antismashlitedownloaddatabases. // Important for future maintenance: For CI tests, only the "else" option below is used.
Both options should be tested locally whenever the antiSMASH module gets updated. - if ( params.bgc_antismash_databases && params.bgc_antismash_installationdirectory ) { + if ( params.bgc_antismash_db && params.bgc_antismash_installdir ) { ch_antismash_databases = Channel - .fromPath( params.bgc_antismash_databases ) + .fromPath( params.bgc_antismash_db ) .first() ch_antismash_directory = Channel - .fromPath( params.bgc_antismash_installationdirectory ) + .fromPath( params.bgc_antismash_installdir ) .first() + } else if ( params.bgc_antismash_db && ( session.config.conda && session.config.conda.enabled ) ) { + + ch_antismash_databases = Channel + .fromPath( params.bgc_antismash_db ) + .first() + + ch_antismash_directory = [] + } else { // May need to update on each new version of antismash-lite due to changes to scripts inside these tars - ch_css_for_antismash = "https://github.com/nf-core/test-datasets/raw/91bb8781c576967e23d2c5315dd4d43213575033/data/delete_me/antismash/css.tar.gz" - ch_detection_for_antismash = "https://github.com/nf-core/test-datasets/raw/91bb8781c576967e23d2c5315dd4d43213575033/data/delete_me/antismash/detection.tar.gz" - ch_modules_for_antismash = "https://github.com/nf-core/test-datasets/raw/91bb8781c576967e23d2c5315dd4d43213575033/data/delete_me/antismash/modules.tar.gz" + ch_css_for_antismash = "https://github.com/nf-core/test-datasets/raw/724737e23a53085129cd5e015acafbf7067822ca/data/delete_me/antismash/css.tar.gz" + ch_detection_for_antismash = "https://github.com/nf-core/test-datasets/raw/c3174c50bf654e477bf329dbaf72acc8345f9b7a/data/delete_me/antismash/detection.tar.gz" + ch_modules_for_antismash = "https://github.com/nf-core/test-datasets/raw/c3174c50bf654e477bf329dbaf72acc8345f9b7a/data/delete_me/antismash/modules.tar.gz" UNTAR_CSS ( [ [], ch_css_for_antismash ] ) - ch_versions = ch_versions.mix(UNTAR_CSS.out.versions) + ch_versions = ch_versions.mix( UNTAR_CSS.out.versions ) UNTAR_DETECTION ( [ [], ch_detection_for_antismash ] ) - ch_versions = ch_versions.mix(UNTAR_DETECTION.out.versions) + ch_versions = ch_versions.mix( UNTAR_DETECTION.out.versions ) UNTAR_MODULES ( [ [], ch_modules_for_antismash ] ) - ch_versions = ch_versions.mix(UNTAR_MODULES.out.versions) + ch_versions = ch_versions.mix( UNTAR_MODULES.out.versions ) ANTISMASH_ANTISMASHLITEDOWNLOADDATABASES ( UNTAR_CSS.out.untar.map{ it[1] }, UNTAR_DETECTION.out.untar.map{ it[1] }, UNTAR_MODULES.out.untar.map{ it[1] } ) - ch_versions = ch_versions.mix(ANTISMASH_ANTISMASHLITEDOWNLOADDATABASES.out.versions) + ch_versions = ch_versions.mix( ANTISMASH_ANTISMASHLITEDOWNLOADDATABASES.out.versions ) ch_antismash_databases = ANTISMASH_ANTISMASHLITEDOWNLOADDATABASES.out.database ch_antismash_directory = ANTISMASH_ANTISMASHLITEDOWNLOADDATABASES.out.antismash_dir - } - if ( params.annotation_tool == 'prodigal' || params.annotation_tool == "pyrodigal" ) { - - ch_antismash_input = fna.join(gff, by: 0) - .filter { - meta, fna, gff -> - if ( meta.longest_contig < params.bgc_antismash_sampleminlength ) log.warn "[nf-core/funcscan] Sample does not have any contig reaching min. length threshold of --bgc_antismash_sampleminlength ${params.bgc_antismash_sampleminlength}. Antismash will not be run for sample: ${meta.id}." 
- meta.longest_contig >= params.bgc_antismash_sampleminlength - } - .multiMap { - meta, fna, gff -> - fna: [ meta, fna ] - gff: [ gff ] - } - - ANTISMASH_ANTISMASHLITE ( ch_antismash_input.fna, ch_antismash_databases, ch_antismash_directory, ch_antismash_input.gff ) - - } else if ( params.annotation_tool == 'prokka' ) { - - ch_antismash_input = gbk.filter { - meta, files -> - if ( meta.longest_contig < params.bgc_antismash_sampleminlength ) log.warn "[nf-core/funcscan] Sample does not have any contig reaching min. length threshold of --bgc_antismash_sampleminlength ${params.bgc_antismash_sampleminlength}. Antismash will not be run for sample: ${meta.id}." - meta.longest_contig >= params.bgc_antismash_sampleminlength - } - - ANTISMASH_ANTISMASHLITE ( ch_antismash_input, ch_antismash_databases, ch_antismash_directory, [] ) - - } else if ( params.annotation_tool == 'bakta' ) { - - ch_antismash_input = gbk.filter { - meta, files -> - if ( meta.longest_contig < params.bgc_antismash_sampleminlength ) log.warn "[nf-core/funcscan] Sample does not have any contig reaching min. length threshold of --bgc_antismash_sampleminlength ${params.bgc_antismash_sampleminlength}. Antismash will not be run for sample: ${meta.id}." - meta.longest_contig >= params.bgc_antismash_sampleminlength - } - - ANTISMASH_ANTISMASHLITE ( ch_antismash_input, ch_antismash_databases, ch_antismash_directory, [] ) - - } - - ch_versions = ch_versions.mix(ANTISMASH_ANTISMASHLITE.out.versions) - ch_antismashresults_for_combgc = ANTISMASH_ANTISMASHLITE.out.knownclusterblast_dir - .mix(ANTISMASH_ANTISMASHLITE.out.gbk_input) - .groupTuple() - .map{ - meta, files -> - [meta, files.flatten()] + ANTISMASH_ANTISMASHLITE ( gbks, ch_antismash_databases, ch_antismash_directory, [] ) + + ch_versions = ch_versions.mix( ANTISMASH_ANTISMASHLITE.out.versions ) + ch_antismashresults = ANTISMASH_ANTISMASHLITE.out.knownclusterblast_dir + .mix( ANTISMASH_ANTISMASHLITE.out.gbk_input ) + .groupTuple() + .map{ + meta, files -> + [ meta, files.flatten() ] + } + + // Filter out samples with no BGC hits + ch_antismashresults_for_combgc = ch_antismashresults + .join(fastas, remainder: false) + .join(ANTISMASH_ANTISMASHLITE.out.gbk_results, remainder: false) + .map { + meta, gbk_input, fasta, gbk_results -> + [ meta, gbk_input ] } - ch_bgcresults_for_combgc = ch_bgcresults_for_combgc.mix(ch_antismashresults_for_combgc) + + ch_bgcresults_for_combgc = ch_bgcresults_for_combgc.mix( ch_antismashresults_for_combgc ) } // DEEPBGC if ( !params.bgc_skip_deepbgc ) { - if ( params.bgc_deepbgc_database ) { + if ( params.bgc_deepbgc_db ) { ch_deepbgc_database = Channel - .fromPath( params.bgc_deepbgc_database ) + .fromPath( params.bgc_deepbgc_db ) .first() } else { DEEPBGC_DOWNLOAD() ch_deepbgc_database = DEEPBGC_DOWNLOAD.out.db - ch_versions = ch_versions.mix(DEEPBGC_DOWNLOAD.out.versions) + ch_versions = ch_versions.mix( DEEPBGC_DOWNLOAD.out.versions ) } - DEEPBGC_PIPELINE ( fna, ch_deepbgc_database) - ch_versions = ch_versions.mix(DEEPBGC_PIPELINE.out.versions) - ch_bgcresults_for_combgc = ch_bgcresults_for_combgc.mix(DEEPBGC_PIPELINE.out.bgc_tsv) + DEEPBGC_PIPELINE ( gbks, ch_deepbgc_database ) + ch_versions = ch_versions.mix( DEEPBGC_PIPELINE.out.versions ) + ch_bgcresults_for_combgc = ch_bgcresults_for_combgc.mix( DEEPBGC_PIPELINE.out.bgc_tsv ) } // GECCO if ( !params.bgc_skip_gecco ) { - ch_gecco_input = fna.groupTuple() + ch_gecco_input = gbks.groupTuple() .multiMap { - fna: [ it[0], it[1], [] ] + fastas: [ it[0], it[1], [] ] } GECCO_RUN ( ch_gecco_input, [] ) - 
ch_versions = ch_versions.mix(GECCO_RUN.out.versions) + ch_versions = ch_versions.mix( GECCO_RUN.out.versions ) ch_geccoresults_for_combgc = GECCO_RUN.out.gbk - .mix(GECCO_RUN.out.clusters) + .mix( GECCO_RUN.out.clusters ) .groupTuple() .map{ meta, files -> - [meta, files.flatten()] + [ meta, files.flatten() ] } - ch_bgcresults_for_combgc = ch_bgcresults_for_combgc.mix(ch_geccoresults_for_combgc) + ch_bgcresults_for_combgc = ch_bgcresults_for_combgc.mix( ch_geccoresults_for_combgc ) } // HMMSEARCH - if ( !params.bgc_skip_hmmsearch ) { + if ( params.bgc_run_hmmsearch ) { if ( params.bgc_hmmsearch_models ) { ch_bgc_hmm_models = Channel.fromPath( params.bgc_hmmsearch_models, checkIfExists: true ) } else { error('[nf-core/funcscan] error: hmm model files not found for --bgc_hmmsearch_models! Please check input.') } ch_bgc_hmm_models_meta = ch_bgc_hmm_models @@ -177,13 +160,42 @@ workflow BGC { } BGC_HMMER_HMMSEARCH ( ch_in_for_bgc_hmmsearch ) - ch_versions = ch_versions.mix(BGC_HMMER_HMMSEARCH.out.versions) + ch_versions = ch_versions.mix( BGC_HMMER_HMMSEARCH.out.versions ) } // COMBGC + + ch_bgcresults_for_combgc + .join(fastas, remainder: true) + .filter { + meta, bgcfile, fasta -> + if ( !bgcfile ) { log.warn("[nf-core/funcscan] BGC workflow: No hits found by BGC tools; comBGC summary tool will not be run for sample: ${meta.id}") } + return [meta, bgcfile, fasta] + } + COMBGC ( ch_bgcresults_for_combgc ) + ch_versions = ch_versions.mix( COMBGC.out.versions ) + + // COMBGC concatenation + if ( !params.run_taxa_classification ) { + ch_combgc_summaries = COMBGC.out.tsv.map{ it[1] }.collectFile( name: 'combgc_complete_summary.tsv', storeDir: "${params.outdir}/reports/combgc", keepHeader:true ) + } else { + ch_combgc_summaries = COMBGC.out.tsv.map{ it[1] }.collectFile( name: 'combgc_complete_summary.tsv', keepHeader:true ) + } + + // MERGE_TAXONOMY + if ( params.run_taxa_classification ) { - ch_combgc_summaries = COMBGC.out.tsv.map{ it[1] }.collectFile(name: 'combgc_complete_summary.tsv', storeDir: "${params.outdir}/reports/combgc", keepHeader:true) + ch_mmseqs_taxonomy_list = tsvs.map{ it[1] }.collect() + MERGE_TAXONOMY_COMBGC( ch_combgc_summaries, ch_mmseqs_taxonomy_list ) + ch_versions = ch_versions.mix( MERGE_TAXONOMY_COMBGC.out.versions ) + + ch_tabix_input = Channel.of( [ 'id':'combgc_complete_summary_taxonomy' ] ) + .combine(MERGE_TAXONOMY_COMBGC.out.tsv) + + BGC_TABIX_BGZIP( ch_tabix_input ) + ch_versions = ch_versions.mix( BGC_TABIX_BGZIP.out.versions ) + } emit: versions = ch_versions diff --git a/subworkflows/local/taxa_class.nf b/subworkflows/local/taxa_class.nf new file mode 100644 index 00000000..d76e1dff --- /dev/null +++ b/subworkflows/local/taxa_class.nf @@ -0,0 +1,62 @@ +/* + TAXONOMIC CLASSIFICATION +*/ + +include { MMSEQS_CREATEDB } from '../../modules/nf-core/mmseqs/createdb/main' +include { MMSEQS_DATABASES } from '../../modules/nf-core/mmseqs/databases/main' +include { MMSEQS_TAXONOMY } from '../../modules/nf-core/mmseqs/taxonomy/main' +include { MMSEQS_CREATETSV } from '../../modules/nf-core/mmseqs/createtsv/main' + +workflow TAXA_CLASS { + take: + contigs // tuple val(meta), path(contigs) + + main: + ch_versions = Channel.empty() + ch_mmseqs_db = Channel.empty() + ch_taxonomy_querydb = Channel.empty() + ch_taxonomy_querydb_taxdb = Channel.empty() + ch_taxonomy_tsv = Channel.empty() + + if ( params.taxa_classification_tool == 'mmseqs2') { + + // Download the ref db if not supplied by user + // MMSEQS_DATABASE + if ( params.taxa_classification_mmseqs_db != null ) { + 
ch_mmseqs_db = Channel + .fromPath( params.taxa_classification_mmseqs_db ) + .first() + } else { + MMSEQS_DATABASES ( params.taxa_classification_mmseqs_db_id ) + ch_versions = ch_versions.mix( MMSEQS_DATABASES.out.versions ) + ch_mmseqs_db = ( MMSEQS_DATABASES.out.database ) + } + + // Create db for query contigs, assign taxonomy and convert to table format + // MMSEQS_CREATEDB + MMSEQS_CREATEDB ( contigs ) + ch_versions = ch_versions.mix( MMSEQS_CREATEDB.out.versions ) + + // MMSEQS_TAXONOMY + MMSEQS_TAXONOMY ( MMSEQS_CREATEDB.out.db, ch_mmseqs_db ) + ch_versions = ch_versions.mix( MMSEQS_TAXONOMY.out.versions ) + ch_taxonomy_querydb_taxdb = MMSEQS_TAXONOMY.out.db_taxonomy + + // Join together to ensure in sync + ch_taxonomy_input_for_createtsv = MMSEQS_CREATEDB.out.db + .join(MMSEQS_TAXONOMY.out.db_taxonomy) + .multiMap { meta, db, db_taxonomy -> + db: [ meta,db ] + taxdb: [ meta, db_taxonomy ] + } + + // MMSEQS_CREATETSV + MMSEQS_CREATETSV ( ch_taxonomy_input_for_createtsv.taxdb, [[:],[]], ch_taxonomy_input_for_createtsv.db ) + ch_versions = ch_versions.mix( MMSEQS_CREATETSV.out.versions ) + ch_taxonomy_tsv = MMSEQS_CREATETSV.out.tsv + } + + emit: + versions = ch_versions + sample_taxonomy = ch_taxonomy_tsv // channel: [ val(meta), tsv ] +} diff --git a/subworkflows/local/utils_nfcore_funcscan_pipeline/main.nf b/subworkflows/local/utils_nfcore_funcscan_pipeline/main.nf new file mode 100644 index 00000000..0d4b7afb --- /dev/null +++ b/subworkflows/local/utils_nfcore_funcscan_pipeline/main.nf @@ -0,0 +1,345 @@ +// +// Subworkflow with functionality specific to the nf-core/funcscan pipeline +// + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +include { UTILS_NFVALIDATION_PLUGIN } from '../../nf-core/utils_nfvalidation_plugin' +include { paramsSummaryMap } from 'plugin/nf-validation' +include { fromSamplesheet } from 'plugin/nf-validation' +include { UTILS_NEXTFLOW_PIPELINE } from '../../nf-core/utils_nextflow_pipeline' +include { completionEmail } from '../../nf-core/utils_nfcore_pipeline' +include { completionSummary } from '../../nf-core/utils_nfcore_pipeline' +include { dashedLine } from '../../nf-core/utils_nfcore_pipeline' +include { nfCoreLogo } from '../../nf-core/utils_nfcore_pipeline' +include { imNotification } from '../../nf-core/utils_nfcore_pipeline' +include { UTILS_NFCORE_PIPELINE } from '../../nf-core/utils_nfcore_pipeline' +include { workflowCitation } from '../../nf-core/utils_nfcore_pipeline' + +/* +======================================================================================== + SUBWORKFLOW TO INITIALISE PIPELINE +======================================================================================== +*/ + +workflow PIPELINE_INITIALISATION { + + take: + version // boolean: Display version and exit + help // boolean: Display help text + validate_params // boolean: Boolean whether to validate parameters against the schema at runtime + monochrome_logs // boolean: Do not use coloured log outputs + nextflow_cli_args // array: List of positional nextflow CLI args + outdir // string: The output directory where the results will be saved + input // string: Path to input samplesheet + + main: + + ch_versions = Channel.empty() + + // + // Print version and exit if required and dump pipeline parameters to JSON file + // + UTILS_NEXTFLOW_PIPELINE ( + version, + true, + outdir, + 
workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1 + ) + + // + // Validate parameters and generate parameter summary to stdout + // + pre_help_text = nfCoreLogo(monochrome_logs) + post_help_text = '\n' + workflowCitation() + '\n' + dashedLine(monochrome_logs) + def String workflow_command = "nextflow run ${workflow.manifest.name} -profile <docker/singularity/.../institute> --input samplesheet.csv --outdir <OUTDIR>" + UTILS_NFVALIDATION_PLUGIN ( + help, + workflow_command, + pre_help_text, + post_help_text, + validate_params, + "nextflow_schema.json" + ) + + // + // Check config provided to the pipeline + // + UTILS_NFCORE_PIPELINE ( + nextflow_cli_args + ) + // + // Custom validation for pipeline parameters + // + validateInputParameters() + + // + // Create channel from input file provided through params.input + // + Channel + .fromSamplesheet("input") + .set { ch_samplesheet } + + emit: + samplesheet = ch_samplesheet + versions = ch_versions +} + +/* +======================================================================================== + SUBWORKFLOW FOR PIPELINE COMPLETION +======================================================================================== +*/ + +workflow PIPELINE_COMPLETION { + + take: + email // string: email address + email_on_fail // string: email address sent on pipeline failure + plaintext_email // boolean: Send plain-text email instead of HTML + outdir // path: Path to output directory where results will be published + monochrome_logs // boolean: Disable ANSI colour codes in log output + hook_url // string: hook URL for notifications + multiqc_report // string: Path to MultiQC report + + main: + + summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") + + // + // Completion email and summary + // + workflow.onComplete { + if (email || email_on_fail) { + completionEmail(summary_params, email, email_on_fail, plaintext_email, outdir, monochrome_logs, multiqc_report.toList()) + } + + completionSummary(monochrome_logs) + + if (hook_url) { + imNotification(summary_params, hook_url) + } + } + + workflow.onError { + log.error "Pipeline failed. Please refer to troubleshooting docs: https://nf-co.re/docs/usage/troubleshooting" + } +} + +/* +======================================================================================== + FUNCTIONS +======================================================================================== +*/ +// +// Check and validate pipeline parameters +// +def validateInputParameters() { + // Validate antiSMASH inputs for containers + // 1. Make sure that either both or none of the antiSMASH directories are supplied + if ( ['docker', 'singularity'].contains(workflow.containerEngine) && ( ( params.run_bgc_screening && !params.bgc_antismash_db && params.bgc_antismash_installdir && !params.bgc_skip_antismash) || ( params.run_bgc_screening && params.bgc_antismash_db && !params.bgc_antismash_installdir && !params.bgc_skip_antismash ) ) ) + error("[nf-core/funcscan] ERROR: You supplied either the antiSMASH database or its installation directory, but not both. Please either supply both directories or none (letting the pipeline download them instead).") + + // 2.
+ +/* +======================================================================================== + FUNCTIONS +======================================================================================== +*/ +// +// Check and validate pipeline parameters +// +def validateInputParameters() { + // Validate antiSMASH inputs for containers + // 1. Make sure that either both or none of the antiSMASH directories are supplied + if ( ['docker', 'singularity'].contains(workflow.containerEngine) && ( ( params.run_bgc_screening && !params.bgc_antismash_db && params.bgc_antismash_installdir && !params.bgc_skip_antismash) || ( params.run_bgc_screening && params.bgc_antismash_db && !params.bgc_antismash_installdir && !params.bgc_skip_antismash ) ) ) + error("[nf-core/funcscan] ERROR: You supplied either the antiSMASH database or its installation directory, but not both. Please either supply both directories or none (letting the pipeline download them instead).") + + // 2. If both are supplied: Exit if we have a name collision error + else if ( ['docker', 'singularity'].contains(workflow.containerEngine) && ( params.run_bgc_screening && params.bgc_antismash_db && params.bgc_antismash_installdir && !params.bgc_skip_antismash ) ) { + antismash_database_dir = new File(params.bgc_antismash_db) + antismash_install_dir = new File(params.bgc_antismash_installdir) + if ( antismash_database_dir.name == antismash_install_dir.name ) error("[nf-core/funcscan] ERROR: Your supplied antiSMASH database and installation directories have identical names: \"" + antismash_install_dir.name + "\".\nPlease make sure to name them differently, for example:\n - Database directory: "+ antismash_database_dir.parent + "/antismash_db\n - Installation directory: " + antismash_install_dir.parent + "/antismash_dir") + } + + // 3. Give a warning if no container engine is used and conda is assumed + if ( params.run_bgc_screening && ( !params.bgc_antismash_db ) && !params.bgc_skip_antismash && ( session.config.conda && session.config.conda.enabled ) ) { + log.warn "[nf-core/funcscan] Running antiSMASH download database module, and detected conda has been enabled. Assuming conda is being used for this pipeline run. Check your config if this is not expected!" + } +}
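+ +// For reference, the two set-ups accepted by the checks above when screening BGCs under +// Docker/Singularity are (paths purely illustrative): +// --bgc_antismash_db /<path>/antismash_db --bgc_antismash_installdir /<path>/antismash_dir +// or omitting both parameters, in which case the pipeline downloads the database itself.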
+ +// +// Validate channels from input samplesheet +// +def validateInputSamplesheet(input) { + def (metas, fastas) = input[1..2] + + // Check that multiple runs of the same sample are of the same datatype i.e. single-end / paired-end + def endedness_ok = metas.collect{ it.single_end }.unique().size == 1 + if (!endedness_ok) { + error("Please check input samplesheet -> Multiple runs of a sample must be of the same datatype i.e. single-end or paired-end: ${metas[0].id}") + } + + return [ metas[0], fastas ] +} +// +// Get attribute from genome config file e.g. fasta +// +def getGenomeAttribute(attribute) { + if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { + if (params.genomes[ params.genome ].containsKey(attribute)) { + return params.genomes[ params.genome ][ attribute ] + } + } + return null +} + +// +// Exit pipeline if incorrect --genome key provided +// +def genomeExistsError() { + if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) { + def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " Genome '${params.genome}' not found in any config files provided to the pipeline.\n" + + " Currently, the available genome keys are:\n" + + " ${params.genomes.keySet().join(", ")}\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + error(error_string) + } +} + +// +// Generate methods description for MultiQC +// +def toolCitationText() { + // Can use ternary operators to dynamically construct based on conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 2023)" : "", + // Uncomment function in methodsDescriptionText to render in MultiQC report + def preprocessing_text = "The pipeline used the following tools: preprocessing included SeqKit2 (Shen et al. 2024)." + + def annotation_text = [ + "Annotation was carried out with:", + params.annotation_tool == 'prodigal' ? "Prodigal (Hyatt et al. 2010)." : "", + params.annotation_tool == 'pyrodigal' ? "Pyrodigal (Larralde 2022)." : "", + params.annotation_tool == 'bakta' ? "BAKTA (Schwengers et al. 2021)." : "", + params.annotation_tool == 'prokka' ? "PROKKA (Seemann 2014)." : "", + ].join(' ').trim() + + def amp_text = [ + "The following antimicrobial peptide screening tools were used:", + !params.amp_skip_amplify ? "AMPlify (Li et al. 2022)," : "", + !params.amp_skip_macrel ? "Macrel (Santos-Júnior et al. 2020)," : "", + !params.amp_skip_ampir ? "ampir (Fingerhut et al. 2021)," : "", + params.amp_run_hmmsearch ? "HMMER (Eddy 2011)," : "", + ". The outputs from the antimicrobial peptide screening tools were standardised and summarised with AMPcombi (Ibrahim and Perelo 2023)." + ].join(' ').trim().replaceAll(", \\.", ".") + + def arg_text = [ + "The following antimicrobial resistance gene screening tools were used:", + !params.arg_skip_fargene ? "fARGene (Berglund et al. 2019)," : "", + !params.arg_skip_rgi ? "RGI (Alcock et al. 2020)," : "", + !params.arg_skip_amrfinderplus ? "AMRFinderPlus (Feldgarden et al. 2021)," : "", + !params.arg_skip_deeparg ? "DeepARG (Arango-Argoty et al. 2018)," : "", + !params.arg_skip_abricate ? "ABRicate (Seemann 2020)," : "", + !params.arg_skip_argnorm ? ". The outputs from ARG screening tools were normalised to the antibiotic resistance ontology using argNorm (Perovic et al. 2024)," : "", + ". The outputs from the antimicrobial resistance gene screening tools were standardised and summarised with hAMRonization (Maguire et al. 2023)." + ].join(' ').trim().replaceAll(", +\\.", ".") + + def bgc_text = [ + "The following biosynthetic gene cluster screening tools were used:", + !params.bgc_skip_antismash ? "antiSMASH (Blin et al. 2021)," : "", + !params.bgc_skip_deepbgc ? "deepBGC (Hannigan et al. 2019)," : "", + !params.bgc_skip_gecco ? "GECCO (Carroll et al. 2021)," : "", + params.bgc_run_hmmsearch ? "HMMER (Eddy 2011)," : "", + ". The outputs from the biosynthetic gene cluster screening tools were standardised and summarised with comBGC (Frangenberg et al. 2023)." + ].join(' ').replaceAll(", +\\.", ".").trim() + + def postprocessing_text = "Run statistics were reported using MultiQC (Ewels et al. 2016)." + + def citation_text = [ + preprocessing_text, + annotation_text, + params.run_amp_screening ? amp_text : "", + params.run_arg_screening ? arg_text : "", + params.run_bgc_screening ? bgc_text : "", + postprocessing_text, + ].join(' ').trim() + + return citation_text +}
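+ +// As an illustration only: with pyrodigal annotation and ARG screening enabled, the assembled +// text reads along the lines of "The pipeline used the following tools: preprocessing included +// SeqKit2 (Shen et al. 2024). Annotation was carried out with: Pyrodigal (Larralde 2022). The +// following antimicrobial resistance gene screening tools were used: ..." (the exact wording +// depends on which screening tools are enabled).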
+ +def toolBibliographyText() { + // Can use ternary operators to dynamically construct based on conditions, e.g. params["run_xyz"] ? "<li>Author (2023) Pub name, Journal, DOI</li>" : "", + // Uncomment function in methodsDescriptionText to render in MultiQC report + def preprocessing_text = "<li>Shen, W., Sipos, B., & Zhao, L. (2024). SeqKit2: A Swiss army knife for sequence and alignment processing. iMeta, e191. https://doi.org/10.1002/imt2.191</li>" + + def annotation_text = [ + params.annotation_tool == 'prodigal' ? "<li>Hyatt, D., Chen, G. L., Locascio, P. F., Land, M. L., Larimer, F. W., & Hauser, L. J. (2010). Prodigal: prokaryotic gene recognition and translation initiation site identification. BMC bioinformatics, 11, 119. DOI: 10.1186/1471-2105-11-119</li>" : "", + params.annotation_tool == 'pyrodigal' ? "<li>Larralde, M. (2022). Pyrodigal: Python bindings and interface to Prodigal, an efficient method for gene prediction in prokaryotes. Journal of Open Source Software, 7(72), 4296. DOI: 10.21105/joss.04296</li>" : "", + params.annotation_tool == 'bakta' ? "<li>Schwengers, O., Jelonek, L., Dieckmann, M. A., Beyvers, S., Blom, J., & Goesmann, A. (2021). Bakta: rapid and standardized annotation of bacterial genomes via alignment-free sequence identification. Microbial Genomics, 7(11). DOI: 10.1099/mgen.0.000685</li>" : "", + params.annotation_tool == 'prokka' ? "<li>Seemann, T. (2014). Prokka: rapid prokaryotic genome annotation. Bioinformatics (Oxford, England), 30(14), 2068–2069. DOI: 10.1093/bioinformatics/btu153</li>" : "", + ].join(' ').trim() + + def amp_text = [ + !params.amp_skip_amplify ? "<li>Li, C., Sutherland, D., Hammond, S. A., Yang, C., Taho, F., Bergman, L., Houston, S., Warren, R. L., Wong, T., Hoang, L., Cameron, C. E., Helbing, C. C., & Birol, I. (2022). AMPlify: attentive deep learning model for discovery of novel antimicrobial peptides effective against WHO priority pathogens. BMC genomics, 23(1), 77. DOI: 10.1186/s12864-022-08310-4</li>" : "", + !params.amp_skip_macrel ? "<li>Santos-Júnior, C. D., Pan, S., Zhao, X. M., & Coelho, L. P. (2020). Macrel: antimicrobial peptide screening in genomes and metagenomes. PeerJ, 8, e10555. DOI: 10.7717/peerj.10555</li>" : "", + !params.amp_skip_ampir ? "<li>Fingerhut, L., Miller, D. J., Strugnell, J. M., Daly, N. L., & Cooke, I. R. (2021). ampir: an R package for fast genome-wide prediction of antimicrobial peptides. Bioinformatics (Oxford, England), 36(21), 5262–5263. DOI: 10.1093/bioinformatics/btaa653</li>" : "", + "<li>Ibrahim, A. & Perelo, L. (2023). Darcy220606/AMPcombi. DOI: 10.5281/zenodo.7639121</li>" + ].join(' ').trim().replaceAll(", \\.", ".") + + def arg_text = [ + !params.arg_skip_fargene ? "<li>Berglund, F., Österlund, T., Boulund, F., Marathe, N. P., Larsson, D., & Kristiansson, E. (2019). Identification and reconstruction of novel antibiotic resistance genes from metagenomes. Microbiome, 7(1), 52. DOI: 10.1186/s40168-019-0670-1</li>" : "", + !params.arg_skip_rgi ? "<li>Alcock, B. P., Raphenya, A. R., Lau, T., Tsang, K. K., Bouchard, M., Edalatmand, A., Huynh, W., Nguyen, A. V., Cheng, A. A., Liu, S., Min, S. Y., Miroshnichenko, A., Tran, H. K., Werfalli, R. E., Nasir, J. A., Oloni, M., Speicher, D. J., Florescu, A., Singh, B., Faltyn, M., … McArthur, A. G. (2020). CARD 2020: antibiotic resistome surveillance with the comprehensive antibiotic resistance database. Nucleic acids research, 48(D1), D517–D525. DOI: 10.1093/nar/gkz935</li>" : "", + !params.arg_skip_amrfinderplus ? "<li>Feldgarden, M., Brover, V., Gonzalez-Escalona, N., Frye, J. G., Haendiges, J., Haft, D. H., Hoffmann, M., Pettengill, J. B., Prasad, A. B., Tillman, G. E., Tyson, G. H., & Klimke, W. (2021). AMRFinderPlus and the Reference Gene Catalog facilitate examination of the genomic links among antimicrobial resistance, stress response, and virulence. Scientific reports, 11(1), 12728. DOI: 10.1038/s41598-021-91456-0</li>" : "", + !params.arg_skip_deeparg ? "<li>Arango-Argoty, G., Garner, E., Pruden, A., Heath, L. S., Vikesland, P., & Zhang, L. (2018). DeepARG: a deep learning approach for predicting antibiotic resistance genes from metagenomic data. Microbiome, 6(1), 23. DOI: 10.1186/s40168-018-0401-z</li>" : "", + !params.arg_skip_abricate ? "<li>Seemann, T. (2020). ABRicate. Github https://github.com/tseemann/abricate.</li>" : "", + !params.arg_skip_argnorm ? "<li>Perovic, S. U., Ramji, V., Chong, H., Duan, Y., Maguire, F., Coelho, L. P. (2024). argNorm. DOI: .</li>" : "", + "<li>Public Health Alliance for Genomic Epidemiology (pha4ge). (2022). Parse multiple Antimicrobial Resistance Analysis Reports into a common data structure. Github. Retrieved October 5, 2022, from https://github.com/pha4ge/hAMRonization</li>" + ].join(' ').trim().replaceAll(", +\\.", ".") + + def bgc_text = [ + !params.bgc_skip_antismash ? "<li>Blin, K., Shaw, S., Kloosterman, A. M., Charlop-Powers, Z., van Wezel, G. P., Medema, M. H., & Weber, T. (2021). antiSMASH 6.0: improving cluster detection and comparison capabilities. Nucleic acids research, 49(W1), W29–W35. DOI:</li>" : "", + !params.bgc_skip_deepbgc ? "<li>Hannigan, G. D., Prihoda, D., Palicka, A., Soukup, J., Klempir, O., Rampula, L., Durcak, J., Wurst, M., Kotowski, J., Chang, D., Wang, R., Piizzi, G., Temesi, G., Hazuda, D. J., Woelk, C. H., & Bitton, D. A. (2019). A deep learning genome-mining strategy for biosynthetic gene cluster prediction. Nucleic acids research, 47(18), e110. DOI: 10.1093/nar/gkz654</li>" : "", + !params.bgc_skip_gecco ? "<li>Carroll, L. M., Larralde, M., Fleck, J. S., Ponnudurai, R., Milanese, A., Cappio Barazzone, E. & Zeller, G. (2021). Accurate de novo identification of biosynthetic gene clusters with GECCO. bioRxiv DOI: 10.1101/2021.05.03.442509</li>" : "", + "<li>Frangenberg, J., Fellows Yates, J. A., Ibrahim, A., Perelo, L., & Beber, M. E. (2023). nf-core/funcscan: 1.0.0 - German Rollmops - 2023-02-15. https://doi.org/10.5281/zenodo.7643100</li>" + ].join(' ').replaceAll(", +\\.", ".").trim() + + def postprocessing_text = "<li>Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics, 32(19), 3047–3048. https://doi.org/10.1093/bioinformatics/btw354</li>" + + // Special as reused in multiple subworkflows, and we don't want to cause duplicates + def hmmsearch_text = ( params.run_amp_screening && params.amp_run_hmmsearch ) || ( params.run_bgc_screening && params.bgc_run_hmmsearch ) ? "<li>Eddy, S. R. (2011). Accelerated Profile HMM Searches. PLoS computational biology, 7(10), e1002195. DOI: 10.1371/journal.pcbi.1002195</li>" : "" + + def reference_text = [ + preprocessing_text, + annotation_text, + params.run_amp_screening ? amp_text : "", + params.run_arg_screening ? arg_text : "", + params.run_bgc_screening ? bgc_text : "", + hmmsearch_text, + postprocessing_text, + ].join(' ').trim() + + return reference_text +} + +def methodsDescriptionText(mqc_methods_yaml) { + // Convert to a named map so it can be used with the familiar NXF ${workflow} variable syntax in the MultiQC YML file + def meta = [:] + meta.workflow = workflow.toMap() + meta["manifest_map"] = workflow.manifest.toMap() + + // Pipeline DOI + if (meta.manifest_map.doi) { + // Using a loop to handle multiple DOIs + // Removing `https://doi.org/` to handle pipelines using DOIs vs DOI resolvers + // Removing ` ` since the manifest.doi is a string and not a proper list + def temp_doi_ref = "" + String[] manifest_doi = meta.manifest_map.doi.tokenize(",") + for (String doi_ref: manifest_doi) temp_doi_ref += "(doi: ${doi_ref.replace("https://doi.org/", "").replace(" ", "")}), " + meta["doi_text"] = temp_doi_ref.substring(0, temp_doi_ref.length() - 2) + } else meta["doi_text"] = "" + meta["nodoi_text"] = meta.manifest_map.doi ? "" : "<li>If available, make sure to update the text to include the Zenodo DOI of the version of the pipeline used.</li>" + + // Tool references + meta["tool_citations"] = "" + meta["tool_bibliography"] = "" + + meta["tool_citations"] = toolCitationText().replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".") + meta["tool_bibliography"] = toolBibliographyText() + + + def methods_text = mqc_methods_yaml.text + + def engine = new groovy.text.SimpleTemplateEngine() + def description_html = engine.createTemplate(methods_text).make(meta) + + return description_html.toString() +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/main.nf b/subworkflows/nf-core/utils_nextflow_pipeline/main.nf new file mode 100644 index 00000000..ac31f28f --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/main.nf @@ -0,0 +1,126 @@ +// +// Subworkflow with functionality that may be useful for any Nextflow pipeline +// + +import org.yaml.snakeyaml.Yaml +import groovy.json.JsonOutput +import nextflow.extension.FilesEx + +/* +======================================================================================== + SUBWORKFLOW DEFINITION +======================================================================================== +*/ + +workflow UTILS_NEXTFLOW_PIPELINE { + + take: + print_version // boolean: print version + dump_parameters // boolean: dump parameters + outdir // path: base directory used to publish pipeline results + check_conda_channels // boolean: check conda channels + + main: + + // + // Print workflow version and exit on --version + // + if (print_version) { + log.info "${workflow.manifest.name} ${getWorkflowVersion()}" + System.exit(0) + } + + // + // Dump pipeline parameters to a JSON file + // + if (dump_parameters && outdir) { + dumpParametersToJSON(outdir) + } + + // + // When running with Conda, warn if channels have not been set up appropriately + // + if (check_conda_channels) { + checkCondaChannels() + } + + emit: + dummy_emit = true +} + +/* +======================================================================================== + FUNCTIONS +======================================================================================== +*/ + +// +// Generate version string +// +def getWorkflowVersion() { + String version_string = "" + if (workflow.manifest.version) { + def prefix_v = workflow.manifest.version[0] != 'v' ? 'v' : '' + version_string += "${prefix_v}${workflow.manifest.version}" + } + + if (workflow.commitId) { + def git_shortsha = workflow.commitId.substring(0, 7) + version_string += "-g${git_shortsha}" + } + + return version_string +} + +// +// Dump pipeline parameters to a JSON file +// +def dumpParametersToJSON(outdir) { + def timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') + def filename = "params_${timestamp}.json" + def temp_pf = new File(workflow.launchDir.toString(), ".${filename}") + def jsonStr = JsonOutput.toJson(params) + temp_pf.text = JsonOutput.prettyPrint(jsonStr) + + FilesEx.copyTo(temp_pf.toPath(), "${outdir}/pipeline_info/params_${timestamp}.json") + temp_pf.delete() +} + +// +// When running with -profile conda, warn if channels have not been set up appropriately +// +def checkCondaChannels() { + Yaml parser = new Yaml() + def channels = [] + try { + def config = parser.load("conda config --show channels".execute().text) + channels = config.channels + } catch(NullPointerException | IOException e) { + log.warn "Could not verify conda channel configuration." + return + } + + // Check that all channels are present + // This channel list is ordered by required channel priority.
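+ // For example, a correctly configured set-up (as reported by "conda config --show channels") + // lists conda-forge, bioconda, defaults in that order (illustrative example).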
+ def required_channels_in_order = ['conda-forge', 'bioconda', 'defaults'] + def channels_missing = ((required_channels_in_order as Set) - (channels as Set)) as Boolean + + // Check that they are in the right order + def channel_priority_violation = false + def n = required_channels_in_order.size() + for (int i = 0; i < n - 1; i++) { + channel_priority_violation |= !(channels.indexOf(required_channels_in_order[i]) < channels.indexOf(required_channels_in_order[i+1])) + } + + if (channels_missing | channel_priority_violation) { + log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " There is a problem with your Conda configuration!\n\n" + + " You will need to set-up the conda-forge and bioconda channels correctly.\n" + + " Please refer to https://bioconda.github.io/\n" + + " The observed channel order is \n" + + " ${channels}\n" + + " but the following channel order is required:\n" + + " ${required_channels_in_order}\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml b/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml new file mode 100644 index 00000000..e5c3a0a8 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml @@ -0,0 +1,38 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "UTILS_NEXTFLOW_PIPELINE" +description: Subworkflow with functionality that may be useful for any Nextflow pipeline +keywords: + - utility + - pipeline + - initialise + - version +components: [] +input: + - print_version: + type: boolean + description: | + Print the version of the pipeline and exit + - dump_parameters: + type: boolean + description: | + Dump the parameters of the pipeline to a JSON file + - output_directory: + type: directory + description: Path to output dir to write JSON file to. + pattern: "results/" + - check_conda_channel: + type: boolean + description: | + Check if the conda channel priority is correct. +output: + - dummy_emit: + type: boolean + description: | + Dummy emit to make nf-core subworkflows lint happy +authors: + - "@adamrtalbot" + - "@drpatelh" +maintainers: + - "@adamrtalbot" + - "@drpatelh" + - "@maxulysse" diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test new file mode 100644 index 00000000..68718e4f --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test @@ -0,0 +1,54 @@ + +nextflow_function { + + name "Test Functions" + script "subworkflows/nf-core/utils_nextflow_pipeline/main.nf" + config "subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config" + tag 'subworkflows' + tag 'utils_nextflow_pipeline' + tag 'subworkflows/utils_nextflow_pipeline' + + test("Test Function getWorkflowVersion") { + + function "getWorkflowVersion" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function dumpParametersToJSON") { + + function "dumpParametersToJSON" + + when { + function { + """ + // define inputs of the function here. 
Example: + input[0] = "$outputDir" + """.stripIndent() + } + } + + then { + assertAll( + { assert function.success } + ) + } + } + + test("Test Function checkCondaChannels") { + + function "checkCondaChannels" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap new file mode 100644 index 00000000..e3f0baf4 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap @@ -0,0 +1,20 @@ +{ + "Test Function getWorkflowVersion": { + "content": [ + "v9.9.9" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:02:05.308243" + }, + "Test Function checkCondaChannels": { + "content": null, + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:02:12.425833" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test new file mode 100644 index 00000000..ca964ce8 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test @@ -0,0 +1,111 @@ +nextflow_workflow { + + name "Test Workflow UTILS_NEXTFLOW_PIPELINE" + script "../main.nf" + config "subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config" + workflow "UTILS_NEXTFLOW_PIPELINE" + tag 'subworkflows' + tag 'utils_nextflow_pipeline' + tag 'subworkflows/utils_nextflow_pipeline' + + test("Should run no inputs") { + + when { + workflow { + """ + print_version = false + dump_parameters = false + outdir = null + check_conda_channels = false + + input[0] = print_version + input[1] = dump_parameters + input[2] = outdir + input[3] = check_conda_channels + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should print version") { + + when { + workflow { + """ + print_version = true + dump_parameters = false + outdir = null + check_conda_channels = false + + input[0] = print_version + input[1] = dump_parameters + input[2] = outdir + input[3] = check_conda_channels + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.stdout.contains("nextflow_workflow v9.9.9") } + ) + } + } + + test("Should dump params") { + + when { + workflow { + """ + print_version = false + dump_parameters = true + outdir = 'results' + check_conda_channels = false + + input[0] = false + input[1] = true + input[2] = outdir + input[3] = false + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should not create params JSON if no output directory") { + + when { + workflow { + """ + print_version = false + dump_parameters = true + outdir = null + check_conda_channels = false + + input[0] = false + input[1] = true + input[2] = outdir + input[3] = false + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config b/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config new file mode 100644 index 00000000..d0a926bf --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config @@ -0,0 +1,9 @@ +manifest { + name = 'nextflow_workflow' + author = """nf-core""" + homePage = 'https://127.0.0.1' + description = """Dummy pipeline""" + 
nextflowVersion = '!>=23.04.0' + version = '9.9.9' + doi = 'https://doi.org/10.5281/zenodo.5070524' +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml b/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml new file mode 100644 index 00000000..f8476112 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/utils_nextflow_pipeline: + - subworkflows/nf-core/utils_nextflow_pipeline/** diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf new file mode 100644 index 00000000..14558c39 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf @@ -0,0 +1,446 @@ +// +// Subworkflow with utility functions specific to the nf-core pipeline template +// + +import org.yaml.snakeyaml.Yaml +import nextflow.extension.FilesEx + +/* +======================================================================================== + SUBWORKFLOW DEFINITION +======================================================================================== +*/ + +workflow UTILS_NFCORE_PIPELINE { + + take: + nextflow_cli_args + + main: + valid_config = checkConfigProvided() + checkProfileProvided(nextflow_cli_args) + + emit: + valid_config +} + +/* +======================================================================================== + FUNCTIONS +======================================================================================== +*/ + +// +// Warn if a -profile or Nextflow config has not been provided to run the pipeline +// +def checkConfigProvided() { + valid_config = true + if (workflow.profile == 'standard' && workflow.configFiles.size() <= 1) { + log.warn "[$workflow.manifest.name] You are attempting to run the pipeline without any custom configuration!\n\n" + + "This will be dependent on your local compute environment but can be achieved via one or more of the following:\n" + + " (1) Using an existing pipeline profile e.g. `-profile docker` or `-profile singularity`\n" + + " (2) Using an existing nf-core/configs for your Institution e.g. `-profile crick` or `-profile uppmax`\n" + + " (3) Using your own local custom config e.g. `-c /path/to/your/custom.config`\n\n" + + "Please refer to the quick start section and usage docs for the pipeline.\n " + valid_config = false + } + return valid_config +} + +// +// Exit pipeline if --profile contains spaces +// +def checkProfileProvided(nextflow_cli_args) { + if (workflow.profile.endsWith(',')) { + error "The `-profile` option cannot end with a trailing comma, please remove it and re-run the pipeline!\n" + + "HINT: A common mistake is to provide multiple values separated by spaces e.g. `-profile test, docker`.\n" + } + if (nextflow_cli_args[0]) { + log.warn "nf-core pipelines do not accept positional arguments. The positional argument `${nextflow_cli_args[0]}` has been detected.\n" + + "HINT: A common mistake is to provide multiple values separated by spaces e.g. 
`-profile test, docker`.\n" + } +} + +// +// Citation string for pipeline +// +def workflowCitation() { + def temp_doi_ref = "" + String[] manifest_doi = workflow.manifest.doi.tokenize(",") + // Using a loop to handle multiple DOIs + // Removing `https://doi.org/` to handle pipelines using DOIs vs DOI resolvers + // Removing ` ` since the manifest.doi is a string and not a proper list + for (String doi_ref: manifest_doi) temp_doi_ref += " https://doi.org/${doi_ref.replace('https://doi.org/', '').replace(' ', '')}\n" + return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + + "* The pipeline\n" + + temp_doi_ref + "\n" + + "* The nf-core framework\n" + + " https://doi.org/10.1038/s41587-020-0439-x\n\n" + + "* Software dependencies\n" + + " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md" +} + +// +// Generate workflow version string +// +def getWorkflowVersion() { + String version_string = "" + if (workflow.manifest.version) { + def prefix_v = workflow.manifest.version[0] != 'v' ? 'v' : '' + version_string += "${prefix_v}${workflow.manifest.version}" + } + + if (workflow.commitId) { + def git_shortsha = workflow.commitId.substring(0, 7) + version_string += "-g${git_shortsha}" + } + + return version_string +} + +// +// Get software versions for pipeline +// +def processVersionsFromYAML(yaml_file) { + Yaml yaml = new Yaml() + versions = yaml.load(yaml_file).collectEntries { k, v -> [ k.tokenize(':')[-1], v ] } + return yaml.dumpAsMap(versions).trim() +} + +// +// Get workflow version for pipeline +// +def workflowVersionToYAML() { + return """ + Workflow: + $workflow.manifest.name: ${getWorkflowVersion()} + Nextflow: $workflow.nextflow.version + """.stripIndent().trim() +} + +// +// Get channel of software versions used in pipeline in YAML format +// +def softwareVersionsToYAML(ch_versions) { + return ch_versions + .unique() + .map { processVersionsFromYAML(it) } + .unique() + .mix(Channel.of(workflowVersionToYAML())) +} + +// +// Get workflow summary for MultiQC +// +def paramsSummaryMultiqc(summary_params) { + def summary_section = '' + for (group in summary_params.keySet()) { + def group_params = summary_params.get(group) // This gets the parameters of that particular group + if (group_params) { + summary_section += "
    <p style=\"font-size:110%\"><b>$group</b></p>\n" + summary_section += "    <dl class=\"dl-horizontal\">\n" + for (param in group_params.keySet()) { + summary_section += "        <dt>$param</dt><dd><samp>${group_params.get(param) ?: '<span style=\"color:#999999;\">N/A</span>'}</samp></dd>\n" + } + summary_section += "    </dl>\n" + } + } + + String yaml_file_text = "id: '${workflow.manifest.name.replace('/','-')}-summary'\n" + yaml_file_text += "description: ' - this information is collected when the pipeline is started.'\n" + yaml_file_text += "section_name: '${workflow.manifest.name} Workflow Summary'\n" + yaml_file_text += "section_href: 'https://github.com/${workflow.manifest.name}'\n" + yaml_file_text += "plot_type: 'html'\n" + yaml_file_text += "data: |\n" + yaml_file_text += "${summary_section}" + + return yaml_file_text +} + +// +// nf-core logo +// +def nfCoreLogo(monochrome_logs=true) { + Map colors = logColours(monochrome_logs) + String.format( + """\n + ${dashedLine(monochrome_logs)} + ${colors.green},--.${colors.black}/${colors.green},-.${colors.reset} + ${colors.blue} ___ __ __ __ ___ ${colors.green}/,-._.--~\'${colors.reset} + ${colors.blue} |\\ | |__ __ / ` / \\ |__) |__ ${colors.yellow}} {${colors.reset} + ${colors.blue} | \\| | \\__, \\__/ | \\ |___ ${colors.green}\\`-._,-`-,${colors.reset} + ${colors.green}`._,._,\'${colors.reset} + ${colors.purple} ${workflow.manifest.name} ${getWorkflowVersion()}${colors.reset} + ${dashedLine(monochrome_logs)} + """.stripIndent() + ) +} + +// +// Return dashed line +// +def dashedLine(monochrome_logs=true) { + Map colors = logColours(monochrome_logs) + return "-${colors.dim}----------------------------------------------------${colors.reset}-" +} + +// +// ANSI colours used for terminal logging +// +def logColours(monochrome_logs=true) { + Map colorcodes = [:] + + // Reset / Meta + colorcodes['reset'] = monochrome_logs ? '' : "\033[0m" + colorcodes['bold'] = monochrome_logs ? '' : "\033[1m" + colorcodes['dim'] = monochrome_logs ? '' : "\033[2m" + colorcodes['underlined'] = monochrome_logs ? '' : "\033[4m" + colorcodes['blink'] = monochrome_logs ? '' : "\033[5m" + colorcodes['reverse'] = monochrome_logs ? '' : "\033[7m" + colorcodes['hidden'] = monochrome_logs ? '' : "\033[8m" + + // Regular Colors + colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m" + colorcodes['red'] = monochrome_logs ? '' : "\033[0;31m" + colorcodes['green'] = monochrome_logs ? '' : "\033[0;32m" + colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m" + colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m" + colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m" + colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m" + colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m" + + // Bold + colorcodes['bblack'] = monochrome_logs ? '' : "\033[1;30m" + colorcodes['bred'] = monochrome_logs ? '' : "\033[1;31m" + colorcodes['bgreen'] = monochrome_logs ? '' : "\033[1;32m" + colorcodes['byellow'] = monochrome_logs ? '' : "\033[1;33m" + colorcodes['bblue'] = monochrome_logs ? '' : "\033[1;34m" + colorcodes['bpurple'] = monochrome_logs ? '' : "\033[1;35m" + colorcodes['bcyan'] = monochrome_logs ? '' : "\033[1;36m" + colorcodes['bwhite'] = monochrome_logs ? '' : "\033[1;37m" + + // Underline + colorcodes['ublack'] = monochrome_logs ? '' : "\033[4;30m" + colorcodes['ured'] = monochrome_logs ? '' : "\033[4;31m" + colorcodes['ugreen'] = monochrome_logs ? '' : "\033[4;32m" + colorcodes['uyellow'] = monochrome_logs ? '' : "\033[4;33m" + colorcodes['ublue'] = monochrome_logs ? '' : "\033[4;34m" + colorcodes['upurple'] = monochrome_logs ? '' : "\033[4;35m" + colorcodes['ucyan'] = monochrome_logs ? '' : "\033[4;36m" + colorcodes['uwhite'] = monochrome_logs ? '' : "\033[4;37m" + + // High Intensity + colorcodes['iblack'] = monochrome_logs ?
'' : "\033[0;90m" + colorcodes['ired'] = monochrome_logs ? '' : "\033[0;91m" + colorcodes['igreen'] = monochrome_logs ? '' : "\033[0;92m" + colorcodes['iyellow'] = monochrome_logs ? '' : "\033[0;93m" + colorcodes['iblue'] = monochrome_logs ? '' : "\033[0;94m" + colorcodes['ipurple'] = monochrome_logs ? '' : "\033[0;95m" + colorcodes['icyan'] = monochrome_logs ? '' : "\033[0;96m" + colorcodes['iwhite'] = monochrome_logs ? '' : "\033[0;97m" + + // Bold High Intensity + colorcodes['biblack'] = monochrome_logs ? '' : "\033[1;90m" + colorcodes['bired'] = monochrome_logs ? '' : "\033[1;91m" + colorcodes['bigreen'] = monochrome_logs ? '' : "\033[1;92m" + colorcodes['biyellow'] = monochrome_logs ? '' : "\033[1;93m" + colorcodes['biblue'] = monochrome_logs ? '' : "\033[1;94m" + colorcodes['bipurple'] = monochrome_logs ? '' : "\033[1;95m" + colorcodes['bicyan'] = monochrome_logs ? '' : "\033[1;96m" + colorcodes['biwhite'] = monochrome_logs ? '' : "\033[1;97m" + + return colorcodes +} + +// +// Attach the multiqc report to email +// +def attachMultiqcReport(multiqc_report) { + def mqc_report = null + try { + if (workflow.success) { + mqc_report = multiqc_report.getVal() + if (mqc_report.getClass() == ArrayList && mqc_report.size() >= 1) { + if (mqc_report.size() > 1) { + log.warn "[$workflow.manifest.name] Found multiple reports from process 'MULTIQC', will use only one" + } + mqc_report = mqc_report[0] + } + } + } catch (all) { + if (multiqc_report) { + log.warn "[$workflow.manifest.name] Could not attach MultiQC report to summary email" + } + } + return mqc_report +} + +// +// Construct and send completion email +// +def completionEmail(summary_params, email, email_on_fail, plaintext_email, outdir, monochrome_logs=true, multiqc_report=null) { + + // Set up the e-mail variables + def subject = "[$workflow.manifest.name] Successful: $workflow.runName" + if (!workflow.success) { + subject = "[$workflow.manifest.name] FAILED: $workflow.runName" + } + + def summary = [:] + for (group in summary_params.keySet()) { + summary << summary_params[group] + } + + def misc_fields = [:] + misc_fields['Date Started'] = workflow.start + misc_fields['Date Completed'] = workflow.complete + misc_fields['Pipeline script file path'] = workflow.scriptFile + misc_fields['Pipeline script hash ID'] = workflow.scriptId + if (workflow.repository) misc_fields['Pipeline repository Git URL'] = workflow.repository + if (workflow.commitId) misc_fields['Pipeline repository Git Commit'] = workflow.commitId + if (workflow.revision) misc_fields['Pipeline Git branch/tag'] = workflow.revision + misc_fields['Nextflow Version'] = workflow.nextflow.version + misc_fields['Nextflow Build'] = workflow.nextflow.build + misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp + + def email_fields = [:] + email_fields['version'] = getWorkflowVersion() + email_fields['runName'] = workflow.runName + email_fields['success'] = workflow.success + email_fields['dateComplete'] = workflow.complete + email_fields['duration'] = workflow.duration + email_fields['exitStatus'] = workflow.exitStatus + email_fields['errorMessage'] = (workflow.errorMessage ?: 'None') + email_fields['errorReport'] = (workflow.errorReport ?: 'None') + email_fields['commandLine'] = workflow.commandLine + email_fields['projectDir'] = workflow.projectDir + email_fields['summary'] = summary << misc_fields + + // On success try attach the multiqc report + def mqc_report = attachMultiqcReport(multiqc_report) + + // Check if we are only sending emails on failure + def 
email_address = email + if (!email && email_on_fail && !workflow.success) { + email_address = email_on_fail + } + + // Render the TXT template + def engine = new groovy.text.GStringTemplateEngine() + def tf = new File("${workflow.projectDir}/assets/email_template.txt") + def txt_template = engine.createTemplate(tf).make(email_fields) + def email_txt = txt_template.toString() + + // Render the HTML template + def hf = new File("${workflow.projectDir}/assets/email_template.html") + def html_template = engine.createTemplate(hf).make(email_fields) + def email_html = html_template.toString() + + // Render the sendmail template + def max_multiqc_email_size = (params.containsKey('max_multiqc_email_size') ? params.max_multiqc_email_size : 0) as nextflow.util.MemoryUnit + def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "${workflow.projectDir}", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes() ] + def sf = new File("${workflow.projectDir}/assets/sendmail_template.txt") + def sendmail_template = engine.createTemplate(sf).make(smail_fields) + def sendmail_html = sendmail_template.toString() + + // Send the HTML e-mail + Map colors = logColours(monochrome_logs) + if (email_address) { + try { + if (plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') } + // Try to send HTML e-mail using sendmail + def sendmail_tf = new File(workflow.launchDir.toString(), ".sendmail_tmp.html") + sendmail_tf.withWriter { w -> w << sendmail_html } + [ 'sendmail', '-t' ].execute() << sendmail_html + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-" + } catch (all) { + // Catch failures and try with plaintext + def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ] + mail_cmd.execute() << email_html + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (mail)-" + } + } + + // Write summary e-mail HTML to a file + def output_hf = new File(workflow.launchDir.toString(), ".pipeline_report.html") + output_hf.withWriter { w -> w << email_html } + FilesEx.copyTo(output_hf.toPath(), "${outdir}/pipeline_info/pipeline_report.html"); + output_hf.delete() + + // Write summary e-mail TXT to a file + def output_tf = new File(workflow.launchDir.toString(), ".pipeline_report.txt") + output_tf.withWriter { w -> w << email_txt } + FilesEx.copyTo(output_tf.toPath(), "${outdir}/pipeline_info/pipeline_report.txt"); + output_tf.delete() +} + +// +// Print pipeline summary on completion +// +def completionSummary(monochrome_logs=true) { + Map colors = logColours(monochrome_logs) + if (workflow.success) { + if (workflow.stats.ignoredCount == 0) { + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Pipeline completed successfully${colors.reset}-" + } else { + log.info "-${colors.purple}[$workflow.manifest.name]${colors.yellow} Pipeline completed successfully, but with errored process(es) ${colors.reset}-" + } + } else { + log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed with errors${colors.reset}-" + } +} + +// +// Construct and send a notification to a web server as JSON e.g. 
Microsoft Teams and Slack +// +def imNotification(summary_params, hook_url) { + def summary = [:] + for (group in summary_params.keySet()) { + summary << summary_params[group] + } + + def misc_fields = [:] + misc_fields['start'] = workflow.start + misc_fields['complete'] = workflow.complete + misc_fields['scriptfile'] = workflow.scriptFile + misc_fields['scriptid'] = workflow.scriptId + if (workflow.repository) misc_fields['repository'] = workflow.repository + if (workflow.commitId) misc_fields['commitid'] = workflow.commitId + if (workflow.revision) misc_fields['revision'] = workflow.revision + misc_fields['nxf_version'] = workflow.nextflow.version + misc_fields['nxf_build'] = workflow.nextflow.build + misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp + + def msg_fields = [:] + msg_fields['version'] = getWorkflowVersion() + msg_fields['runName'] = workflow.runName + msg_fields['success'] = workflow.success + msg_fields['dateComplete'] = workflow.complete + msg_fields['duration'] = workflow.duration + msg_fields['exitStatus'] = workflow.exitStatus + msg_fields['errorMessage'] = (workflow.errorMessage ?: 'None') + msg_fields['errorReport'] = (workflow.errorReport ?: 'None') + msg_fields['commandLine'] = workflow.commandLine.replaceFirst(/ +--hook_url +[^ ]+/, "") + msg_fields['projectDir'] = workflow.projectDir + msg_fields['summary'] = summary << misc_fields + + // Render the JSON template + def engine = new groovy.text.GStringTemplateEngine() + // Different JSON depending on the service provider + // Defaults to "Adaptive Cards" (https://adaptivecards.io), except Slack which has its own format + def json_path = hook_url.contains("hooks.slack.com") ? "slackreport.json" : "adaptivecard.json" + def hf = new File("${workflow.projectDir}/assets/${json_path}") + def json_template = engine.createTemplate(hf).make(msg_fields) + def json_message = json_template.toString() + + // POST + def post = new URL(hook_url).openConnection(); + post.setRequestMethod("POST") + post.setDoOutput(true) + post.setRequestProperty("Content-Type", "application/json") + post.getOutputStream().write(json_message.getBytes("UTF-8")); + def postRC = post.getResponseCode(); + if (! 
postRC.equals(200)) { + log.warn(post.getErrorStream().getText()); + } +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml b/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml new file mode 100644 index 00000000..d08d2434 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml @@ -0,0 +1,24 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "UTILS_NFCORE_PIPELINE" +description: Subworkflow with utility functions specific to the nf-core pipeline template +keywords: + - utility + - pipeline + - initialise + - version +components: [] +input: + - nextflow_cli_args: + type: list + description: | + Nextflow CLI positional arguments +output: + - success: + type: boolean + description: | + Dummy output to indicate success +authors: + - "@adamrtalbot" +maintainers: + - "@adamrtalbot" + - "@maxulysse" diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test new file mode 100644 index 00000000..1dc317f8 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test @@ -0,0 +1,134 @@ + +nextflow_function { + + name "Test Functions" + script "../main.nf" + config "subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "utils_nfcore_pipeline" + tag "subworkflows/utils_nfcore_pipeline" + + test("Test Function checkConfigProvided") { + + function "checkConfigProvided" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function checkProfileProvided") { + + function "checkProfileProvided" + + when { + function { + """ + input[0] = [] + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function workflowCitation") { + + function "workflowCitation" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function nfCoreLogo") { + + function "nfCoreLogo" + + when { + function { + """ + input[0] = false + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function dashedLine") { + + function "dashedLine" + + when { + function { + """ + input[0] = false + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function without logColours") { + + function "logColours" + + when { + function { + """ + input[0] = true + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function with logColours") { + function "logColours" + + when { + function { + """ + input[0] = false + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap new file mode 100644 index 00000000..1037232c --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap @@ -0,0 +1,166 @@ +{ + "Test Function checkProfileProvided": { + "content": null, + "meta": { + "nf-test": "0.8.4", + 
"nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:03.360873" + }, + "Test Function checkConfigProvided": { + "content": [ + true + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:02:59.729647" + }, + "Test Function nfCoreLogo": { + "content": [ + "\n\n-\u001b[2m----------------------------------------------------\u001b[0m-\n \u001b[0;32m,--.\u001b[0;30m/\u001b[0;32m,-.\u001b[0m\n\u001b[0;34m ___ __ __ __ ___ \u001b[0;32m/,-._.--~'\u001b[0m\n\u001b[0;34m |\\ | |__ __ / ` / \\ |__) |__ \u001b[0;33m} {\u001b[0m\n\u001b[0;34m | \\| | \\__, \\__/ | \\ |___ \u001b[0;32m\\`-._,-`-,\u001b[0m\n \u001b[0;32m`._,._,'\u001b[0m\n\u001b[0;35m nextflow_workflow v9.9.9\u001b[0m\n-\u001b[2m----------------------------------------------------\u001b[0m-\n" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:10.562934" + }, + "Test Function workflowCitation": { + "content": [ + "If you use nextflow_workflow for your analysis please cite:\n\n* The pipeline\n https://doi.org/10.5281/zenodo.5070524\n\n* The nf-core framework\n https://doi.org/10.1038/s41587-020-0439-x\n\n* Software dependencies\n https://github.com/nextflow_workflow/blob/master/CITATIONS.md" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:07.019761" + }, + "Test Function without logColours": { + "content": [ + { + "reset": "", + "bold": "", + "dim": "", + "underlined": "", + "blink": "", + "reverse": "", + "hidden": "", + "black": "", + "red": "", + "green": "", + "yellow": "", + "blue": "", + "purple": "", + "cyan": "", + "white": "", + "bblack": "", + "bred": "", + "bgreen": "", + "byellow": "", + "bblue": "", + "bpurple": "", + "bcyan": "", + "bwhite": "", + "ublack": "", + "ured": "", + "ugreen": "", + "uyellow": "", + "ublue": "", + "upurple": "", + "ucyan": "", + "uwhite": "", + "iblack": "", + "ired": "", + "igreen": "", + "iyellow": "", + "iblue": "", + "ipurple": "", + "icyan": "", + "iwhite": "", + "biblack": "", + "bired": "", + "bigreen": "", + "biyellow": "", + "biblue": "", + "bipurple": "", + "bicyan": "", + "biwhite": "" + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:17.969323" + }, + "Test Function dashedLine": { + "content": [ + "-\u001b[2m----------------------------------------------------\u001b[0m-" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:14.366181" + }, + "Test Function with logColours": { + "content": [ + { + "reset": "\u001b[0m", + "bold": "\u001b[1m", + "dim": "\u001b[2m", + "underlined": "\u001b[4m", + "blink": "\u001b[5m", + "reverse": "\u001b[7m", + "hidden": "\u001b[8m", + "black": "\u001b[0;30m", + "red": "\u001b[0;31m", + "green": "\u001b[0;32m", + "yellow": "\u001b[0;33m", + "blue": "\u001b[0;34m", + "purple": "\u001b[0;35m", + "cyan": "\u001b[0;36m", + "white": "\u001b[0;37m", + "bblack": "\u001b[1;30m", + "bred": "\u001b[1;31m", + "bgreen": "\u001b[1;32m", + "byellow": "\u001b[1;33m", + "bblue": "\u001b[1;34m", + "bpurple": "\u001b[1;35m", + "bcyan": "\u001b[1;36m", + "bwhite": "\u001b[1;37m", + "ublack": "\u001b[4;30m", + "ured": "\u001b[4;31m", + "ugreen": "\u001b[4;32m", + "uyellow": "\u001b[4;33m", + "ublue": "\u001b[4;34m", + "upurple": "\u001b[4;35m", + "ucyan": "\u001b[4;36m", + "uwhite": "\u001b[4;37m", + "iblack": "\u001b[0;90m", + "ired": "\u001b[0;91m", + "igreen": "\u001b[0;92m", + "iyellow": "\u001b[0;93m", + "iblue": 
"\u001b[0;94m", + "ipurple": "\u001b[0;95m", + "icyan": "\u001b[0;96m", + "iwhite": "\u001b[0;97m", + "biblack": "\u001b[1;90m", + "bired": "\u001b[1;91m", + "bigreen": "\u001b[1;92m", + "biyellow": "\u001b[1;93m", + "biblue": "\u001b[1;94m", + "bipurple": "\u001b[1;95m", + "bicyan": "\u001b[1;96m", + "biwhite": "\u001b[1;97m" + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:21.714424" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test new file mode 100644 index 00000000..8940d32d --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test @@ -0,0 +1,29 @@ +nextflow_workflow { + + name "Test Workflow UTILS_NFCORE_PIPELINE" + script "../main.nf" + config "subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config" + workflow "UTILS_NFCORE_PIPELINE" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "utils_nfcore_pipeline" + tag "subworkflows/utils_nfcore_pipeline" + + test("Should run without failures") { + + when { + workflow { + """ + input[0] = [] + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap new file mode 100644 index 00000000..859d1030 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap @@ -0,0 +1,19 @@ +{ + "Should run without failures": { + "content": [ + { + "0": [ + true + ], + "valid_config": [ + true + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:25.726491" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config b/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config new file mode 100644 index 00000000..d0a926bf --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config @@ -0,0 +1,9 @@ +manifest { + name = 'nextflow_workflow' + author = """nf-core""" + homePage = 'https://127.0.0.1' + description = """Dummy pipeline""" + nextflowVersion = '!>=23.04.0' + version = '9.9.9' + doi = 'https://doi.org/10.5281/zenodo.5070524' +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml b/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml new file mode 100644 index 00000000..ac8523c9 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/utils_nfcore_pipeline: + - subworkflows/nf-core/utils_nfcore_pipeline/** diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf b/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf new file mode 100644 index 00000000..2585b65d --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf @@ -0,0 +1,62 @@ +// +// Subworkflow that uses the nf-validation plugin to render help text and parameter summary +// + +/* +======================================================================================== + IMPORT NF-VALIDATION PLUGIN +======================================================================================== +*/ + +include { paramsHelp } from 'plugin/nf-validation' +include { paramsSummaryLog } from 'plugin/nf-validation' +include { validateParameters } from 
'plugin/nf-validation' + +/* +======================================================================================== + SUBWORKFLOW DEFINITION +======================================================================================== +*/ + +workflow UTILS_NFVALIDATION_PLUGIN { + + take: + print_help // boolean: print help + workflow_command // string: default command used to run pipeline + pre_help_text // string: string to be printed before help text and summary log + post_help_text // string: string to be printed after help text and summary log + validate_params // boolean: validate parameters + schema_filename // path: JSON schema file, null to use default value + + main: + + log.debug "Using schema file: ${schema_filename}" + + // Default values for strings + pre_help_text = pre_help_text ?: '' + post_help_text = post_help_text ?: '' + workflow_command = workflow_command ?: '' + + // + // Print help message if needed + // + if (print_help) { + log.info pre_help_text + paramsHelp(workflow_command, parameters_schema: schema_filename) + post_help_text + System.exit(0) + } + + // + // Print parameter summary to stdout + // + log.info pre_help_text + paramsSummaryLog(workflow, parameters_schema: schema_filename) + post_help_text + + // + // Validate parameters relative to the parameter JSON schema + // + if (validate_params){ + validateParameters(parameters_schema: schema_filename) + } + + emit: + dummy_emit = true +} diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml b/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml new file mode 100644 index 00000000..3d4a6b04 --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml @@ -0,0 +1,44 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "UTILS_NFVALIDATION_PLUGIN" +description: Use nf-validation to initiate and validate a pipeline +keywords: + - utility + - pipeline + - initialise + - validation +components: [] +input: + - print_help: + type: boolean + description: | + Print help message and exit + - workflow_command: + type: string + description: | + The command to run the workflow e.g. "nextflow run main.nf" + - pre_help_text: + type: string + description: | + Text to print before the help message + - post_help_text: + type: string + description: | + Text to print after the help message + - validate_params: + type: boolean + description: | + Validate the parameters and error if invalid. + - schema_filename: + type: string + description: | + The filename of the schema to validate against.
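+# Note: in this pipeline the subworkflow is invoked from PIPELINE_INITIALISATION +# (subworkflows/local/utils_nfcore_funcscan_pipeline, shown earlier in this diff) +# with schema_filename set to "nextflow_schema.json".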
+output: + - dummy_emit: + type: boolean + description: | + Dummy emit to make nf-core subworkflows lint happy +authors: + - "@adamrtalbot" +maintainers: + - "@adamrtalbot" + - "@maxulysse" diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test new file mode 100644 index 00000000..5784a33f --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test @@ -0,0 +1,200 @@ +nextflow_workflow { + + name "Test Workflow UTILS_NFVALIDATION_PLUGIN" + script "../main.nf" + workflow "UTILS_NFVALIDATION_PLUGIN" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "plugin/nf-validation" + tag "'plugin/nf-validation'" + tag "utils_nfvalidation_plugin" + tag "subworkflows/utils_nfvalidation_plugin" + + test("Should run nothing") { + + when { + + params { + monochrome_logs = true + test_data = '' + } + + workflow { + """ + help = false + workflow_command = null + pre_help_text = null + post_help_text = null + validate_params = false + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should run help") { + + + when { + + params { + monochrome_logs = true + test_data = '' + } + workflow { + """ + help = true + workflow_command = null + pre_help_text = null + post_help_text = null + validate_params = false + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.exitStatus == 0 }, + { assert workflow.stdout.any { it.contains('Input/output options') } }, + { assert workflow.stdout.any { it.contains('--outdir') } } + ) + } + } + + test("Should run help with command") { + + when { + + params { + monochrome_logs = true + test_data = '' + } + workflow { + """ + help = true + workflow_command = "nextflow run noorg/doesntexist" + pre_help_text = null + post_help_text = null + validate_params = false + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.exitStatus == 0 }, + { assert workflow.stdout.any { it.contains('nextflow run noorg/doesntexist') } }, + { assert workflow.stdout.any { it.contains('Input/output options') } }, + { assert workflow.stdout.any { it.contains('--outdir') } } + ) + } + } + + test("Should run help with extra text") { + + + when { + + params { + monochrome_logs = true + test_data = '' + } + workflow { + """ + help = true + workflow_command = "nextflow run noorg/doesntexist" + pre_help_text = "pre-help-text" + post_help_text = "post-help-text" + validate_params = false + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.exitStatus == 0 }, + { assert workflow.stdout.any { 
it.contains('pre-help-text') } }, + { assert workflow.stdout.any { it.contains('nextflow run noorg/doesntexist') } }, + { assert workflow.stdout.any { it.contains('Input/output options') } }, + { assert workflow.stdout.any { it.contains('--outdir') } }, + { assert workflow.stdout.any { it.contains('post-help-text') } } + ) + } + } + + test("Should validate params") { + + when { + + params { + monochrome_logs = true + test_data = '' + outdir = 1 + } + workflow { + """ + help = false + workflow_command = null + pre_help_text = null + post_help_text = null + validate_params = true + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.failed }, + { assert workflow.stdout.any { it.contains('ERROR ~ ERROR: Validation of pipeline parameters failed!') } } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/nextflow_schema.json b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/nextflow_schema.json new file mode 100644 index 00000000..7626c1c9 --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/nextflow_schema.json @@ -0,0 +1,96 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "$id": "https://raw.githubusercontent.com/./master/nextflow_schema.json", + "title": ". pipeline parameters", + "description": "", + "type": "object", + "definitions": { + "input_output_options": { + "title": "Input/output options", + "type": "object", + "fa_icon": "fas fa-terminal", + "description": "Define where the pipeline should find input data and save output data.", + "required": ["outdir"], + "properties": { + "validate_params": { + "type": "boolean", + "description": "Validate parameters?", + "default": true, + "hidden": true + }, + "outdir": { + "type": "string", + "format": "directory-path", + "description": "The output directory where the results will be saved. 
You have to use absolute paths to storage on Cloud infrastructure.", + "fa_icon": "fas fa-folder-open" + }, + "test_data_base": { + "type": "string", + "default": "https://raw.githubusercontent.com/nf-core/test-datasets/modules", + "description": "Base for test data directory", + "hidden": true + }, + "test_data": { + "type": "string", + "description": "Fake test data param", + "hidden": true + } + } + }, + "generic_options": { + "title": "Generic options", + "type": "object", + "fa_icon": "fas fa-file-import", + "description": "Less common options for the pipeline, typically set in a config file.", + "help_text": "These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`.", + "properties": { + "help": { + "type": "boolean", + "description": "Display help text.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, + "version": { + "type": "boolean", + "description": "Display version and exit.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, + "logo": { + "type": "boolean", + "default": true, + "description": "Display nf-core logo in console output.", + "fa_icon": "fas fa-image", + "hidden": true + }, + "singularity_pull_docker_container": { + "type": "boolean", + "description": "Pull Singularity container from Docker?", + "hidden": true + }, + "publish_dir_mode": { + "type": "string", + "default": "copy", + "description": "Method used to save pipeline results to output directory.", + "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. 
See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.",
+                "fa_icon": "fas fa-copy",
+                "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"],
+                "hidden": true
+            },
+            "monochrome_logs": {
+                "type": "boolean",
+                "description": "Use monochrome_logs",
+                "hidden": true
+            }
+        }
+    }
+    },
+    "allOf": [
+        {
+            "$ref": "#/definitions/input_output_options"
+        },
+        {
+            "$ref": "#/definitions/generic_options"
+        }
+    ]
+}
diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/tags.yml b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/tags.yml
new file mode 100644
index 00000000..60b1cfff
--- /dev/null
+++ b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/tags.yml
@@ -0,0 +1,2 @@
+subworkflows/utils_nfvalidation_plugin:
+  - subworkflows/nf-core/utils_nfvalidation_plugin/**
diff --git a/tests/test.nf.test b/tests/test.nf.test
new file mode 100644
index 00000000..c3db6a93
--- /dev/null
+++ b/tests/test.nf.test
@@ -0,0 +1,109 @@
+nextflow_pipeline {
+
+    name "Test pipeline: NFCORE_FUNCSCAN"
+    script "main.nf"
+    tag "pipeline"
+    tag "nfcore_funcscan"
+    tag "test"
+
+    test("test_profile") {
+
+        when {
+            params {
+                outdir = "$outputDir"
+            }
+        }
+
+        then {
+            assertAll(
+                { assert workflow.success },
+                { assert new File("$outputDir/pipeline_info/nf_core_pipeline_software_mqc_versions.yml").exists() },
+                { assert new File("$outputDir/multiqc/multiqc_report.html").exists() },
+
+                // AMPir
+                { assert snapshot(
+                    file("$outputDir/amp/ampir/sample_1/sample_1.ampir.tsv").text.contains("NODE_882919_length_258_cov_0.935961_1"),
+                    file("$outputDir/amp/ampir/sample_1/sample_1.ampir.faa").text.contains("NODE_882919_length_258_cov_0.935961_1"),
+                    file("$outputDir/amp/ampir/sample_2/sample_2.ampir.tsv").text.contains("NODE_882919_length_258_cov_0.935961_1"),
+                    file("$outputDir/amp/ampir/sample_2/sample_2.ampir.faa").text.contains("NODE_882919_length_258_cov_0.935961_1"),
+                ).match("ampir") },
+
+                // AMPlify
+                { assert snapshot(
+                    file("$outputDir/amp/amplify/sample_1/sample_1.amplify.tsv").text.contains("NODE_882919_length_258_cov_0.935961_1"),
+                    file("$outputDir/amp/amplify/sample_2/sample_2.amplify.tsv").text.contains("NODE_882919_length_258_cov_0.935961_1")
+                ).match("amplify") },
+
+                // HMMsearch
+                { assert new File("$outputDir/amp/hmmer_hmmsearch/sample_1/sample_1_mybacteriocin.hmmer_hmmsearch.txt.gz").exists() },
+                { assert new File("$outputDir/amp/hmmer_hmmsearch/sample_2/sample_2_mybacteriocin.hmmer_hmmsearch.txt.gz").exists() },
+
+                // Macrel
+                { assert snapshot(
+                    path("$outputDir/amp/macrel/sample_1.macrel/sample_1.macrel.smorfs.faa.gz"),
+                    path("$outputDir/amp/macrel/sample_2.macrel/sample_2.macrel.smorfs.faa.gz"),
+                    path("$outputDir/amp/macrel/sample_1.macrel/sample_1.macrel.all_orfs.faa.gz"),
+                    path("$outputDir/amp/macrel/sample_2.macrel/sample_2.macrel.all_orfs.faa.gz"),
+                    path("$outputDir/amp/macrel/sample_1.macrel/sample_1.macrel.prediction.gz"),
+                    path("$outputDir/amp/macrel/sample_2.macrel/sample_2.macrel.prediction.gz"),
+                    path("$outputDir/amp/macrel/sample_1.macrel/README.md"),
+                    path("$outputDir/amp/macrel/sample_2.macrel/README.md"),
+                    path("$outputDir/amp/macrel/sample_1.macrel/sample_1.macrel_log.txt"),
+                    path("$outputDir/amp/macrel/sample_2.macrel/sample_2.macrel_log.txt")
+                ).match("macrel") },
+
+                // AMPcombi
+                { assert path("$outputDir/reports/ampcombi2/Ampcombi_summary.tsv").text.contains("NODE_515831_length_303_cov_1.532258_1") },
+                { assert snapshot(
+                    path("$outputDir/reports/ampcombi2/Ampcombi_cluster.log"),
path("$outputDir/reports/ampcombi2/Ampcombi_complete.log"), + path("$outputDir/reports/ampcombi2/Ampcombi_parse_tables.log") + ).match("ampcombi_logfiles") }, + + // DeepARG + { assert snapshot( + path("$outputDir/arg/deeparg/sample_1/sample_1.align.daa.tsv"), + path("$outputDir/arg/deeparg/sample_2/sample_2.align.daa.tsv"), + path("$outputDir/arg/deeparg/sample_1/sample_1.mapping.ARG"), + path("$outputDir/arg/deeparg/sample_2/sample_2.mapping.ARG") + ).match("deeparg_tsv_ARG") }, + { assert file("$outputDir/arg/deeparg/sample_1/sample_1.align.daa").name }, + { assert file("$outputDir/arg/deeparg/sample_2/sample_2.align.daa").name }, + { assert path("$outputDir/arg/deeparg/sample_1/sample_1.mapping.potential.ARG").text.contains("#ARG") }, + { assert path("$outputDir/arg/deeparg/sample_2/sample_2.mapping.potential.ARG").text.contains("#ARG") }, + + // ABRicate + { assert snapshot( + path("$outputDir/arg/abricate/sample_1/sample_1.txt"), + path("$outputDir/arg/abricate/sample_2/sample_2.txt"), + ).match("abricate") }, + + // AMRFinderPlus + { assert snapshot( + path("$outputDir/arg/amrfinderplus/sample_1/sample_1.tsv"), + path("$outputDir/arg/amrfinderplus/sample_2/sample_2.tsv"), + ).match("amrfinderplus") }, + + // RGI + { assert snapshot( + path("$outputDir/arg/rgi/sample_1/sample_1.txt"), + path("$outputDir/arg/rgi/sample_2/sample_2.txt"), + ).match("rgi") }, + + // fARGene + { assert snapshot( + path("$outputDir/arg/fargene/sample_1/class_a/results_summary.txt"), + path("$outputDir/arg/fargene/sample_2/class_a/results_summary.txt"), + path("$outputDir/arg/fargene/sample_1/class_b_1_2/results_summary.txt"), + path("$outputDir/arg/fargene/sample_2/class_b_1_2/results_summary.txt") + ).match("fargene") + }, + { assert path("$outputDir/arg/fargene/sample_1/fargene_analysis.log").text.contains("fARGene is done.") }, + { assert path("$outputDir/arg/fargene/sample_2/fargene_analysis.log").text.contains("fARGene is done.") }, + + // hAMRonization + { assert snapshot(path("$outputDir/reports/hamronization_summarize/hamronization_combined_report.tsv")).match("hamronization_summarize") } + ) + } + } +} diff --git a/tests/test.nf.test.snap b/tests/test.nf.test.snap new file mode 100644 index 00000000..b8784d4e --- /dev/null +++ b/tests/test.nf.test.snap @@ -0,0 +1,126 @@ +{ + "hamronization_summarize": { + "content": [ + "hamronization_combined_report.tsv:md5,864466b0fb1acfc0e6b3425271f78ecb" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-23T15:33:45.641163055" + }, + "abricate": { + "content": [ + "sample_1.txt:md5,69af3321b0bc808b7ef85f102395736f", + "sample_2.txt:md5,69af3321b0bc808b7ef85f102395736f" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-23T15:33:45.590739146" + }, + "fargene": { + "content": [ + "results_summary.txt:md5,690d351cfc52577263ef4cfab1c81f50", + "results_summary.txt:md5,690d351cfc52577263ef4cfab1c81f50", + "results_summary.txt:md5,f69af9b0e18a0cfc934eb18b7e4bffe1", + "results_summary.txt:md5,f69af9b0e18a0cfc934eb18b7e4bffe1" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-23T15:33:45.625398198" + }, + "rgi": { + "content": [ + "sample_1.txt:md5,fce130af51f93cccfc09ddaf9caf623f", + "sample_2.txt:md5,fce130af51f93cccfc09ddaf9caf623f" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-23T15:33:45.614473482" + }, + "deeparg_tsv_ARG": { + "content": [ + 
"sample_1.align.daa.tsv:md5,21822364379fe8f991d27cdb52a33d1d", + "sample_2.align.daa.tsv:md5,f448465df58785a87cdee53691a77bfe", + "sample_1.mapping.ARG:md5,0e049e99eab4c55666062df21707d5b9", + "sample_2.mapping.ARG:md5,0e049e99eab4c55666062df21707d5b9" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-23T15:33:45.575881231" + }, + "ampir": { + "content": [ + true, + true, + true, + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-23T15:33:45.512274661" + }, + "ampcombi_logfiles": { + "content": [ + "Ampcombi_cluster.log:md5,4c78f5f134edf566f39e04e3ab7d8558", + "Ampcombi_complete.log:md5,3dabfea4303bf94bd4f5d78c5b8c83c1", + "Ampcombi_parse_tables.log:md5,cb5dc95f6b64edc2f0eb56bb541660d5" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-23T15:33:45.560675596" + }, + "amplify": { + "content": [ + true, + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-23T15:33:45.522977776" + }, + "macrel": { + "content": [ + "sample_1.macrel.smorfs.faa.gz:md5,1b5e2434860e635e95324d1804a3be7b", + "sample_2.macrel.smorfs.faa.gz:md5,38108b5cdfdc2196afe67418b9b04682", + "sample_1.macrel.all_orfs.faa.gz:md5,86f6b3b590d1b22d9c5aa164f8a14080", + "sample_2.macrel.all_orfs.faa.gz:md5,fdb384925af50ecade05dccaff68afd8", + "sample_1.macrel.prediction.gz:md5,0c4b16e0838be56e012b99169863a168", + "sample_2.macrel.prediction.gz:md5,440deffd6b6d9986ce098e44c66db9ae", + "README.md:md5,fa3706dfc95d0538a52c4d0d824be5fb", + "README.md:md5,fa3706dfc95d0538a52c4d0d824be5fb", + "sample_1.macrel_log.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "sample_2.macrel_log.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-23T15:33:45.525854315" + }, + "amrfinderplus": { + "content": [ + "sample_1.tsv:md5,b4d261ace9be7d013c19d1f5c0005bfe", + "sample_2.tsv:md5,b4d261ace9be7d013c19d1f5c0005bfe" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-23T15:33:45.603392278" + } +} \ No newline at end of file diff --git a/tests/test_bakta.nf.test b/tests/test_bakta.nf.test new file mode 100644 index 00000000..b1913b04 --- /dev/null +++ b/tests/test_bakta.nf.test @@ -0,0 +1,109 @@ +nextflow_pipeline { + + name "Test pipeline: NFCORE_FUNCSCAN" + script "main.nf" + tag "pipeline" + tag "nfcore_funcscan" + tag "test_bakta" + + test("test_bakta_profile") { + + when { + params { + outdir = "$outputDir" + } + } + + then { + assertAll( + { assert workflow.success }, + { assert new File("$outputDir/pipeline_info/nf_core_pipeline_software_mqc_versions.yml").exists() }, + { assert new File("$outputDir/multiqc/multiqc_report.html").exists() }, + + // AMPir + { assert snapshot( + file("$outputDir/amp/ampir/sample_1/sample_1.ampir.tsv").text.contains("NODE_882919_length_258_cov_0.935961_1"), + file("$outputDir/amp/ampir/sample_1/sample_1.ampir.faa").text.contains("NODE_882919_length_258_cov_0.935961_1"), + file("$outputDir/amp/ampir/sample_2/sample_2.ampir.tsv").text.contains("NODE_882919_length_258_cov_0.935961_1"), + file("$outputDir/amp/ampir/sample_2/sample_2.ampir.faa").text.contains("NODE_882919_length_258_cov_0.935961_1") + ).match("ampir") }, + + // AMPlify + { assert snapshot( + file("$outputDir/amp/amplify/sample_1/sample_1.amplify.tsv").text.contains("NODE_882919_length_258_cov_0.935961_1"), + 
file("$outputDir/amp/amplify/sample_2/sample_2.amplify.tsv").text.contains("NODE_882919_length_258_cov_0.935961_1") + ).match("amplify") }, + + // HMMsearch + { assert new File("$outputDir/amp/hmmer_hmmsearch/sample_1/sample_1_mybacteriocin.hmmer_hmmsearch.txt.gz").exists() }, + { assert new File("$outputDir/amp/hmmer_hmmsearch/sample_2/sample_2_mybacteriocin.hmmer_hmmsearch.txt.gz").exists() }, + + // Macrel + { assert snapshot( + path("$outputDir/amp/macrel/sample_1.macrel/sample_1.macrel.smorfs.faa.gz"), + path("$outputDir/amp/macrel/sample_2.macrel/sample_2.macrel.smorfs.faa.gz"), + path("$outputDir/amp/macrel/sample_1.macrel/sample_1.macrel.all_orfs.faa.gz"), + path("$outputDir/amp/macrel/sample_2.macrel/sample_2.macrel.all_orfs.faa.gz"), + path("$outputDir/amp/macrel/sample_1.macrel/sample_1.macrel.prediction.gz"), + path("$outputDir/amp/macrel/sample_2.macrel/sample_2.macrel.prediction.gz"), + path("$outputDir/amp/macrel/sample_1.macrel/README.md"), + path("$outputDir/amp/macrel/sample_1.macrel/README.md"), + path("$outputDir/amp/macrel/sample_1.macrel/sample_1.macrel_log.txt"), + path("$outputDir/amp/macrel/sample_2.macrel/sample_2.macrel_log.txt") + ).match("macrel") }, + + // AMPcombi + { assert path("$outputDir/reports/ampcombi2/Ampcombi_summary.tsv").text.contains("KKEJHB_00100") }, + { assert snapshot( + path("$outputDir/reports/ampcombi2/Ampcombi_cluster.log"), + path("$outputDir/reports/ampcombi2/Ampcombi_complete.log"), + path("$outputDir/reports/ampcombi2/Ampcombi_parse_tables.log") + ).match("ampcombi_logfiles") }, + + // DeepARG + { assert snapshot( + path("$outputDir/arg/deeparg/sample_1/sample_1.align.daa.tsv"), + path("$outputDir/arg/deeparg/sample_2/sample_2.align.daa.tsv"), + path("$outputDir/arg/deeparg/sample_1/sample_1.mapping.ARG"), + path("$outputDir/arg/deeparg/sample_2/sample_2.mapping.ARG") + ).match("deeparg_tsv_ARG") }, + { assert file("$outputDir/arg/deeparg/sample_1/sample_1.align.daa").name }, + { assert file("$outputDir/arg/deeparg/sample_2/sample_2.align.daa").name }, + { assert path("$outputDir/arg/deeparg/sample_1/sample_1.mapping.potential.ARG").text.contains("#ARG") }, + { assert path("$outputDir/arg/deeparg/sample_2/sample_2.mapping.potential.ARG").text.contains("#ARG") }, + + // ABRicate + { assert snapshot( + path("$outputDir/arg/abricate/sample_1/sample_1.txt"), + path("$outputDir/arg/abricate/sample_2/sample_2.txt"), + ).match("abricate") }, + + // AMRFinderPlus + { assert snapshot( + path("$outputDir/arg/amrfinderplus/sample_1/sample_1.tsv"), + path("$outputDir/arg/amrfinderplus/sample_2/sample_2.tsv"), + ).match("amrfinderplus") }, + + // RGI + { assert snapshot( + path("$outputDir/arg/rgi/sample_1/sample_1.txt"), + path("$outputDir/arg/rgi/sample_2/sample_2.txt"), + ).match("rgi") }, + + // fARGene + { assert snapshot( + path("$outputDir/arg/fargene/sample_1/class_a/results_summary.txt"), + path("$outputDir/arg/fargene/sample_2/class_a/results_summary.txt"), + path("$outputDir/arg/fargene/sample_1/class_b_1_2/results_summary.txt"), + path("$outputDir/arg/fargene/sample_2/class_b_1_2/results_summary.txt") + ).match("fargene") + }, + { assert path("$outputDir/arg/fargene/sample_1/fargene_analysis.log").text.contains("fARGene is done.") }, + { assert path("$outputDir/arg/fargene/sample_2/fargene_analysis.log").text.contains("fARGene is done.") }, + + // hAMRonization + { assert snapshot(path("$outputDir/reports/hamronization_summarize/hamronization_combined_report.tsv")).match("hamronization_summarize") }, + ) + } + } +} diff --git 
a/tests/test_bakta.nf.test.snap b/tests/test_bakta.nf.test.snap new file mode 100644 index 00000000..ff73f307 --- /dev/null +++ b/tests/test_bakta.nf.test.snap @@ -0,0 +1,126 @@ +{ + "hamronization_summarize": { + "content": [ + "hamronization_combined_report.tsv:md5,864466b0fb1acfc0e6b3425271f78ecb" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-23T16:51:37.320498194" + }, + "abricate": { + "content": [ + "sample_1.txt:md5,69af3321b0bc808b7ef85f102395736f", + "sample_2.txt:md5,69af3321b0bc808b7ef85f102395736f" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-23T16:51:37.261118633" + }, + "fargene": { + "content": [ + "results_summary.txt:md5,690d351cfc52577263ef4cfab1c81f50", + "results_summary.txt:md5,690d351cfc52577263ef4cfab1c81f50", + "results_summary.txt:md5,f69af9b0e18a0cfc934eb18b7e4bffe1", + "results_summary.txt:md5,f69af9b0e18a0cfc934eb18b7e4bffe1" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-23T16:51:37.30812705" + }, + "rgi": { + "content": [ + "sample_1.txt:md5,fce130af51f93cccfc09ddaf9caf623f", + "sample_2.txt:md5,fce130af51f93cccfc09ddaf9caf623f" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-23T16:51:37.292595392" + }, + "deeparg_tsv_ARG": { + "content": [ + "sample_1.align.daa.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", + "sample_2.align.daa.tsv:md5,4a86ca69defa4c861fabf236609afe8a", + "sample_1.mapping.ARG:md5,0e049e99eab4c55666062df21707d5b9", + "sample_2.mapping.ARG:md5,0e049e99eab4c55666062df21707d5b9" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-23T16:51:37.245901486" + }, + "ampir": { + "content": [ + false, + false, + false, + false + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-23T16:51:37.19218768" + }, + "ampcombi_logfiles": { + "content": [ + "Ampcombi_cluster.log:md5,4c78f5f134edf566f39e04e3ab7d8558", + "Ampcombi_complete.log:md5,3dabfea4303bf94bd4f5d78c5b8c83c1", + "Ampcombi_parse_tables.log:md5,cb5dc95f6b64edc2f0eb56bb541660d5" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-23T16:51:37.230099612" + }, + "amplify": { + "content": [ + false, + false + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-23T16:51:37.204985783" + }, + "macrel": { + "content": [ + "sample_1.macrel.smorfs.faa.gz:md5,1b5e2434860e635e95324d1804a3be7b", + "sample_2.macrel.smorfs.faa.gz:md5,38108b5cdfdc2196afe67418b9b04682", + "sample_1.macrel.all_orfs.faa.gz:md5,86f6b3b590d1b22d9c5aa164f8a14080", + "sample_2.macrel.all_orfs.faa.gz:md5,fdb384925af50ecade05dccaff68afd8", + "sample_1.macrel.prediction.gz:md5,0c4b16e0838be56e012b99169863a168", + "sample_2.macrel.prediction.gz:md5,440deffd6b6d9986ce098e44c66db9ae", + "README.md:md5,fa3706dfc95d0538a52c4d0d824be5fb", + "README.md:md5,fa3706dfc95d0538a52c4d0d824be5fb", + "sample_1.macrel_log.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "sample_2.macrel_log.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-23T16:51:37.208256804" + }, + "amrfinderplus": { + "content": [ + "sample_1.tsv:md5,b4d261ace9be7d013c19d1f5c0005bfe", + "sample_2.tsv:md5,b4d261ace9be7d013c19d1f5c0005bfe" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-23T16:51:37.276435739" + } +} \ No newline at 
end of file diff --git a/tests/test_bgc_bakta.nf.test b/tests/test_bgc_bakta.nf.test new file mode 100644 index 00000000..37a0a0b1 --- /dev/null +++ b/tests/test_bgc_bakta.nf.test @@ -0,0 +1,45 @@ +nextflow_pipeline { + + name "Test pipeline: NFCORE_FUNCSCAN" + script "main.nf" + tag "pipeline" + tag "nfcore_funcscan" + tag "test_bgc_bakta" + + test("test_bgc_bakta_profile") { + + when { + params { + outdir = "$outputDir" + } + } + + then { + assertAll( + { assert workflow.success }, + { assert new File("$outputDir/pipeline_info/nf_core_pipeline_software_mqc_versions.yml").exists() }, + { assert new File("$outputDir/multiqc/multiqc_report.html").exists() }, + + // antiSMASH + { assert path("$outputDir/bgc/antismash/sample_2/sample_2.gbk").text.contains("##antiSMASH-Data-START##") }, // channel: gbk_input + { assert snapshot(path("$outputDir/bgc/antismash/sample_2/css")).match("antismash_css") }, // parts of channel: html_accessory_files + { assert path("$outputDir/bgc/antismash/sample_2/sample_2.zip").exists() }, // channel: zip + { assert path("$outputDir/bgc/antismash/sample_2/index.html").text.contains("https://antismash.secondarymetabolites.org/") }, // channel: html + { assert path("$outputDir/bgc/antismash/sample_2/regions.js").text.contains('NODE_861_length_4516_cov') }, // channel: json_sideloading + { assert path("$outputDir/bgc/antismash/sample_2/sample_2.log").text.contains("antiSMASH status: SUCCESS") }, // channel: log + + // DeepBGC + { assert snapshot(path("$outputDir/bgc/deepbgc/sample_2/sample_2.bgc.gbk")).match("deepbgc_bgc_gbk") }, // channel: bgc_gbk + { assert path("$outputDir/bgc/deepbgc/sample_2/sample_2.antismash.json").text.contains("Putative BGCs predicted using DeepBGC") }, // channel: json + { assert path("$outputDir/bgc/deepbgc/sample_2/LOG.txt").text.contains('Saved DeepBGC result to: sample_2') }, // channel: log + { assert path("$outputDir/bgc/deepbgc/sample_2/sample_2.full.gbk").text.contains('1 aaggggtatg gagcagcgac gtctacccgt') }, // channel: full_gbk + + // GECCO + { assert snapshot( + path("$outputDir/bgc/gecco/sample_2/sample_2.genes.tsv"), // channel: genes + path("$outputDir/bgc/gecco/sample_2/sample_2.features.tsv") // channel: features + ).match("gecco") } + ) + } + } +} diff --git a/tests/test_bgc_bakta.nf.test.snap b/tests/test_bgc_bakta.nf.test.snap new file mode 100644 index 00000000..9bae9f24 --- /dev/null +++ b/tests/test_bgc_bakta.nf.test.snap @@ -0,0 +1,35 @@ +{ + "antismash_css": { + "content": [ + [ + "bacteria.css:md5,39c0ca9cbc64cb824dc958b26b5b4ab8" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T10:32:18.349501125" + }, + "deepbgc_bgc_gbk": { + "content": [ + "sample_2.bgc.gbk:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T10:32:18.378687548" + }, + "gecco": { + "content": [ + "sample_2.genes.tsv:md5,66e3724c7e7da102bf58acd564211e8b", + "sample_2.features.tsv:md5,2ef146213836ca80d3079776f17c7cb2" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T10:32:18.404694725" + } +} \ No newline at end of file diff --git a/tests/test_bgc_prokka.nf.test b/tests/test_bgc_prokka.nf.test new file mode 100644 index 00000000..0fe53cd5 --- /dev/null +++ b/tests/test_bgc_prokka.nf.test @@ -0,0 +1,45 @@ +nextflow_pipeline { + + name "Test pipeline: NFCORE_FUNCSCAN" + script "main.nf" + tag "pipeline" + tag "nfcore_funcscan" + tag "test_bgc_prokka" + + 
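+    // NOTE (descriptive comment, not part of the original test): the assertions below use two
+    // strategies. Outputs that can embed run-specific details such as timestamps or absolute
+    // paths (antiSMASH HTML/zip/log, DeepBGC JSON/log) are checked with text.contains()/exists(),
+    // while byte-stable outputs (antiSMASH CSS accessory files, GECCO TSVs) are pinned by md5
+    // through snapshot().match().
+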
test("test_bgc_prokka_profile") { + + when { + params { + outdir = "$outputDir" + } + } + + then { + assertAll( + { assert workflow.success }, + { assert new File("$outputDir/pipeline_info/nf_core_pipeline_software_mqc_versions.yml").exists() }, + { assert new File("$outputDir/multiqc/multiqc_report.html").exists() }, + + // antiSMASH + { assert path("$outputDir/bgc/antismash/sample_2/sample_2.gbk").text.contains("##antiSMASH-Data-START##") }, // channel: gbk_input + { assert snapshot(path("$outputDir/bgc/antismash/sample_2/css")).match("antismash_css") }, // parts of channel: html_accessory_files + { assert path("$outputDir/bgc/antismash/sample_2/sample_2.zip").exists() }, // channel: zip + { assert path("$outputDir/bgc/antismash/sample_2/index.html").text.contains("https://antismash.secondarymetabolites.org/") }, // channel: html + { assert path("$outputDir/bgc/antismash/sample_2/regions.js").text.contains('PROKKA_1') }, // channel: json_sideloading + { assert path("$outputDir/bgc/antismash/sample_2/sample_2.log").text.contains("antiSMASH status: SUCCESS") }, // channel: log + + // DeepBGC + { assert snapshot(path("$outputDir/bgc/deepbgc/sample_2/sample_2.bgc.gbk")).match("deepbgc_bgc_gbk") }, // channel: bgc_gbk + { assert path("$outputDir/bgc/deepbgc/sample_2/sample_2.antismash.json").text.contains("Putative BGCs predicted using DeepBGC") }, // channel: json + { assert path("$outputDir/bgc/deepbgc/sample_2/LOG.txt").text.contains('Saved DeepBGC result to: sample_2') }, // channel: log + { assert path("$outputDir/bgc/deepbgc/sample_2/sample_2.full.gbk").text.contains('1 aaggggtatg gagcagcgac gtctacccgt') }, // channel: full_gbk + + // GECCO + { assert snapshot( + path("$outputDir/bgc/gecco/sample_2/sample_2.genes.tsv"), // channel: genes + path("$outputDir/bgc/gecco/sample_2/sample_2.features.tsv") // channel: features + ).match("gecco") } + ) + } + } +} diff --git a/tests/test_bgc_prokka.nf.test.snap b/tests/test_bgc_prokka.nf.test.snap new file mode 100644 index 00000000..4894afa1 --- /dev/null +++ b/tests/test_bgc_prokka.nf.test.snap @@ -0,0 +1,35 @@ +{ + "antismash_css": { + "content": [ + [ + "bacteria.css:md5,39c0ca9cbc64cb824dc958b26b5b4ab8" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T10:39:33.879464917" + }, + "deepbgc_bgc_gbk": { + "content": [ + "sample_2.bgc.gbk:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T10:39:33.920624113" + }, + "gecco": { + "content": [ + "sample_2.genes.tsv:md5,050b82ca462430ecc0635acb2e297531", + "sample_2.features.tsv:md5,79354868ee3de6fdc419195b8fa8edb6" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T10:39:33.944935473" + } +} \ No newline at end of file diff --git a/tests/test_bgc_pyrodigal.nf.test b/tests/test_bgc_pyrodigal.nf.test new file mode 100644 index 00000000..cab97577 --- /dev/null +++ b/tests/test_bgc_pyrodigal.nf.test @@ -0,0 +1,45 @@ +nextflow_pipeline { + + name "Test pipeline: NFCORE_FUNCSCAN" + script "main.nf" + tag "pipeline" + tag "nfcore_funcscan" + tag "test_bgc_pyrodigal" + + test("test_bgc_pyrodigal_profile") { + + when { + params { + outdir = "$outputDir" + } + } + + then { + assertAll( + { assert workflow.success }, + { assert new File("$outputDir/pipeline_info/nf_core_pipeline_software_mqc_versions.yml").exists() }, + { assert new File("$outputDir/multiqc/multiqc_report.html").exists() }, + + // antiSMASH + { assert 
path("$outputDir/bgc/antismash/sample_2/sample_2.gbk").text.contains("##antiSMASH-Data-START##") }, // channel: gbk_input + { assert snapshot(path("$outputDir/bgc/antismash/sample_2/css")).match("antismash_css") }, // parts of channel: html_accessory_files + { assert path("$outputDir/bgc/antismash/sample_2/sample_2.zip").exists() }, // channel: zip + { assert path("$outputDir/bgc/antismash/sample_2/index.html").text.contains("https://antismash.secondarymetabolites.org/") }, // channel: html + { assert path("$outputDir/bgc/antismash/sample_2/regions.js").text.contains('NODE_861_length_4516_cov') }, // channel: json_sideloading + { assert path("$outputDir/bgc/antismash/sample_2/sample_2.log").text.contains("antiSMASH status: SUCCESS") }, // channel: log + + // DeepBGC + { assert snapshot(path("$outputDir/bgc/deepbgc/sample_2/sample_2.bgc.gbk")).match("deepbgc_bgc_gbk") }, // channel: bgc_gbk + { assert path("$outputDir/bgc/deepbgc/sample_2/sample_2.antismash.json").text.contains("NODE_861_length_4516_cov_2.736606") }, // channel: json + { assert path("$outputDir/bgc/deepbgc/sample_2/LOG.txt").text.contains('Saved DeepBGC result to: sample_2') }, // channel: log + { assert path("$outputDir/bgc/deepbgc/sample_2/sample_2.full.gbk").text.contains('1 aaggggtatg gagcagcgac gtctacccgt') }, // channel: full_gbk + + // GECCO + { assert snapshot( + path("$outputDir/bgc/gecco/sample_2/sample_2.genes.tsv"), // channel: genes + path("$outputDir/bgc/gecco/sample_2/sample_2.features.tsv") // channel: features + ).match("gecco") } + ) + } + } +} diff --git a/tests/test_bgc_pyrodigal.nf.test.snap b/tests/test_bgc_pyrodigal.nf.test.snap new file mode 100644 index 00000000..67089772 --- /dev/null +++ b/tests/test_bgc_pyrodigal.nf.test.snap @@ -0,0 +1,35 @@ +{ + "antismash_css": { + "content": [ + [ + "bacteria.css:md5,39c0ca9cbc64cb824dc958b26b5b4ab8" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T10:45:25.720352923" + }, + "deepbgc_bgc_gbk": { + "content": [ + "sample_2.bgc.gbk:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T10:45:44.435766452" + }, + "gecco": { + "content": [ + "sample_2.genes.tsv:md5,66e3724c7e7da102bf58acd564211e8b", + "sample_2.features.tsv:md5,2ef146213836ca80d3079776f17c7cb2" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T10:45:25.732866237" + } +} \ No newline at end of file diff --git a/tests/test_full.nf.test b/tests/test_full.nf.test new file mode 100644 index 00000000..b5d53e6d --- /dev/null +++ b/tests/test_full.nf.test @@ -0,0 +1,373 @@ +nextflow_pipeline { + + name "Test pipeline: NFCORE_FUNCSCAN" + script "main.nf" + tag "pipeline" + tag "nfcore_funcscan" + tag "test_full" + + test("test_full_profile") { + + when { + params { + outdir = "$outputDir" + } + } + + then { + assertAll( + { assert workflow.success }, + { assert new File("$outputDir/pipeline_info/nf_core_pipeline_software_mqc_versions.yml").exists() }, + { assert new File("$outputDir/multiqc/multiqc_report.html").exists() }, + + // AMP workflow + + // AMPir + { assert snapshot( + path("$outputDir/amp/ampir/ERZ1664501/ERZ1664501.ampir.tsv"), + path("$outputDir/amp/ampir/ERZ1664503/ERZ1664503.ampir.tsv"), + path("$outputDir/amp/ampir/ERZ1664504/ERZ1664504.ampir.tsv"), + path("$outputDir/amp/ampir/ERZ1664505/ERZ1664505.ampir.tsv"), + path("$outputDir/amp/ampir/ERZ1664506/ERZ1664506.ampir.tsv"), + 
path("$outputDir/amp/ampir/ERZ1664507/ERZ1664507.ampir.tsv"), + path("$outputDir/amp/ampir/ERZ1664508/ERZ1664508.ampir.tsv"), + path("$outputDir/amp/ampir/ERZ1664509/ERZ1664509.ampir.tsv"), + path("$outputDir/amp/ampir/ERZ1664510/ERZ1664510.ampir.tsv"), + path("$outputDir/amp/ampir/ERZ1664511/ERZ1664511.ampir.tsv"), + path("$outputDir/amp/ampir/ERZ1664515/ERZ1664515.ampir.tsv"), + path("$outputDir/amp/ampir/ERZ1664516/ERZ1664516.ampir.tsv"), + path("$outputDir/amp/ampir/ERZ1664517/ERZ1664517.ampir.tsv"), + path("$outputDir/amp/ampir/ERZ1664518/ERZ1664518.ampir.tsv"), + path("$outputDir/amp/ampir/ERZ1664520/ERZ1664520.ampir.tsv"), + path("$outputDir/amp/ampir/ERZ1664521/ERZ1664521.ampir.tsv"), + path("$outputDir/amp/ampir/ERZ1664523/ERZ1664523.ampir.tsv"), + path("$outputDir/amp/ampir/ERZ1664524/ERZ1664524.ampir.tsv"), + path("$outputDir/amp/ampir/ERZ1664528/ERZ1664528.ampir.tsv"), + path("$outputDir/amp/ampir/ERZ1664501/ERZ1664501.ampir.faa"), + path("$outputDir/amp/ampir/ERZ1664503/ERZ1664503.ampir.faa"), + path("$outputDir/amp/ampir/ERZ1664504/ERZ1664504.ampir.faa"), + path("$outputDir/amp/ampir/ERZ1664505/ERZ1664505.ampir.faa"), + path("$outputDir/amp/ampir/ERZ1664506/ERZ1664506.ampir.faa"), + path("$outputDir/amp/ampir/ERZ1664507/ERZ1664507.ampir.faa"), + path("$outputDir/amp/ampir/ERZ1664508/ERZ1664508.ampir.faa"), + path("$outputDir/amp/ampir/ERZ1664509/ERZ1664509.ampir.faa"), + path("$outputDir/amp/ampir/ERZ1664510/ERZ1664510.ampir.faa"), + path("$outputDir/amp/ampir/ERZ1664511/ERZ1664511.ampir.faa"), + path("$outputDir/amp/ampir/ERZ1664515/ERZ1664515.ampir.faa"), + path("$outputDir/amp/ampir/ERZ1664516/ERZ1664516.ampir.faa"), + path("$outputDir/amp/ampir/ERZ1664517/ERZ1664517.ampir.faa"), + path("$outputDir/amp/ampir/ERZ1664518/ERZ1664518.ampir.faa"), + path("$outputDir/amp/ampir/ERZ1664520/ERZ1664520.ampir.faa"), + path("$outputDir/amp/ampir/ERZ1664521/ERZ1664521.ampir.faa"), + path("$outputDir/amp/ampir/ERZ1664523/ERZ1664523.ampir.faa"), + path("$outputDir/amp/ampir/ERZ1664524/ERZ1664524.ampir.faa"), + path("$outputDir/amp/ampir/ERZ1664528/ERZ1664528.ampir.faa") + ).match("ampir") }, + + // HMMsearch + { assert new File("$outputDir/amp/hmmer_hmmsearch/ERZ1664501/ERZ1664501_mybacteriocin.hmmer_hmmsearch.txt.gz").exists() }, + { assert new File("$outputDir/amp/hmmer_hmmsearch/ERZ1664503/ERZ1664503_mybacteriocin.hmmer_hmmsearch.txt.gz").exists() }, + { assert new File("$outputDir/amp/hmmer_hmmsearch/ERZ1664504/ERZ1664504_mybacteriocin.hmmer_hmmsearch.txt.gz").exists() }, + { assert new File("$outputDir/amp/hmmer_hmmsearch/ERZ1664506/ERZ1664506_mybacteriocin.hmmer_hmmsearch.txt.gz").exists() }, + { assert new File("$outputDir/amp/hmmer_hmmsearch/ERZ1664507/ERZ1664507_mybacteriocin.hmmer_hmmsearch.txt.gz").exists() }, + { assert new File("$outputDir/amp/hmmer_hmmsearch/ERZ1664508/ERZ1664508_mybacteriocin.hmmer_hmmsearch.txt.gz").exists() }, + { assert new File("$outputDir/amp/hmmer_hmmsearch/ERZ1664509/ERZ1664509_mybacteriocin.hmmer_hmmsearch.txt.gz").exists() }, + { assert new File("$outputDir/amp/hmmer_hmmsearch/ERZ1664510/ERZ1664510_mybacteriocin.hmmer_hmmsearch.txt.gz").exists() }, + { assert new File("$outputDir/amp/hmmer_hmmsearch/ERZ1664511/ERZ1664511_mybacteriocin.hmmer_hmmsearch.txt.gz").exists() }, + { assert new File("$outputDir/amp/hmmer_hmmsearch/ERZ1664515/ERZ1664515_mybacteriocin.hmmer_hmmsearch.txt.gz").exists() }, + { assert new File("$outputDir/amp/hmmer_hmmsearch/ERZ1664516/ERZ1664516_mybacteriocin.hmmer_hmmsearch.txt.gz").exists() }, + { assert new 
File("$outputDir/amp/hmmer_hmmsearch/ERZ1664517/ERZ1664517_mybacteriocin.hmmer_hmmsearch.txt.gz").exists() }, + { assert new File("$outputDir/amp/hmmer_hmmsearch/ERZ1664518/ERZ1664518_mybacteriocin.hmmer_hmmsearch.txt.gz").exists() }, + { assert new File("$outputDir/amp/hmmer_hmmsearch/ERZ1664520/ERZ1664520_mybacteriocin.hmmer_hmmsearch.txt.gz").exists() }, + { assert new File("$outputDir/amp/hmmer_hmmsearch/ERZ1664521/ERZ1664521_mybacteriocin.hmmer_hmmsearch.txt.gz").exists() }, + { assert new File("$outputDir/amp/hmmer_hmmsearch/ERZ1664523/ERZ1664523_mybacteriocin.hmmer_hmmsearch.txt.gz").exists() }, + { assert new File("$outputDir/amp/hmmer_hmmsearch/ERZ1664523/ERZ1664523_mybacteriocin.hmmer_hmmsearch.txt.gz").exists() }, + { assert new File("$outputDir/amp/hmmer_hmmsearch/ERZ1664524/ERZ1664524_mybacteriocin.hmmer_hmmsearch.txt.gz").exists() }, + { assert new File("$outputDir/amp/hmmer_hmmsearch/ERZ1664528/ERZ1664528_mybacteriocin.hmmer_hmmsearch.txt.gz").exists() }, + + // Macrel + { assert snapshot( + path("$outputDir/amp/macrel/ERZ1664501.macrel/ERZ1664501.macrel.prediction.gz"), + path("$outputDir/amp/macrel/ERZ1664503.macrel/ERZ1664503.macrel.prediction.gz"), + path("$outputDir/amp/macrel/ERZ1664504.macrel/ERZ1664504.macrel.prediction.gz"), + path("$outputDir/amp/macrel/ERZ1664505.macrel/ERZ1664505.macrel.prediction.gz"), + path("$outputDir/amp/macrel/ERZ1664506.macrel/ERZ1664506.macrel.prediction.gz"), + path("$outputDir/amp/macrel/ERZ1664507.macrel/ERZ1664507.macrel.prediction.gz"), + path("$outputDir/amp/macrel/ERZ1664508.macrel/ERZ1664508.macrel.prediction.gz"), + path("$outputDir/amp/macrel/ERZ1664509.macrel/ERZ1664509.macrel.prediction.gz"), + path("$outputDir/amp/macrel/ERZ1664510.macrel/ERZ1664510.macrel.prediction.gz"), + path("$outputDir/amp/macrel/ERZ1664511.macrel/ERZ1664511.macrel.prediction.gz"), + path("$outputDir/amp/macrel/ERZ1664515.macrel/ERZ1664515.macrel.prediction.gz"), + path("$outputDir/amp/macrel/ERZ1664516.macrel/ERZ1664516.macrel.prediction.gz"), + path("$outputDir/amp/macrel/ERZ1664517.macrel/ERZ1664517.macrel.prediction.gz"), + path("$outputDir/amp/macrel/ERZ1664518.macrel/ERZ1664518.macrel.prediction.gz"), + path("$outputDir/amp/macrel/ERZ1664520.macrel/ERZ1664520.macrel.prediction.gz"), + path("$outputDir/amp/macrel/ERZ1664521.macrel/ERZ1664521.macrel.prediction.gz"), + path("$outputDir/amp/macrel/ERZ1664523.macrel/ERZ1664523.macrel.prediction.gz"), + path("$outputDir/amp/macrel/ERZ1664524.macrel/ERZ1664524.macrel.prediction.gz"), + path("$outputDir/amp/macrel/ERZ1664528.macrel/ERZ1664528.macrel.prediction.gz") + ).match("macrel") }, + + // AMPcombi + { assert path("$outputDir/reports/ampcombi2/Ampcombi_summary.tsv").text.contains("ERZ1664515.11560-NODE-11560-length-551-cov-1.403226_2") }, + + // ARG workflow + + // DeepARG + { assert snapshot( + path("$outputDir/arg/deeparg/ERZ1664501/ERZ1664501.mapping.ARG"), + path("$outputDir/arg/deeparg/ERZ1664503/ERZ1664503.mapping.ARG"), + path("$outputDir/arg/deeparg/ERZ1664504/ERZ1664504.mapping.ARG"), + path("$outputDir/arg/deeparg/ERZ1664505/ERZ1664505.mapping.ARG"), + path("$outputDir/arg/deeparg/ERZ1664506/ERZ1664506.mapping.ARG"), + path("$outputDir/arg/deeparg/ERZ1664507/ERZ1664507.mapping.ARG"), + path("$outputDir/arg/deeparg/ERZ1664508/ERZ1664508.mapping.ARG"), + path("$outputDir/arg/deeparg/ERZ1664509/ERZ1664509.mapping.ARG"), + path("$outputDir/arg/deeparg/ERZ1664510/ERZ1664510.mapping.ARG"), + path("$outputDir/arg/deeparg/ERZ1664511/ERZ1664511.mapping.ARG"), + 
path("$outputDir/arg/deeparg/ERZ1664515/ERZ1664515.mapping.ARG"), + path("$outputDir/arg/deeparg/ERZ1664516/ERZ1664516.mapping.ARG"), + path("$outputDir/arg/deeparg/ERZ1664517/ERZ1664517.mapping.ARG"), + path("$outputDir/arg/deeparg/ERZ1664518/ERZ1664518.mapping.ARG"), + path("$outputDir/arg/deeparg/ERZ1664520/ERZ1664520.mapping.ARG"), + path("$outputDir/arg/deeparg/ERZ1664521/ERZ1664521.mapping.ARG"), + path("$outputDir/arg/deeparg/ERZ1664523/ERZ1664523.mapping.ARG"), + path("$outputDir/arg/deeparg/ERZ1664524/ERZ1664524.mapping.ARG"), + path("$outputDir/arg/deeparg/ERZ1664528/ERZ1664528.mapping.ARG") + ).match("deeparg") }, + + { assert new File("$outputDir/arg/deeparg/ERZ1664501/ERZ1664501.align.daa").exists() }, + { assert new File("$outputDir/arg/deeparg/ERZ1664503/ERZ1664503.align.daa").exists() }, + { assert new File("$outputDir/arg/deeparg/ERZ1664504/ERZ1664504.align.daa").exists() }, + { assert new File("$outputDir/arg/deeparg/ERZ1664505/ERZ1664505.align.daa").exists() }, + { assert new File("$outputDir/arg/deeparg/ERZ1664506/ERZ1664506.align.daa").exists() }, + { assert new File("$outputDir/arg/deeparg/ERZ1664507/ERZ1664507.align.daa").exists() }, + { assert new File("$outputDir/arg/deeparg/ERZ1664508/ERZ1664508.align.daa").exists() }, + { assert new File("$outputDir/arg/deeparg/ERZ1664509/ERZ1664509.align.daa").exists() }, + { assert new File("$outputDir/arg/deeparg/ERZ1664510/ERZ1664510.align.daa").exists() }, + { assert new File("$outputDir/arg/deeparg/ERZ1664511/ERZ1664511.align.daa").exists() }, + { assert new File("$outputDir/arg/deeparg/ERZ1664515/ERZ1664515.align.daa").exists() }, + { assert new File("$outputDir/arg/deeparg/ERZ1664516/ERZ1664516.align.daa").exists() }, + { assert new File("$outputDir/arg/deeparg/ERZ1664517/ERZ1664517.align.daa").exists() }, + { assert new File("$outputDir/arg/deeparg/ERZ1664518/ERZ1664518.align.daa").exists() }, + { assert new File("$outputDir/arg/deeparg/ERZ1664520/ERZ1664520.align.daa").exists() }, + { assert new File("$outputDir/arg/deeparg/ERZ1664521/ERZ1664521.align.daa").exists() }, + { assert new File("$outputDir/arg/deeparg/ERZ1664523/ERZ1664523.align.daa").exists() }, + { assert new File("$outputDir/arg/deeparg/ERZ1664524/ERZ1664524.align.daa").exists() }, + { assert new File("$outputDir/arg/deeparg/ERZ1664528/ERZ1664528.align.daa").exists() }, + + // ABRicate + { assert snapshot( + path("$outputDir/arg/abricate/ERZ1664501/ERZ1664501.txt"), + path("$outputDir/arg/abricate/ERZ1664503/ERZ1664503.txt"), + path("$outputDir/arg/abricate/ERZ1664504/ERZ1664504.txt"), + path("$outputDir/arg/abricate/ERZ1664505/ERZ1664505.txt"), + path("$outputDir/arg/abricate/ERZ1664506/ERZ1664506.txt"), + path("$outputDir/arg/abricate/ERZ1664507/ERZ1664507.txt"), + path("$outputDir/arg/abricate/ERZ1664508/ERZ1664508.txt"), + path("$outputDir/arg/abricate/ERZ1664509/ERZ1664509.txt"), + path("$outputDir/arg/abricate/ERZ1664510/ERZ1664510.txt"), + path("$outputDir/arg/abricate/ERZ1664511/ERZ1664511.txt"), + path("$outputDir/arg/abricate/ERZ1664515/ERZ1664515.txt"), + path("$outputDir/arg/abricate/ERZ1664516/ERZ1664516.txt"), + path("$outputDir/arg/abricate/ERZ1664517/ERZ1664517.txt"), + path("$outputDir/arg/abricate/ERZ1664518/ERZ1664518.txt"), + path("$outputDir/arg/abricate/ERZ1664520/ERZ1664520.txt"), + path("$outputDir/arg/abricate/ERZ1664521/ERZ1664521.txt"), + path("$outputDir/arg/abricate/ERZ1664523/ERZ1664523.txt"), + path("$outputDir/arg/abricate/ERZ1664524/ERZ1664524.txt"), + path("$outputDir/arg/abricate/ERZ1664528/ERZ1664528.txt") + 
).match("abricate") }, + + // AMRFinderPlus + { assert snapshot( + path("$outputDir/arg/amrfinderplus/ERZ1664501/ERZ1664501.tsv"), + path("$outputDir/arg/amrfinderplus/ERZ1664503/ERZ1664503.tsv"), + path("$outputDir/arg/amrfinderplus/ERZ1664504/ERZ1664504.tsv"), + path("$outputDir/arg/amrfinderplus/ERZ1664505/ERZ1664505.tsv"), + path("$outputDir/arg/amrfinderplus/ERZ1664506/ERZ1664506.tsv"), + path("$outputDir/arg/amrfinderplus/ERZ1664507/ERZ1664507.tsv"), + path("$outputDir/arg/amrfinderplus/ERZ1664508/ERZ1664508.tsv"), + path("$outputDir/arg/amrfinderplus/ERZ1664509/ERZ1664509.tsv"), + path("$outputDir/arg/amrfinderplus/ERZ1664510/ERZ1664510.tsv"), + path("$outputDir/arg/amrfinderplus/ERZ1664511/ERZ1664511.tsv"), + path("$outputDir/arg/amrfinderplus/ERZ1664515/ERZ1664515.tsv"), + path("$outputDir/arg/amrfinderplus/ERZ1664516/ERZ1664516.tsv"), + path("$outputDir/arg/amrfinderplus/ERZ1664517/ERZ1664517.tsv"), + path("$outputDir/arg/amrfinderplus/ERZ1664518/ERZ1664518.tsv"), + path("$outputDir/arg/amrfinderplus/ERZ1664520/ERZ1664520.tsv"), + path("$outputDir/arg/amrfinderplus/ERZ1664521/ERZ1664521.tsv"), + path("$outputDir/arg/amrfinderplus/ERZ1664523/ERZ1664523.tsv"), + path("$outputDir/arg/amrfinderplus/ERZ1664524/ERZ1664524.tsv"), + path("$outputDir/arg/amrfinderplus/ERZ1664528/ERZ1664528.tsv") + ).match("amrfinderplus") }, + + // RGI + { assert snapshot( + path("$outputDir/arg/rgi/ERZ1664501/ERZ1664501.txt"), + path("$outputDir/arg/rgi/ERZ1664503/ERZ1664503.txt"), + path("$outputDir/arg/rgi/ERZ1664504/ERZ1664504.txt"), + path("$outputDir/arg/rgi/ERZ1664505/ERZ1664505.txt"), + path("$outputDir/arg/rgi/ERZ1664506/ERZ1664506.txt"), + path("$outputDir/arg/rgi/ERZ1664507/ERZ1664507.txt"), + path("$outputDir/arg/rgi/ERZ1664508/ERZ1664508.txt"), + path("$outputDir/arg/rgi/ERZ1664509/ERZ1664509.txt"), + path("$outputDir/arg/rgi/ERZ1664510/ERZ1664510.txt"), + path("$outputDir/arg/rgi/ERZ1664511/ERZ1664511.txt"), + path("$outputDir/arg/rgi/ERZ1664515/ERZ1664515.txt"), + path("$outputDir/arg/rgi/ERZ1664516/ERZ1664516.txt"), + path("$outputDir/arg/rgi/ERZ1664517/ERZ1664517.txt"), + path("$outputDir/arg/rgi/ERZ1664518/ERZ1664518.txt"), + path("$outputDir/arg/rgi/ERZ1664520/ERZ1664520.txt"), + path("$outputDir/arg/rgi/ERZ1664521/ERZ1664521.txt"), + path("$outputDir/arg/rgi/ERZ1664523/ERZ1664523.txt"), + path("$outputDir/arg/rgi/ERZ1664524/ERZ1664524.txt"), + path("$outputDir/arg/rgi/ERZ1664528/ERZ1664528.txt") + ).match("rgi") }, + + // fARGene + { assert snapshot( + path("$outputDir/arg/fargene/ERZ1664501/class_a/results_summary.txt"), + path("$outputDir/arg/fargene/ERZ1664503/class_a/results_summary.txt"), + path("$outputDir/arg/fargene/ERZ1664504/class_a/results_summary.txt"), + path("$outputDir/arg/fargene/ERZ1664505/class_a/results_summary.txt"), + path("$outputDir/arg/fargene/ERZ1664506/class_a/results_summary.txt"), + path("$outputDir/arg/fargene/ERZ1664507/class_a/results_summary.txt"), + path("$outputDir/arg/fargene/ERZ1664508/class_a/results_summary.txt"), + path("$outputDir/arg/fargene/ERZ1664509/class_a/results_summary.txt"), + path("$outputDir/arg/fargene/ERZ1664510/class_a/results_summary.txt"), + path("$outputDir/arg/fargene/ERZ1664511/class_a/results_summary.txt"), + path("$outputDir/arg/fargene/ERZ1664515/class_a/results_summary.txt"), + path("$outputDir/arg/fargene/ERZ1664516/class_a/results_summary.txt"), + path("$outputDir/arg/fargene/ERZ1664517/class_a/results_summary.txt"), + path("$outputDir/arg/fargene/ERZ1664518/class_a/results_summary.txt"), + 
path("$outputDir/arg/fargene/ERZ1664520/class_a/results_summary.txt"), + path("$outputDir/arg/fargene/ERZ1664521/class_a/results_summary.txt"), + path("$outputDir/arg/fargene/ERZ1664523/class_a/results_summary.txt"), + path("$outputDir/arg/fargene/ERZ1664524/class_a/results_summary.txt"), + path("$outputDir/arg/fargene/ERZ1664528/class_a/results_summary.txt") + ).match("fargene") }, + + // hAMRonization + { assert snapshot(path("$outputDir/reports/hamronization_summarize/hamronization_combined_report.tsv")).match("hamronization_summarize") }, + + // argNorm + { assert snapshot ( + path("$outputDir/arg/argnorm/deeparg/ERZ1664501.ARG.normalized.tsv"), + path("$outputDir/arg/argnorm/deeparg/ERZ1664503.ARG.normalized.tsv"), + path("$outputDir/arg/argnorm/deeparg/ERZ1664504.ARG.normalized.tsv"), + path("$outputDir/arg/argnorm/deeparg/ERZ1664505.ARG.normalized.tsv"), + path("$outputDir/arg/argnorm/deeparg/ERZ1664506.ARG.normalized.tsv"), + path("$outputDir/arg/argnorm/deeparg/ERZ1664507.ARG.normalized.tsv"), + path("$outputDir/arg/argnorm/deeparg/ERZ1664508.ARG.normalized.tsv"), + path("$outputDir/arg/argnorm/deeparg/ERZ1664509.ARG.normalized.tsv"), + path("$outputDir/arg/argnorm/deeparg/ERZ1664510.ARG.normalized.tsv"), + path("$outputDir/arg/argnorm/deeparg/ERZ1664511.ARG.normalized.tsv"), + path("$outputDir/arg/argnorm/deeparg/ERZ1664515.ARG.normalized.tsv"), + path("$outputDir/arg/argnorm/deeparg/ERZ1664516.ARG.normalized.tsv"), + path("$outputDir/arg/argnorm/deeparg/ERZ1664517.ARG.normalized.tsv"), + path("$outputDir/arg/argnorm/deeparg/ERZ1664518.ARG.normalized.tsv"), + path("$outputDir/arg/argnorm/deeparg/ERZ1664520.ARG.normalized.tsv"), + path("$outputDir/arg/argnorm/deeparg/ERZ1664521.ARG.normalized.tsv"), + path("$outputDir/arg/argnorm/deeparg/ERZ1664523.ARG.normalized.tsv"), + path("$outputDir/arg/argnorm/deeparg/ERZ1664524.ARG.normalized.tsv"), + path("$outputDir/arg/argnorm/deeparg/ERZ1664528.ARG.normalized.tsv"), + ).match("argnorm_deeparg") }, + + { assert snapshot ( + path("$outputDir/arg/argnorm/abricate/ERZ1664501.normalized.tsv"), + path("$outputDir/arg/argnorm/abricate/ERZ1664503.normalized.tsv"), + path("$outputDir/arg/argnorm/abricate/ERZ1664504.normalized.tsv"), + path("$outputDir/arg/argnorm/abricate/ERZ1664505.normalized.tsv"), + path("$outputDir/arg/argnorm/abricate/ERZ1664506.normalized.tsv"), + path("$outputDir/arg/argnorm/abricate/ERZ1664507.normalized.tsv"), + path("$outputDir/arg/argnorm/abricate/ERZ1664508.normalized.tsv"), + path("$outputDir/arg/argnorm/abricate/ERZ1664509.normalized.tsv"), + path("$outputDir/arg/argnorm/abricate/ERZ1664510.normalized.tsv"), + path("$outputDir/arg/argnorm/abricate/ERZ1664511.normalized.tsv"), + path("$outputDir/arg/argnorm/abricate/ERZ1664515.normalized.tsv"), + path("$outputDir/arg/argnorm/abricate/ERZ1664516.normalized.tsv"), + path("$outputDir/arg/argnorm/abricate/ERZ1664517.normalized.tsv"), + path("$outputDir/arg/argnorm/abricate/ERZ1664518.normalized.tsv"), + path("$outputDir/arg/argnorm/abricate/ERZ1664520.normalized.tsv"), + path("$outputDir/arg/argnorm/abricate/ERZ1664521.normalized.tsv"), + path("$outputDir/arg/argnorm/abricate/ERZ1664523.normalized.tsv"), + path("$outputDir/arg/argnorm/abricate/ERZ1664524.normalized.tsv"), + path("$outputDir/arg/argnorm/abricate/ERZ1664528.normalized.tsv"), + ).match("argnorm_abricate") }, + + { assert snapshot ( + path("$outputDir/arg/argnorm/amrfinderplus/ERZ1664501.normalized.tsv"), + path("$outputDir/arg/argnorm/amrfinderplus/ERZ1664503.normalized.tsv"), + 
path("$outputDir/arg/argnorm/amrfinderplus/ERZ1664504.normalized.tsv"), + path("$outputDir/arg/argnorm/amrfinderplus/ERZ1664505.normalized.tsv"), + path("$outputDir/arg/argnorm/amrfinderplus/ERZ1664506.normalized.tsv"), + path("$outputDir/arg/argnorm/amrfinderplus/ERZ1664507.normalized.tsv"), + path("$outputDir/arg/argnorm/amrfinderplus/ERZ1664508.normalized.tsv"), + path("$outputDir/arg/argnorm/amrfinderplus/ERZ1664509.normalized.tsv"), + path("$outputDir/arg/argnorm/amrfinderplus/ERZ1664510.normalized.tsv"), + path("$outputDir/arg/argnorm/amrfinderplus/ERZ1664511.normalized.tsv"), + path("$outputDir/arg/argnorm/amrfinderplus/ERZ1664515.normalized.tsv"), + path("$outputDir/arg/argnorm/amrfinderplus/ERZ1664516.normalized.tsv"), + path("$outputDir/arg/argnorm/amrfinderplus/ERZ1664517.normalized.tsv"), + path("$outputDir/arg/argnorm/amrfinderplus/ERZ1664518.normalized.tsv"), + path("$outputDir/arg/argnorm/amrfinderplus/ERZ1664520.normalized.tsv"), + path("$outputDir/arg/argnorm/amrfinderplus/ERZ1664521.normalized.tsv"), + path("$outputDir/arg/argnorm/amrfinderplus/ERZ1664523.normalized.tsv"), + path("$outputDir/arg/argnorm/amrfinderplus/ERZ1664524.normalized.tsv"), + path("$outputDir/arg/argnorm/amrfinderplus/ERZ1664528.normalized.tsv"), + ).match("argnorm_amrfinderplus") }, + + // BGC workflow + + // antiSMASH + { assert snapshot( + path("$outputDir/bgc/antismash/ERZ1664501/ERZ1664501.gbk").text.contains("ccgcccatat cctttctgtc accgactcgg"), + path("$outputDir/bgc/antismash/ERZ1664503/ERZ1664503.gbk").text.contains("agaaggaacc gagcttgata aaacctatgc"), + path("$outputDir/bgc/antismash/ERZ1664504/ERZ1664504.gbk").text.contains("aggcaatacc ggctccaaca acagcagatt"), + path("$outputDir/bgc/antismash/ERZ1664505/ERZ1664505.gbk").text.contains("agggccacgc acacgggctc ggtgcacccc"), + path("$outputDir/bgc/antismash/ERZ1664506/ERZ1664506.gbk").text.contains("acaggatgga gcattgacta cattctggat"), + path("$outputDir/bgc/antismash/ERZ1664507/ERZ1664507.gbk").text.contains("aaagcaggaa aaagctgata acgcccgccc"), + path("$outputDir/bgc/antismash/ERZ1664508/ERZ1664508.gbk").text.contains("gggccgtttc gcggtaggcc tggttcatat"), + path("$outputDir/bgc/antismash/ERZ1664509/ERZ1664509.gbk").text.contains("aagcagtggg tctaaggcga agtcataccc"), + path("$outputDir/bgc/antismash/ERZ1664510/ERZ1664510.gbk").text.contains("ttcgcgataa agcgttccaa tggggatgag"), + path("$outputDir/bgc/antismash/ERZ1664511/ERZ1664511.gbk").text.contains("tttttgggaa cggcgtccgt tctcaaagag"), + path("$outputDir/bgc/antismash/ERZ1664515/ERZ1664515.gbk").text.contains("tattgcaaac atatcaagca ccttcccttc"), + path("$outputDir/bgc/antismash/ERZ1664516/ERZ1664516.gbk").text.contains("aactccctgg ttgaaccggc cgtaatactt"), + path("$outputDir/bgc/antismash/ERZ1664517/ERZ1664517.gbk").text.contains("gacgctttct ttcagaaacg ttttcccctt"), + path("$outputDir/bgc/antismash/ERZ1664518/ERZ1664518.gbk").text.contains("cagcgcataa gtctgcgtca cctgtcccag"), + path("$outputDir/bgc/antismash/ERZ1664520/ERZ1664520.gbk").text.contains("ggagtttttg cgctttgacc gccacgggga"), + path("$outputDir/bgc/antismash/ERZ1664521/ERZ1664521.gbk").text.contains("ggtaaaggaa accatccggc ggccaatctg"), + path("$outputDir/bgc/antismash/ERZ1664523/ERZ1664523.gbk").text.contains("gctgtgggga tgggtaagcg aggatgatgc"), + path("$outputDir/bgc/antismash/ERZ1664524/ERZ1664524.gbk").text.contains("ataatggctt cttttataaa tgcataaatt"), + path("$outputDir/bgc/antismash/ERZ1664528/ERZ1664528.gbk").text.contains("cagaaagaag aaaaacgcct gacttgggcg") + ).match("antismash") }, + + // GECCO + { assert snapshot( + 
path("$outputDir/bgc/gecco/ERZ1664504/ERZ1664504.220-NODE-220-length-4587-cov-2.552957_cluster_1.gbk"), + path("$outputDir/bgc/gecco/ERZ1664504/ERZ1664504.48-NODE-48-length-9582-cov-5.239425_cluster_1.gbk"), + path("$outputDir/bgc/gecco/ERZ1664506/ERZ1664506.42-NODE-42-length-11967-cov-6.006380_cluster_1.gbk"), + path("$outputDir/bgc/gecco/ERZ1664508/ERZ1664508.3061-NODE-3061-length-1263-cov-3.647351_cluster_1.gbk"), + path("$outputDir/bgc/gecco/ERZ1664510/ERZ1664510.123-NODE-123-length-8863-cov-8.649410_cluster_1.gbk"), + path("$outputDir/bgc/gecco/ERZ1664510/ERZ1664510.210-NODE-210-length-5173-cov-7.860688_cluster_1.gbk"), + path("$outputDir/bgc/gecco/ERZ1664511/ERZ1664511.16-NODE-16-length-49668-cov-9.810473_cluster_1.gbk"), + path("$outputDir/bgc/gecco/ERZ1664515/ERZ1664515.9-NODE-9-length-49063-cov-10.926196_cluster_1.gbk"), + path("$outputDir/bgc/gecco/ERZ1664516/ERZ1664516.158-NODE-158-length-6232-cov-9.863850_cluster_1.gbk"), + path("$outputDir/bgc/gecco/ERZ1664517/ERZ1664517.38-NODE-38-length-19981-cov-8.613771_cluster_1.gbk"), + path("$outputDir/bgc/gecco/ERZ1664518/ERZ1664518.217-NODE-217-length-4457-cov-6.415947_cluster_1.gbk"), + path("$outputDir/bgc/gecco/ERZ1664521/ERZ1664521.1871-NODE-1871-length-1473-cov-1.858251_cluster_1.gbk"), + path("$outputDir/bgc/gecco/ERZ1664521/ERZ1664521.895-NODE-895-length-1964-cov-2.221058_cluster_1.gbk"), + path("$outputDir/bgc/gecco/ERZ1664523/ERZ1664523.16-NODE-16-length-15072-cov-6.654591_cluster_1.gbk"), + path("$outputDir/bgc/gecco/ERZ1664524/ERZ1664524.1150-NODE-1150-length-2386-cov-3.450879_cluster_1.gbk"), + path("$outputDir/bgc/gecco/ERZ1664524/ERZ1664524.54-NODE-54-length-9607-cov-5.345582_cluster_1.gbk"), + path("$outputDir/bgc/gecco/ERZ1664528/ERZ1664528.138-NODE-138-length-5805-cov-4.599304_cluster_1.gbk"), + path("$outputDir/bgc/gecco/ERZ1664528/ERZ1664528.1641-NODE-1641-length-2049-cov-4.697091_cluster_1.gbk") + ).match("gecco") }, + + // comBGC + { assert snapshot("$outputDir/reports/combgc/combgc_complete_summary.tsv") } + ) + } + } +} diff --git a/tests/test_full.nf.test.snap b/tests/test_full.nf.test.snap new file mode 100644 index 00000000..296fd4b5 --- /dev/null +++ b/tests/test_full.nf.test.snap @@ -0,0 +1,366 @@ +{ + "hamronization_summarize": { + "content": [ + "hamronization_combined_report.tsv:md5,69e71df9685cbd70579ac0030f624ca4" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.1" + }, + "timestamp": "2024-08-16T21:42:15.219521746" + }, + "deeparg": { + "content": [ + "ERZ1664501.mapping.ARG:md5,a4641ca28291c73f4ce664af575a4811", + "ERZ1664503.mapping.ARG:md5,c217c176a55170cf7f342dd6b082bec6", + "ERZ1664504.mapping.ARG:md5,2f2a6297dc8752766f65f2b3e966158d", + "ERZ1664505.mapping.ARG:md5,be741b562796026b46f649c8fbe6e73d", + "ERZ1664506.mapping.ARG:md5,17122078c5b1821ea9d841eb1775e987", + "ERZ1664507.mapping.ARG:md5,6dbd85abafa0f892c4b557eb8f93b788", + "ERZ1664508.mapping.ARG:md5,9a824269207740c926eb7d1ade69cd89", + "ERZ1664509.mapping.ARG:md5,fa8ffd39d8405bd167bb1676b5a29db7", + "ERZ1664510.mapping.ARG:md5,16f2e41c2378645dbbdf9867f9000acf", + "ERZ1664511.mapping.ARG:md5,7f2bd09ed161162a82c3c10c06bf1ee8", + "ERZ1664515.mapping.ARG:md5,a4bb295a414b9a26c4e2d032bb25624f", + "ERZ1664516.mapping.ARG:md5,e3ec14da3e206782e1151593d801015d", + "ERZ1664517.mapping.ARG:md5,9f22fec9df39231f0a52865ca9245451", + "ERZ1664518.mapping.ARG:md5,821fd592c54795e2666277e3a2c84169", + "ERZ1664520.mapping.ARG:md5,24942a7fadad6af031c116e8f9ea848e", + "ERZ1664521.mapping.ARG:md5,d61ee33f0395ab5dbb6b65f816186d77", + 
"ERZ1664523.mapping.ARG:md5,2ba512cfd091a9ab18825cd4d9560a83", + "ERZ1664524.mapping.ARG:md5,d3fd9b70a667f37478c901c4ec5c69be", + "ERZ1664528.mapping.ARG:md5,1da3f34f173fabe34ff5bc122d9ec7e8" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.1" + }, + "timestamp": "2024-08-16T21:42:11.887062467" + }, + "ampir": { + "content": [ + "ERZ1664501.ampir.tsv:md5,ef78b10c6f4c6a555eb9ef8d93291aad", + "ERZ1664503.ampir.tsv:md5,7354b3c583443d9d3cab33ce3cb6327e", + "ERZ1664504.ampir.tsv:md5,506a55b7dc97b33b5533febabde8b0cf", + "ERZ1664505.ampir.tsv:md5,27f72ca9213aac5b0d857df638db692b", + "ERZ1664506.ampir.tsv:md5,1c5df79369a889b136cdef0e89f3f120", + "ERZ1664507.ampir.tsv:md5,a44751ce33a3384583dd43795d729245", + "ERZ1664508.ampir.tsv:md5,a9267c72360b01116bc61ab360f01ab4", + "ERZ1664509.ampir.tsv:md5,c0d8115529d6d8ee4989bd1e9dfe5766", + "ERZ1664510.ampir.tsv:md5,2a6d6563be682769a83208fe025ed946", + "ERZ1664511.ampir.tsv:md5,b96317516b603ea796d58366693e6b96", + "ERZ1664515.ampir.tsv:md5,9fbeb531294e951336e4c91257d44e30", + "ERZ1664516.ampir.tsv:md5,44dcbd1371c1fcfe0e98e756d6a74996", + "ERZ1664517.ampir.tsv:md5,35a42d7aabc1edef65a0c0f2129530bc", + "ERZ1664518.ampir.tsv:md5,c7c9157000642e158b6746c719d65a85", + "ERZ1664520.ampir.tsv:md5,62f2e109031048fc593aa525405a19b4", + "ERZ1664521.ampir.tsv:md5,91bebaf23d2a63192359178af8ae1d42", + "ERZ1664523.ampir.tsv:md5,1e01f9649dc2e9bebd8ce635e051e3df", + "ERZ1664524.ampir.tsv:md5,8ea8ca6483c416695ad2307e7a939f8d", + "ERZ1664528.ampir.tsv:md5,a239169a2cd41265693442694bb5e329", + "ERZ1664501.ampir.faa:md5,88d04f76764566e029f1a0eb7481bd50", + "ERZ1664503.ampir.faa:md5,754b00982b99d20d24ddd2c39e3db060", + "ERZ1664504.ampir.faa:md5,c6e84c9ee141c097decb89def230a70b", + "ERZ1664505.ampir.faa:md5,7519e8f28ca3c3e8b33e65a672b6f418", + "ERZ1664506.ampir.faa:md5,39162c25303085463d893acee70c2921", + "ERZ1664507.ampir.faa:md5,8119bbc3daa1fc93cf3760b359001212", + "ERZ1664508.ampir.faa:md5,369131964554c5d7b7b56a99a4eeb851", + "ERZ1664509.ampir.faa:md5,2594cd39d2d0cf96d303955528e9c962", + "ERZ1664510.ampir.faa:md5,9bf556234e1a9843d7155118cb8b6afb", + "ERZ1664511.ampir.faa:md5,5ddc4c648af3db91d1aba27527c13622", + "ERZ1664515.ampir.faa:md5,a7830a1af51b290793af9ac83d8c3296", + "ERZ1664516.ampir.faa:md5,6c5b07f03f6f1bc55a44e0a8cbc18cb3", + "ERZ1664517.ampir.faa:md5,2c59abb9b9bfc690f97fefe10a6bc4ce", + "ERZ1664518.ampir.faa:md5,7f5519edb85db37903f3665541219c69", + "ERZ1664520.ampir.faa:md5,f3314a405c3c33e05722a8ab6021cb64", + "ERZ1664521.ampir.faa:md5,139303c88f5f5a4041ee059519ba7f98", + "ERZ1664523.ampir.faa:md5,fb34351d27a405e4a9968664878a0fd4", + "ERZ1664524.ampir.faa:md5,e4660c8d3ac00779a26ee2f0105bba2a", + "ERZ1664528.ampir.faa:md5,36fd7ea6830c3068015105d20b4404a7" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.1" + }, + "timestamp": "2024-08-16T21:42:05.572253729" + }, + "argnorm_amrfinderplus": { + "content": [ + "ERZ1664501.normalized.tsv:md5,ef07ef517f4e73b6cfd4155d14f1a459", + "ERZ1664503.normalized.tsv:md5,4a6be3b2878c77c351581283a3c6cb92", + "ERZ1664504.normalized.tsv:md5,8e984c4365df778d75e80c2928bad20d", + "ERZ1664505.normalized.tsv:md5,cbe3ce3a810cc6c628268617d2924f51", + "ERZ1664506.normalized.tsv:md5,14225d75f1af11a6b667d1a80a14e9d4", + "ERZ1664507.normalized.tsv:md5,8febe711ddd369571c5dd071d77fdbeb", + "ERZ1664508.normalized.tsv:md5,973d098a82e9d67e87a1bd7a2684299a", + "ERZ1664509.normalized.tsv:md5,e0a387b6727320a712e204af4776bd79", + "ERZ1664510.normalized.tsv:md5,949b8524b11e281d53fa67037a346497", + 
"ERZ1664511.normalized.tsv:md5,810ff27b0c8664f2350ade9e76095574", + "ERZ1664515.normalized.tsv:md5,64847a921608f2b37ecfbc324fec1cb1", + "ERZ1664516.normalized.tsv:md5,d25d1d43562344b463802dc5dfaccf52", + "ERZ1664517.normalized.tsv:md5,4d8e73eccd1001ebc6225167df6a2374", + "ERZ1664518.normalized.tsv:md5,f8b744ae41b1d0ba101ae9a228529d05", + "ERZ1664520.normalized.tsv:md5,f036a7211ad6df9b874bad4c99c5ddda", + "ERZ1664521.normalized.tsv:md5,e41c50f9524dfdde17bf782dfc6c7eea", + "ERZ1664523.normalized.tsv:md5,8590e4c5437121a93f527f55125291c5", + "ERZ1664524.normalized.tsv:md5,951d29c42bd2890bc1a28d91a3f9bb84", + "ERZ1664528.normalized.tsv:md5,52495202c208557c2c9ee0c7b7ef5497" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.1" + }, + "timestamp": "2024-08-19T17:43:22.768320462" + }, + "argnorm_abricate": { + "content": [ + "ERZ1664501.normalized.tsv:md5,618aa19dcaed5d3a5909cb84393d90cb", + "ERZ1664503.normalized.tsv:md5,f2ee4aeafc929e3893677c271b3a04d4", + "ERZ1664504.normalized.tsv:md5,88b9a76d726402b95b2cd348459f0666", + "ERZ1664505.normalized.tsv:md5,817218f39d51d6f327623b26512e2e2d", + "ERZ1664506.normalized.tsv:md5,66806a70c95b2186f085f27661639738", + "ERZ1664507.normalized.tsv:md5,47e109f815e4b8e8d28aaeb75e4947b7", + "ERZ1664508.normalized.tsv:md5,60990fe382f0b43a288a8f66bcbde19f", + "ERZ1664509.normalized.tsv:md5,9710235350f4ff66c06b4abb78c23f80", + "ERZ1664510.normalized.tsv:md5,520f6eff7afdc9c52b9a1a8bb363fe85", + "ERZ1664511.normalized.tsv:md5,893ce88576218cd6acb246046eadb1af", + "ERZ1664515.normalized.tsv:md5,f88c35c590379f3a8a62664679d7404b", + "ERZ1664516.normalized.tsv:md5,b0499afcad11e34f3224e58431fd1aff", + "ERZ1664517.normalized.tsv:md5,79d79caa0a5a87a8dfb48eb67e4bf3f1", + "ERZ1664518.normalized.tsv:md5,8cee92e968b380c2c1ab6b5707608092", + "ERZ1664520.normalized.tsv:md5,4ba7e4daeeaf7f5d064131a742225152", + "ERZ1664521.normalized.tsv:md5,b724f087cc957400a5ff404a11535e29", + "ERZ1664523.normalized.tsv:md5,6cbe41ccfb7660e70aec3b711a33e18d", + "ERZ1664524.normalized.tsv:md5,658d14b9cfd773bc0ada2da2f44252cd", + "ERZ1664528.normalized.tsv:md5,a3db7b884b5fe91a59cf55c332fd0337" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T00:54:44.957353948" + }, + "gecco": { + "content": [ + "ERZ1664504.220-NODE-220-length-4587-cov-2.552957_cluster_1.gbk:md5,261a667aef6a1fed0aab0a1e6c4c396f", + "ERZ1664504.48-NODE-48-length-9582-cov-5.239425_cluster_1.gbk:md5,1b0d356ed26b09b3c62e7bf06b003c1a", + "ERZ1664506.42-NODE-42-length-11967-cov-6.006380_cluster_1.gbk:md5,1c2e2d8033286841b412399039bbfd24", + "ERZ1664508.3061-NODE-3061-length-1263-cov-3.647351_cluster_1.gbk:md5,fc75b2afba00fa0c08fc677920c3dab5", + "ERZ1664510.123-NODE-123-length-8863-cov-8.649410_cluster_1.gbk:md5,1ebcc9f338bf402483f671b9e641b9f3", + "ERZ1664510.210-NODE-210-length-5173-cov-7.860688_cluster_1.gbk:md5,e11a7207dae596faa24f3ccc3bd4078d", + "ERZ1664511.16-NODE-16-length-49668-cov-9.810473_cluster_1.gbk:md5,611d4b75a1206df0ced28fb49de5d970", + "ERZ1664515.9-NODE-9-length-49063-cov-10.926196_cluster_1.gbk:md5,b4c89821cb6f28be3408e88490d38ae9", + "ERZ1664516.158-NODE-158-length-6232-cov-9.863850_cluster_1.gbk:md5,110480bf384c530e7aff3ad42be5e9fd", + "ERZ1664517.38-NODE-38-length-19981-cov-8.613771_cluster_1.gbk:md5,e2adfe2599cc481c84ef41167ef0192e", + "ERZ1664518.217-NODE-217-length-4457-cov-6.415947_cluster_1.gbk:md5,2c34b0b6e3611bba535afdea3b5d8f5a", + "ERZ1664521.1871-NODE-1871-length-1473-cov-1.858251_cluster_1.gbk:md5,9b91e8a5adc522ffa4a5fc47a2fbb570", + 
"ERZ1664521.895-NODE-895-length-1964-cov-2.221058_cluster_1.gbk:md5,f39ce0627a18c84feba727596b5e9b69", + "ERZ1664523.16-NODE-16-length-15072-cov-6.654591_cluster_1.gbk:md5,4e1c5e95f7d4c6e1e61a8ceddfa3137e", + "ERZ1664524.1150-NODE-1150-length-2386-cov-3.450879_cluster_1.gbk:md5,78b7101cad30b392a7bbf6d9be7c5152", + "ERZ1664524.54-NODE-54-length-9607-cov-5.345582_cluster_1.gbk:md5,56a8f6598d928e7514ab2a5ab663f076", + "ERZ1664528.138-NODE-138-length-5805-cov-4.599304_cluster_1.gbk:md5,096bf5dc83df18507982bd9b3dc0cf72", + "ERZ1664528.1641-NODE-1641-length-2049-cov-4.697091_cluster_1.gbk:md5,c122763612b7cbe1967d98784cb11273" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.1" + }, + "timestamp": "2024-08-19T17:43:36.771956416" + }, + "antismash": { + "content": [ + true, + true, + true, + true, + true, + true, + true, + true, + true, + true, + true, + true, + true, + true, + true, + true, + true, + true, + true + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T00:54:59.889179408" + }, + "argnorm_deeparg": { + "content": [ + "ERZ1664501.ARG.normalized.tsv:md5,b1112b8509e77e01b6810b71c9ab4cd2", + "ERZ1664503.ARG.normalized.tsv:md5,b736252e5dafbea27284d73650a1fae2", + "ERZ1664504.ARG.normalized.tsv:md5,6e8daa74f53ed59b99e2e281153a2a1b", + "ERZ1664505.ARG.normalized.tsv:md5,1ca35eca67e9d8cb61acaf80a0b27425", + "ERZ1664506.ARG.normalized.tsv:md5,2bba2c688159baff5b48d7547d330444", + "ERZ1664507.ARG.normalized.tsv:md5,63e96001b1ab9a64724f4c3c38c21004", + "ERZ1664508.ARG.normalized.tsv:md5,59764d22d08c34e3a5cefd682257b5f6", + "ERZ1664509.ARG.normalized.tsv:md5,86730f7950d84ef4a48e2042d92d9abc", + "ERZ1664510.ARG.normalized.tsv:md5,361e6f9a96d923f97d685df86492068a", + "ERZ1664511.ARG.normalized.tsv:md5,87628e85f45fd91c51c4fa1fe40a4150", + "ERZ1664515.ARG.normalized.tsv:md5,4e38fecd8c8ad0242e1b1907072af64b", + "ERZ1664516.ARG.normalized.tsv:md5,79ef10afc7673dcc633861d1e5871b24", + "ERZ1664517.ARG.normalized.tsv:md5,abed6aef4acab35851fb2e12f276a9e0", + "ERZ1664518.ARG.normalized.tsv:md5,6d8c2154cad737d01eceb497ee3482b3", + "ERZ1664520.ARG.normalized.tsv:md5,fd60cd7748be9074357033907053a0b0", + "ERZ1664521.ARG.normalized.tsv:md5,d4a368c0125cad652e07065516da794b", + "ERZ1664523.ARG.normalized.tsv:md5,6473552807041db9b4fd0cd17a81659c", + "ERZ1664524.ARG.normalized.tsv:md5,03840f3b0030f196bd890fb1e576d952", + "ERZ1664528.ARG.normalized.tsv:md5,473d63c133be0c8d402af3bcf0fbfda9" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.1" + }, + "timestamp": "2024-08-19T17:43:22.511288416" + }, + "macrel": { + "content": [ + "ERZ1664501.macrel.prediction.gz:md5,a553cb5d0745a01816c7b9c75822df29", + "ERZ1664503.macrel.prediction.gz:md5,3ca2cd9a6d999725b4a487c13ddb7fd9", + "ERZ1664504.macrel.prediction.gz:md5,52e9c1ec54f486765dea07d97b1c97b7", + "ERZ1664505.macrel.prediction.gz:md5,94fa17cce88549aab16555ee598c02bb", + "ERZ1664506.macrel.prediction.gz:md5,732be08d0236cf63641ef445a02cf1ee", + "ERZ1664507.macrel.prediction.gz:md5,eab2d426cf31a47597e61ddb25bf3d49", + "ERZ1664508.macrel.prediction.gz:md5,a0e40483e0136c3bb4abea9c9cba7d4b", + "ERZ1664509.macrel.prediction.gz:md5,47ca65c261cf402b390f6951bb1ed9dc", + "ERZ1664510.macrel.prediction.gz:md5,3e6ca785e579757616fe515efef1537e", + "ERZ1664511.macrel.prediction.gz:md5,df36fa0448591690fc6b7ded2517256e", + "ERZ1664515.macrel.prediction.gz:md5,b896ac50f6ebec1e725cff67bdff8fed", + "ERZ1664516.macrel.prediction.gz:md5,5dcbc87c6a44e8783dfe48b6385dfca8", + 
"ERZ1664517.macrel.prediction.gz:md5,02373e1b4383dc7501e7e142c9443b7a", + "ERZ1664518.macrel.prediction.gz:md5,7290477960af29a76563e8ded5d4a623", + "ERZ1664520.macrel.prediction.gz:md5,bdf1a379ee49e34b9a448762e5301926", + "ERZ1664521.macrel.prediction.gz:md5,91bd81f6c4e5c8ff4cc684ec04fa0a30", + "ERZ1664523.macrel.prediction.gz:md5,cee5ae9ba4a8a3879ab245b767815394", + "ERZ1664524.macrel.prediction.gz:md5,73f21254f4e4056a71ebf43851af3698", + "ERZ1664528.macrel.prediction.gz:md5,2e957f217c570a58ee61d97c690c1424" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.1" + }, + "timestamp": "2024-08-16T21:42:08.697655199" + }, + "abricate": { + "content": [ + "ERZ1664501.txt:md5,1ace32d2d44194d635db31daa89a6fae", + "ERZ1664503.txt:md5,da890e2cef1b1a34ec035f6198f0a60b", + "ERZ1664504.txt:md5,3db1864a6efe5321379e89dcee34d505", + "ERZ1664505.txt:md5,317354f6091bad44ab5852399d48eb4a", + "ERZ1664506.txt:md5,8fcc568d6a15d1c3ac889169ce884093", + "ERZ1664507.txt:md5,0be8f76b35aca900f8f7fa6d2e7fc1f9", + "ERZ1664508.txt:md5,357da5e192d9a17b501446e181f41942", + "ERZ1664509.txt:md5,c14f4aef2c96e8c4f6688af35fe07a2c", + "ERZ1664510.txt:md5,0d88060e28b267a308271a2a2af38b12", + "ERZ1664511.txt:md5,46adf11d5d3952e3709ba05ec76b5e8a", + "ERZ1664515.txt:md5,4b7a0db47ac6e9baf723e6b2ef31bfc4", + "ERZ1664516.txt:md5,1ccfd94077fe329f7b30351aa846b327", + "ERZ1664517.txt:md5,8137ab84373a5300c9626a9459a2c935", + "ERZ1664518.txt:md5,db514f4bef8de0d4799f478e1807adc6", + "ERZ1664520.txt:md5,a3afa2368c941cdb0c4abd8efa855f0e", + "ERZ1664521.txt:md5,2849a78188c4793d4608ba1775da1d58", + "ERZ1664523.txt:md5,507e1710e7220965010ad8375b4c434a", + "ERZ1664524.txt:md5,b7d380fe3fbcb0fe2ac23823cb6b35e8", + "ERZ1664528.txt:md5,64aff1aaaab8b3d009edd40527446c08" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T00:54:25.215727223" + }, + "fargene": { + "content": [ + "results_summary.txt:md5,3061d75086b3d25605eda0ea05e1c927", + "results_summary.txt:md5,ea00de6524c521e06583ee13ffbcf338", + "results_summary.txt:md5,c5a6205a249cb6112b6235bbab51c60d", + "results_summary.txt:md5,c5a6205a249cb6112b6235bbab51c60d", + "results_summary.txt:md5,cc647b7810c7335edb6aa939f9f0fbde", + "results_summary.txt:md5,fa58a7f510100be20ce22fe3f6b036e3", + "results_summary.txt:md5,33b51ce0c8ba7c65bdb8bfe1480d85cb", + "results_summary.txt:md5,fa58a7f510100be20ce22fe3f6b036e3", + "results_summary.txt:md5,f3f36761cda3fbb23e0250f9b0b6657a", + "results_summary.txt:md5,f3f36761cda3fbb23e0250f9b0b6657a", + "results_summary.txt:md5,657d012f697a1a9e3ce7f8a0f675aed0", + "results_summary.txt:md5,f3f36761cda3fbb23e0250f9b0b6657a", + "results_summary.txt:md5,f3f36761cda3fbb23e0250f9b0b6657a", + "results_summary.txt:md5,c5a6205a249cb6112b6235bbab51c60d", + "results_summary.txt:md5,33b51ce0c8ba7c65bdb8bfe1480d85cb", + "results_summary.txt:md5,54ba6a1a657fea6b78abac50820c2c24", + "results_summary.txt:md5,f3f36761cda3fbb23e0250f9b0b6657a", + "results_summary.txt:md5,f3f36761cda3fbb23e0250f9b0b6657a", + "results_summary.txt:md5,cc647b7810c7335edb6aa939f9f0fbde" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T00:54:34.896363945" + }, + "rgi": { + "content": [ + "ERZ1664501.txt:md5,631580295a01dfa9942a84ec6daddb7e", + "ERZ1664503.txt:md5,19b414d2f84d99e7da99558f13ddc3e5", + "ERZ1664504.txt:md5,35cfd6af482966669d98a65b56331a3d", + "ERZ1664505.txt:md5,d9f983090909140617bc784635220c4b", + "ERZ1664506.txt:md5,7a52f37f5672b06b05741ee058391f8f", + 
"ERZ1664507.txt:md5,721b11a0d9a16cbcbfd9004478b00600", + "ERZ1664508.txt:md5,b216d24eb67e17b00176fd0e9fddee2d", + "ERZ1664509.txt:md5,a83a12f5eee2817adde168ceea2918c5", + "ERZ1664510.txt:md5,648ff158c4eb26a5ea32d784f035919e", + "ERZ1664511.txt:md5,9bae24f90a3ec78bf949a98fdf22a497", + "ERZ1664515.txt:md5,2d0d0b2f048fa6c28840b1b6a2c9454d", + "ERZ1664516.txt:md5,eb69d148d8dad471c8d9a36dd915f4a4", + "ERZ1664517.txt:md5,79b0f80950eb5f0f51542b394a77a173", + "ERZ1664518.txt:md5,887de51b7632b0c635b0fe6deda75266", + "ERZ1664520.txt:md5,3caf2e1b0afcbfb73522bfa1cee1d06e", + "ERZ1664521.txt:md5,19334a653a98bbced73f1f2ec92e4eb8", + "ERZ1664523.txt:md5,0e47ce5c41d4d0d39d270a18ce62773a", + "ERZ1664524.txt:md5,8ca49d7dee9c7de25910de130de93859", + "ERZ1664528.txt:md5,6a10752196f2f33bcee972d15d669803" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T00:54:31.62460906" + }, + "amrfinderplus": { + "content": [ + "ERZ1664501.tsv:md5,dd81ffbf4ceddfd08df9c93d350d19fa", + "ERZ1664503.tsv:md5,7b1e3c4be2e369a2ca04fcd63da1acaa", + "ERZ1664504.tsv:md5,34f751f50617c9612b2e84ee61182ace", + "ERZ1664505.tsv:md5,195ee9875c095324bf9da03627551f71", + "ERZ1664506.tsv:md5,ab833bb2d72e4165130c590feeb81abc", + "ERZ1664507.tsv:md5,a416a831bcc9f2334064c45b04b65893", + "ERZ1664508.tsv:md5,7bf5af85e96374b92dec02986f55cd29", + "ERZ1664509.tsv:md5,90090405b63b9e4e6b115ad4d7658681", + "ERZ1664510.tsv:md5,5cf184c3f55fca8b2ab74fd8e2c68c8b", + "ERZ1664511.tsv:md5,caac6335c1ef383c33173a8a627c0a95", + "ERZ1664515.tsv:md5,cc8b7e5d2df434729a08b0aabefba91c", + "ERZ1664516.tsv:md5,1a92c5bec7ff819a6f830a1726894f7c", + "ERZ1664517.tsv:md5,d8c4989f198d6853e35820da21feffe2", + "ERZ1664518.tsv:md5,709d6bfb280c509b74f3c1b4d8a1c4bc", + "ERZ1664520.tsv:md5,2367abb0f961e00bf8dcdfe7e6083c2c", + "ERZ1664521.tsv:md5,12f6aee5987e86669534d3b64a62a840", + "ERZ1664523.tsv:md5,ba69795aaea671108bfa1e48c509dd79", + "ERZ1664524.tsv:md5,b0aa6f732ca2b922d2291deaba0d1312", + "ERZ1664528.tsv:md5,93249b05df4a0587db305684da8a1f8e" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T00:54:28.386682111" + } +} \ No newline at end of file diff --git a/tests/test_nothing.nf.test b/tests/test_nothing.nf.test new file mode 100644 index 00000000..a141d401 --- /dev/null +++ b/tests/test_nothing.nf.test @@ -0,0 +1,25 @@ +nextflow_pipeline { + + name "Test pipeline: NFCORE_FUNCSCAN" + script "main.nf" + tag "pipeline" + tag "nfcore_funcscan" + tag "test_nothing" + + test("test_nothing_profile") { + + when { + params { + outdir = "$outputDir" + } + } + + then { + assertAll( + { assert workflow.success }, + { assert new File("$outputDir/pipeline_info/nf_core_pipeline_software_mqc_versions.yml").exists() }, + { assert new File("$outputDir/multiqc/multiqc_report.html").exists() }, + ) + } + } +} diff --git a/tests/test_preannotated.nf.test b/tests/test_preannotated.nf.test new file mode 100644 index 00000000..2577b779 --- /dev/null +++ b/tests/test_preannotated.nf.test @@ -0,0 +1,150 @@ +nextflow_pipeline { + + name "Test pipeline: NFCORE_FUNCSCAN" + script "main.nf" + tag "pipeline" + tag "nfcore_funcscan" + tag "test_preannotated" + + test("test_preannotated_profile") { + + when { + params { + outdir = "$outputDir" + } + } + + then { + assertAll( + { assert workflow.success }, + { assert new File("$outputDir/pipeline_info/nf_core_pipeline_software_mqc_versions.yml").exists() }, + { assert new File("$outputDir/multiqc/multiqc_report.html").exists() }, + + // AMPir + { assert snapshot( + 
path("$outputDir/amp/ampir/sample_1/sample_1.ampir.tsv").text.contains("MRWGYPLSLVLMALSVAAPMIYFRRKGWLR"), + path("$outputDir/amp/ampir/sample_1/sample_1.ampir.faa"), + path("$outputDir/amp/ampir/sample_2/sample_2.ampir.tsv").text.contains("MRWGYPLSLVLMALSVAAPMIYFRRKGWLR"), + path("$outputDir/amp/ampir/sample_2/sample_2.ampir.faa"), + path("$outputDir/amp/ampir/sample_3/sample_3.ampir.tsv").text.contains("IPELEMRWGYPLSLVLMALSVAAPMIYFRRKGWLR"), + path("$outputDir/amp/ampir/sample_3/sample_3.ampir.faa") + ).match("ampir") }, + + // AMPlify + { assert snapshot( + path("$outputDir/amp/amplify/sample_1/sample_1.amplify.tsv").text.contains("MRWGYPLSLVLMALSVAAPMIYFRRKGWLR"), + path("$outputDir/amp/amplify/sample_2/sample_2.amplify.tsv").text.contains("MRWGYPLSLVLMALSVAAPMIYFRRKGWLR"), + path("$outputDir/amp/amplify/sample_3/sample_3.amplify.tsv").text.contains("IPELEMRWGYPLSLVLMALSVAAPMIYFRRKGWLR") + ).match("amplify") }, + + // HMMsearch + { assert new File("$outputDir/amp/hmmer_hmmsearch/sample_1/sample_1_mybacteriocin.hmmer_hmmsearch.txt.gz").exists() }, + { assert new File("$outputDir/amp/hmmer_hmmsearch/sample_2/sample_2_mybacteriocin.hmmer_hmmsearch.txt.gz").exists() }, + { assert new File("$outputDir/amp/hmmer_hmmsearch/sample_3/sample_3_mybacteriocin.hmmer_hmmsearch.txt.gz").exists() }, + + // Macrel + { assert snapshot( + path("$outputDir/amp/macrel/sample_1.macrel/sample_1.macrel.smorfs.faa.gz"), + path("$outputDir/amp/macrel/sample_2.macrel/sample_2.macrel.smorfs.faa.gz"), + path("$outputDir/amp/macrel/sample_3.macrel/sample_3.macrel.smorfs.faa.gz"), + path("$outputDir/amp/macrel/sample_1.macrel/sample_1.macrel.all_orfs.faa.gz"), + path("$outputDir/amp/macrel/sample_2.macrel/sample_2.macrel.all_orfs.faa.gz"), + path("$outputDir/amp/macrel/sample_3.macrel/sample_3.macrel.all_orfs.faa.gz"), + path("$outputDir/amp/macrel/sample_1.macrel/sample_1.macrel.prediction.gz"), + path("$outputDir/amp/macrel/sample_2.macrel/sample_2.macrel.prediction.gz"), + path("$outputDir/amp/macrel/sample_3.macrel/sample_3.macrel.prediction.gz"), + path("$outputDir/amp/macrel/sample_1.macrel/README.md"), + path("$outputDir/amp/macrel/sample_2.macrel/README.md"), + path("$outputDir/amp/macrel/sample_3.macrel/README.md"), + path("$outputDir/amp/macrel/sample_1.macrel/sample_1.macrel_log.txt"), + path("$outputDir/amp/macrel/sample_2.macrel/sample_2.macrel_log.txt"), + path("$outputDir/amp/macrel/sample_3.macrel/sample_3.macrel_log.txt") + ).match("macrel") }, + + // AMPcombi + { assert snapshot( + path("$outputDir/reports/ampcombi2/Ampcombi_cluster.log"), + path("$outputDir/reports/ampcombi2/Ampcombi_complete.log"), + path("$outputDir/reports/ampcombi2/Ampcombi_summary.tsv").text.contains("NODE_515831_length_303_cov_1.532258_1"), + path("$outputDir/reports/ampcombi2/Ampcombi_parse_tables.log").text.contains(" \$\$\$\$\$\$\\ \$\$\\ \$\$\\") + ).match("ampcombi") }, + + // DeepARG + { assert snapshot( + path("$outputDir/arg/deeparg/sample_1/sample_1.align.daa.tsv"), + path("$outputDir/arg/deeparg/sample_2/sample_2.align.daa.tsv"), + path("$outputDir/arg/deeparg/sample_3/sample_3.align.daa.tsv"), + path("$outputDir/arg/deeparg/sample_1/sample_1.mapping.ARG").text.contains("#ARG"), + path("$outputDir/arg/deeparg/sample_2/sample_2.mapping.ARG").text.contains("#ARG"), + path("$outputDir/arg/deeparg/sample_3/sample_3.mapping.ARG").text.contains("#ARG") + ).match("deeparg") }, + { assert file("$outputDir/arg/deeparg/sample_1/sample_1.align.daa").name }, + { assert 
file("$outputDir/arg/deeparg/sample_2/sample_2.align.daa").name }, + { assert file("$outputDir/arg/deeparg/sample_3/sample_3.align.daa").name }, + { assert path("$outputDir/arg/deeparg/sample_1/sample_1.mapping.potential.ARG").text.contains("#ARG") }, + { assert path("$outputDir/arg/deeparg/sample_2/sample_2.mapping.potential.ARG").text.contains("#ARG") }, + { assert path("$outputDir/arg/deeparg/sample_3/sample_3.mapping.potential.ARG").text.contains("#ARG") }, + + // ABRicate + { assert snapshot( + path("$outputDir/arg/abricate/sample_1/sample_1.txt"), + path("$outputDir/arg/abricate/sample_2/sample_2.txt"), + path("$outputDir/arg/abricate/sample_3/sample_3.txt") + ).match("abricate") }, + + // AMRFinderPlus + { assert snapshot( + path("$outputDir/arg/amrfinderplus/sample_1/sample_1.tsv"), + path("$outputDir/arg/amrfinderplus/sample_2/sample_2.tsv"), + path("$outputDir/arg/amrfinderplus/sample_3/sample_3.tsv") + ).match("amrfinderplus") }, + + // RGI + { assert snapshot( + path("$outputDir/arg/rgi/sample_1/sample_1.txt"), + path("$outputDir/arg/rgi/sample_2/sample_2.txt"), + path("$outputDir/arg/rgi/sample_3/sample_3.txt") + ).match("rgi") }, + + // fARGene + { assert snapshot( + path("$outputDir/arg/fargene/sample_1/class_a/results_summary.txt"), + path("$outputDir/arg/fargene/sample_2/class_a/results_summary.txt"), + path("$outputDir/arg/fargene/sample_3/class_a/results_summary.txt"), + path("$outputDir/arg/fargene/sample_1/class_b_1_2/results_summary.txt"), + path("$outputDir/arg/fargene/sample_2/class_b_1_2/results_summary.txt"), + path("$outputDir/arg/fargene/sample_3/class_b_1_2/results_summary.txt") + ).match("fargene") + }, + { assert path("$outputDir/arg/fargene/sample_1/fargene_analysis.log").text.contains("fARGene is done.") }, + { assert path("$outputDir/arg/fargene/sample_2/fargene_analysis.log").text.contains("fARGene is done.") }, + { assert path("$outputDir/arg/fargene/sample_3/fargene_analysis.log").text.contains("fARGene is done.") }, + + // hAMRonization + { assert snapshot(path("$outputDir/reports/hamronization_summarize/hamronization_combined_report.tsv")).match("hamronization") }, + + // argNorm + { assert snapshot( + path("$outputDir/arg/argnorm/amrfinderplus/sample_1.normalized.tsv"), + path("$outputDir/arg/argnorm/amrfinderplus/sample_2.normalized.tsv"), + path("$outputDir/arg/argnorm/amrfinderplus/sample_3.normalized.tsv") + ).match("argnorm_amrfinderplus") }, + + { assert snapshot( + path("$outputDir/arg/argnorm/deeparg/sample_1.ARG.normalized.tsv"), + path("$outputDir/arg/argnorm/deeparg/sample_1.potential.ARG.normalized.tsv"), + path("$outputDir/arg/argnorm/deeparg/sample_2.ARG.normalized.tsv"), + path("$outputDir/arg/argnorm/deeparg/sample_2.potential.ARG.normalized.tsv"), + path("$outputDir/arg/argnorm/deeparg/sample_3.ARG.normalized.tsv"), + path("$outputDir/arg/argnorm/deeparg/sample_3.potential.ARG.normalized.tsv") + ).match("argnorm_deeparg") }, + + { assert snapshot( + path("$outputDir/arg/argnorm/abricate/sample_1.normalized.tsv"), + path("$outputDir/arg/argnorm/abricate/sample_2.normalized.tsv"), + path("$outputDir/arg/argnorm/abricate/sample_3.normalized.tsv") + ).match("argnorm_abricate") } + ) + } + } +} diff --git a/tests/test_preannotated.nf.test.snap b/tests/test_preannotated.nf.test.snap new file mode 100644 index 00000000..b1843ff4 --- /dev/null +++ b/tests/test_preannotated.nf.test.snap @@ -0,0 +1,181 @@ +{ + "deeparg": { + "content": [ + "sample_1.align.daa.tsv:md5,0e71c37318bdc6cba792196d0455293d", + 
"sample_2.align.daa.tsv:md5,1092ecd3cd6931653168b46c7afeb9e3", + "sample_3.align.daa.tsv:md5,b79070fe26acd1a10ae3aaf06b0d5901", + true, + true, + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-27T08:11:24.751995878" + }, + "ampir": { + "content": [ + true, + "sample_1.ampir.faa:md5,ab02c6e9c5f36ba9c31af97f95f9c317", + true, + "sample_2.ampir.faa:md5,12826875bd18623da78770187a7bbd2c", + true, + "sample_3.ampir.faa:md5,0a36691485930a1b77c4b68a738fd98d" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-27T08:11:24.436374797" + }, + "argnorm_amrfinderplus": { + "content": [ + "sample_1.normalized.tsv:md5,0a7f76ceb606ac46730a51dd57290768", + "sample_2.normalized.tsv:md5,602afce3ee0ee179855c848bd87208fe", + "sample_3.normalized.tsv:md5,d4fb8fbd890217eb4d667d7a4dd80c9b" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-27T08:11:25.55764618" + }, + "argnorm_abricate": { + "content": [ + "sample_1.normalized.tsv:md5,ddd8d454672c57b798f477ca32504a42", + "sample_2.normalized.tsv:md5,0323fc890a8f698ac4b0ac25f5e65964", + "sample_3.normalized.tsv:md5,f71490c27790071bd5974ecc5502cf73" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-27T08:23:32.486921338" + }, + "amplify": { + "content": [ + true, + true, + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-27T08:11:24.483855968" + }, + "argnorm_deeparg": { + "content": [ + "sample_1.ARG.normalized.tsv:md5,26aa409bfd0fc9096f2ac404760cc492", + "sample_1.potential.ARG.normalized.tsv:md5,d6732b4b9765bfa47e27ba673e24b6a4", + "sample_2.ARG.normalized.tsv:md5,1a19b894a7315aaae5f799e4539e6619", + "sample_2.potential.ARG.normalized.tsv:md5,b241e22f9116d8f518ba8526d52ac4dc", + "sample_3.ARG.normalized.tsv:md5,d40d387176649ce80827420fef6a0169", + "sample_3.potential.ARG.normalized.tsv:md5,f331efd21ea143c180a15ae56a5210d3" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-27T08:23:32.446555281" + }, + "macrel": { + "content": [ + "sample_1.macrel.smorfs.faa.gz:md5,9cddad1e4b6dbcb76888f1a87db388ec", + "sample_2.macrel.smorfs.faa.gz:md5,e055dd2a9e44f3dcaa8af7198600349c", + "sample_3.macrel.smorfs.faa.gz:md5,9cddad1e4b6dbcb76888f1a87db388ec", + "sample_1.macrel.all_orfs.faa.gz:md5,c276fb1ec494ff53ded1e6fc118e25b9", + "sample_2.macrel.all_orfs.faa.gz:md5,e75e434a30922d80169d0666fd07e446", + "sample_3.macrel.all_orfs.faa.gz:md5,c276fb1ec494ff53ded1e6fc118e25b9", + "sample_1.macrel.prediction.gz:md5,0277725512f7d2954a99692bb65f1475", + "sample_2.macrel.prediction.gz:md5,06f7ce99cfe6f364d38743aae094402a", + "sample_3.macrel.prediction.gz:md5,0277725512f7d2954a99692bb65f1475", + "README.md:md5,fa3706dfc95d0538a52c4d0d824be5fb", + "README.md:md5,fa3706dfc95d0538a52c4d0d824be5fb", + "README.md:md5,fa3706dfc95d0538a52c4d0d824be5fb", + "sample_1.macrel_log.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "sample_2.macrel_log.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "sample_3.macrel_log.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-27T08:11:24.514344973" + }, + "hamronization": { + "content": [ + "hamronization_combined_report.tsv:md5,69a16cdf66a817c2ed1a725ecce02d5b" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.4" + }, + "timestamp": "2024-08-13T14:22:38.851885764" + }, + "abricate": { + "content": [ + 
"sample_1.txt:md5,427cec26e354ac6b0ab6047ec6621202", + "sample_2.txt:md5,4c140c932a48a22bcd8ae911bda8f4c7", + "sample_3.txt:md5,d6534efe3d03173749d003bf9e624e68" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-27T08:11:24.87794287" + }, + "fargene": { + "content": [ + "results_summary.txt:md5,2c8a073d2a7938e8aedcc097e6df2aa5", + "results_summary.txt:md5,3b86a5513e89e22a4c8b9279678ce0c0", + "results_summary.txt:md5,2c8a073d2a7938e8aedcc097e6df2aa5", + "results_summary.txt:md5,59f2e69c670d72f0c0a401e0dc90cbeb", + "results_summary.txt:md5,59f2e69c670d72f0c0a401e0dc90cbeb", + "results_summary.txt:md5,59f2e69c670d72f0c0a401e0dc90cbeb" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-27T08:11:25.248986515" + }, + "rgi": { + "content": [ + "sample_1.txt:md5,dde77ae2dc240ee4717d8d33a92dfb66", + "sample_2.txt:md5,0e652d35ef6e9272aa194b55db609e75", + "sample_3.txt:md5,dde77ae2dc240ee4717d8d33a92dfb66" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-27T08:11:25.117843821" + }, + "ampcombi": { + "content": [ + "Ampcombi_cluster.log:md5,4c78f5f134edf566f39e04e3ab7d8558", + "Ampcombi_complete.log:md5,3dabfea4303bf94bd4f5d78c5b8c83c1", + true, + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-27T08:11:24.639509225" + }, + "amrfinderplus": { + "content": [ + "sample_1.tsv:md5,29cfb6f34f420d802eda95c6d9daa361", + "sample_2.tsv:md5,d9b6565167d603a1f07cff2374db8eb2", + "sample_3.tsv:md5,29cfb6f34f420d802eda95c6d9daa361" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-27T08:11:24.994284774" + } +} \ No newline at end of file diff --git a/tests/test_preannotated_bgc.nf.test b/tests/test_preannotated_bgc.nf.test new file mode 100644 index 00000000..0e9ca618 --- /dev/null +++ b/tests/test_preannotated_bgc.nf.test @@ -0,0 +1,73 @@ +nextflow_pipeline { + + name "Test pipeline: NFCORE_FUNCSCAN" + script "main.nf" + tag "pipeline" + tag "nfcore_funcscan" + tag "test_preannotated_bgc" + + test("test_preannotated_bgc_profile") { + + when { + params { + outdir = "$outputDir" + } + } + + then { + assertAll( + { assert workflow.success }, + { assert new File("$outputDir/pipeline_info/nf_core_pipeline_software_mqc_versions.yml").exists() }, + { assert new File("$outputDir/multiqc/multiqc_report.html").exists() }, + + // antiSMASH + { assert snapshot( + path("$outputDir/bgc/antismash/sample_1/css"), + path("$outputDir/bgc/antismash/sample_2/css"), + path("$outputDir/bgc/antismash/sample_3/css") + ).match("antismash_css") }, // parts of channel: html_accessory_files + { assert path("$outputDir/bgc/antismash/sample_1/sample_1.gbk").text.contains("##antiSMASH-Data-START##") }, // channel: gbk_input + { assert path("$outputDir/bgc/antismash/sample_1/sample_1.zip").exists() }, // channel: zip + { assert path("$outputDir/bgc/antismash/sample_1/index.html").text.contains("https://antismash.secondarymetabolites.org/") }, // channel: html + { assert path("$outputDir/bgc/antismash/sample_1/regions.js").text.contains('PROKKA_859') }, // channel: json_sideloading + { assert path("$outputDir/bgc/antismash/sample_1/sample_1.log").text.contains("antiSMASH status: SUCCESS") }, // channel: log + { assert path("$outputDir/bgc/antismash/sample_2/sample_2.gbk").text.contains("##antiSMASH-Data-START##") }, // channel: gbk_input + { assert path("$outputDir/bgc/antismash/sample_2/sample_2.zip").exists() }, // channel: zip + { 
assert path("$outputDir/bgc/antismash/sample_2/index.html").text.contains("https://antismash.secondarymetabolites.org/") }, // channel: html + { assert path("$outputDir/bgc/antismash/sample_2/regions.js").text.contains('PROKKA_859') }, // channel: json_sideloading + { assert path("$outputDir/bgc/antismash/sample_2/sample_2.log").text.contains("antiSMASH status: SUCCESS") }, // channel: log + { assert path("$outputDir/bgc/antismash/sample_3/sample_3.gbk").text.contains("##antiSMASH-Data-START##") }, // channel: gbk_input + { assert path("$outputDir/bgc/antismash/sample_3/sample_3.zip").exists() }, // channel: zip + { assert path("$outputDir/bgc/antismash/sample_3/index.html").text.contains("https://antismash.secondarymetabolites.org/") }, // channel: html + { assert path("$outputDir/bgc/antismash/sample_3/regions.js").text.contains('NODE_1328_length_3730_cov_3.647347') }, // channel: json_sideloading + { assert path("$outputDir/bgc/antismash/sample_3/sample_3.log").text.contains("antiSMASH status: SUCCESS") }, // channel: log + + // DeepBGC + { assert snapshot( + path("$outputDir/bgc/deepbgc/sample_1/sample_1.bgc.gbk"), + path("$outputDir/bgc/deepbgc/sample_2/sample_2.bgc.gbk"), + path("$outputDir/bgc/deepbgc/sample_3/sample_3.bgc.gbk") + ).match("deepbgc_bgc_gbk") }, // channel: bgc_gbk + { assert path("$outputDir/bgc/deepbgc/sample_1/sample_1.antismash.json").text.contains("Putative BGCs predicted using DeepBGC") }, // channel: json + { assert path("$outputDir/bgc/deepbgc/sample_1/LOG.txt").text.contains('Saved DeepBGC result to:') }, // channel: log + { assert path("$outputDir/bgc/deepbgc/sample_1/sample_1.full.gbk").text.contains('1 ttcgccagga gtggcgaagc gatgcgaggt') }, // channel: full_gbk + { assert path("$outputDir/bgc/deepbgc/sample_2/sample_2.antismash.json").text.contains("Putative BGCs predicted using DeepBGC") }, // channel: json + { assert path("$outputDir/bgc/deepbgc/sample_2/LOG.txt").text.contains('Saved DeepBGC result to:') }, // channel: log + { assert path("$outputDir/bgc/deepbgc/sample_2/sample_2.full.gbk").text.contains('1 aaggggtatg gagcagcgac gtctacccgt') }, // channel: full_gbk + { assert path("$outputDir/bgc/deepbgc/sample_3/sample_3.antismash.json").text.contains("NODE_1328_length_3730_cov_3.647347") }, // channel: json + { assert path("$outputDir/bgc/deepbgc/sample_3/LOG.txt").text.contains('Saved DeepBGC result to:') }, // channel: log + { assert path("$outputDir/bgc/deepbgc/sample_3/sample_3.full.gbk").text.contains('1 tgaatctgtt ttaaagcaaa ttgatctcgc') }, // channel: full_gbk + + // GECCO + { assert snapshot( + path("$outputDir/bgc/gecco/sample_1/sample_1.genes.tsv"), // channel: genes + path("$outputDir/bgc/gecco/sample_1/sample_1.features.tsv"), // channel: features + path("$outputDir/bgc/gecco/sample_2/sample_2.genes.tsv"), // channel: genes + path("$outputDir/bgc/gecco/sample_2/sample_2.features.tsv"), // channel: features + path("$outputDir/bgc/gecco/sample_3/sample_3.genes.tsv"), // channel: genes + path("$outputDir/bgc/gecco/sample_3/sample_3.features.tsv") // channel: features + ).match("gecco") } + ) + } + } +} diff --git a/tests/test_preannotated_bgc.nf.test.snap b/tests/test_preannotated_bgc.nf.test.snap new file mode 100644 index 00000000..b05b7921 --- /dev/null +++ b/tests/test_preannotated_bgc.nf.test.snap @@ -0,0 +1,47 @@ +{ + "antismash_css": { + "content": [ + [ + "bacteria.css:md5,39c0ca9cbc64cb824dc958b26b5b4ab8" + ], + [ + "bacteria.css:md5,39c0ca9cbc64cb824dc958b26b5b4ab8" + ], + [ + "bacteria.css:md5,39c0ca9cbc64cb824dc958b26b5b4ab8" + ] + 
], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T10:49:00.343547789" + }, + "deepbgc_bgc_gbk": { + "content": [ + "sample_1.bgc.gbk:md5,e50e429959e9c4bf0c4b97d9dcd54a08", + "sample_2.bgc.gbk:md5,effe3cfc91772eb4e4b50ac46f13a941", + "sample_3.bgc.gbk:md5,c9028aca1282b314d296091e1f0b8e52" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T11:06:00.388012579" + }, + "gecco": { + "content": [ + "sample_1.genes.tsv:md5,804af8236a7148baf8919e3acf30947d", + "sample_1.features.tsv:md5,a84d59fd63e2593dc5872b4f9bb268b2", + "sample_2.genes.tsv:md5,5a2b20c5c1cd821a2af405229c4c0f78", + "sample_2.features.tsv:md5,579a27490188f5bc47a4deb4d1d1b8dc", + "sample_3.genes.tsv:md5,6874723404b3326f0f73e59f03b96837", + "sample_3.features.tsv:md5,490f98655089b3c73f88b93347cca465" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T10:49:00.44526019" + } +} \ No newline at end of file diff --git a/tests/test_prokka.nf.test b/tests/test_prokka.nf.test new file mode 100644 index 00000000..94e65ae2 --- /dev/null +++ b/tests/test_prokka.nf.test @@ -0,0 +1,108 @@ +nextflow_pipeline { + + name "Test pipeline: NFCORE_FUNCSCAN" + script "main.nf" + tag "pipeline" + tag "nfcore_funcscan" + tag "test_prokka" + + test("test_prokka_profile") { + + when { + params { + outdir = "$outputDir" + } + } + + then { + assertAll( + { assert workflow.success }, + { assert new File("$outputDir/pipeline_info/nf_core_pipeline_software_mqc_versions.yml").exists() }, + { assert new File("$outputDir/multiqc/multiqc_report.html").exists() }, + + // AMPir + { assert snapshot( + file("$outputDir/amp/ampir/sample_1/sample_1.ampir.tsv").text.contains("NODE_882919_length_258_cov_0.935961_1"), + file("$outputDir/amp/ampir/sample_1/sample_1.ampir.faa").text.contains("NODE_882919_length_258_cov_0.935961_1"), + file("$outputDir/amp/ampir/sample_2/sample_2.ampir.tsv").text.contains("NODE_882919_length_258_cov_0.935961_1"), + file("$outputDir/amp/ampir/sample_2/sample_2.ampir.faa").text.contains("NODE_882919_length_258_cov_0.935961_1") + ).match("ampir") }, + + // AMPlify + { assert snapshot( + file("$outputDir/amp/amplify/sample_1/sample_1.amplify.tsv").text.contains("NODE_882919_length_258_cov_0.935961_1"), + file("$outputDir/amp/amplify/sample_2/sample_2.amplify.tsv").text.contains("NODE_882919_length_258_cov_0.935961_1") + ).match("amplify") }, + + // HMMsearch + { assert new File("$outputDir/amp/hmmer_hmmsearch/sample_1/sample_1_mybacteriocin.hmmer_hmmsearch.txt.gz").exists() }, + { assert new File("$outputDir/amp/hmmer_hmmsearch/sample_2/sample_2_mybacteriocin.hmmer_hmmsearch.txt.gz").exists() }, + + // Macrel + { assert snapshot( + path("$outputDir/amp/macrel/sample_1.macrel/sample_1.macrel.smorfs.faa.gz"), + path("$outputDir/amp/macrel/sample_2.macrel/sample_2.macrel.smorfs.faa.gz"), + path("$outputDir/amp/macrel/sample_1.macrel/sample_1.macrel.all_orfs.faa.gz"), + path("$outputDir/amp/macrel/sample_2.macrel/sample_2.macrel.all_orfs.faa.gz"), + path("$outputDir/amp/macrel/sample_1.macrel/sample_1.macrel.prediction.gz"), + path("$outputDir/amp/macrel/sample_2.macrel/sample_2.macrel.prediction.gz"), + path("$outputDir/amp/macrel/sample_1.macrel/README.md"), + path("$outputDir/amp/macrel/sample_1.macrel/README.md"), + path("$outputDir/amp/macrel/sample_1.macrel/sample_1.macrel_log.txt"), + path("$outputDir/amp/macrel/sample_2.macrel/sample_2.macrel_log.txt") + ).match("macrel") }, + + // AMPcombi + { assert 
path("$outputDir/reports/ampcombi2/Ampcombi_summary.tsv").text.contains("PROKKA_00019") }, + { assert snapshot( + path("$outputDir/reports/ampcombi2/Ampcombi_cluster.log"), + path("$outputDir/reports/ampcombi2/Ampcombi_complete.log"), + path("$outputDir/reports/ampcombi2/Ampcombi_parse_tables.log") + ).match("ampcombi_logfiles") }, + + // DeepARG + { assert snapshot( + path("$outputDir/arg/deeparg/sample_1/sample_1.align.daa.tsv"), + path("$outputDir/arg/deeparg/sample_2/sample_2.align.daa.tsv"), + path("$outputDir/arg/deeparg/sample_1/sample_1.mapping.ARG"), + path("$outputDir/arg/deeparg/sample_2/sample_2.mapping.ARG") + ).match("deeparg_tsv_ARG") }, + { assert file("$outputDir/arg/deeparg/sample_1/sample_1.align.daa").name }, + { assert file("$outputDir/arg/deeparg/sample_2/sample_2.align.daa").name }, + { assert path("$outputDir/arg/deeparg/sample_1/sample_1.mapping.potential.ARG").text.contains("#ARG") }, + { assert path("$outputDir/arg/deeparg/sample_2/sample_2.mapping.potential.ARG").text.contains("#ARG") }, + + // ABRicate + { assert snapshot( + path("$outputDir/arg/abricate/sample_1/sample_1.txt"), + path("$outputDir/arg/abricate/sample_2/sample_2.txt"), + ).match("abricate") }, + + // AMRFinderPlus + { assert snapshot( + path("$outputDir/arg/amrfinderplus/sample_1/sample_1.tsv"), + path("$outputDir/arg/amrfinderplus/sample_2/sample_2.tsv"), + ).match("amrfinderplus") }, + + // RGI + { assert snapshot( + path("$outputDir/arg/rgi/sample_1/sample_1.txt"), + path("$outputDir/arg/rgi/sample_2/sample_2.txt"), + ).match("rgi") }, + + // fARGene + { assert snapshot( + path("$outputDir/arg/fargene/sample_1/class_a/results_summary.txt"), + path("$outputDir/arg/fargene/sample_2/class_a/results_summary.txt"), + path("$outputDir/arg/fargene/sample_1/class_b_1_2/results_summary.txt"), + path("$outputDir/arg/fargene/sample_2/class_b_1_2/results_summary.txt") + ).match("fargene") }, + { assert path("$outputDir/arg/fargene/sample_1/fargene_analysis.log").text.contains("fARGene is done.") }, + { assert path("$outputDir/arg/fargene/sample_2/fargene_analysis.log").text.contains("fARGene is done.") }, + + // hAMRonization + { assert snapshot(path("$outputDir/reports/hamronization_summarize/hamronization_combined_report.tsv")).match("hamronization_summarize") }, + ) + } + } +} diff --git a/tests/test_prokka.nf.test.snap b/tests/test_prokka.nf.test.snap new file mode 100644 index 00000000..07cfeefd --- /dev/null +++ b/tests/test_prokka.nf.test.snap @@ -0,0 +1,126 @@ +{ + "hamronization_summarize": { + "content": [ + "hamronization_combined_report.tsv:md5,864466b0fb1acfc0e6b3425271f78ecb" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T12:53:09.990722943" + }, + "abricate": { + "content": [ + "sample_1.txt:md5,69af3321b0bc808b7ef85f102395736f", + "sample_2.txt:md5,69af3321b0bc808b7ef85f102395736f" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T12:53:09.941179691" + }, + "fargene": { + "content": [ + "results_summary.txt:md5,690d351cfc52577263ef4cfab1c81f50", + "results_summary.txt:md5,690d351cfc52577263ef4cfab1c81f50", + "results_summary.txt:md5,f69af9b0e18a0cfc934eb18b7e4bffe1", + "results_summary.txt:md5,f69af9b0e18a0cfc934eb18b7e4bffe1" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T12:53:09.978326519" + }, + "rgi": { + "content": [ + "sample_1.txt:md5,fce130af51f93cccfc09ddaf9caf623f", + "sample_2.txt:md5,fce130af51f93cccfc09ddaf9caf623f" + ], + "meta": { + 
"nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T12:53:09.964420301" + }, + "deeparg_tsv_ARG": { + "content": [ + "sample_1.align.daa.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", + "sample_2.align.daa.tsv:md5,06648de08caca0b7f42eab9576615226", + "sample_1.mapping.ARG:md5,0e049e99eab4c55666062df21707d5b9", + "sample_2.mapping.ARG:md5,0e049e99eab4c55666062df21707d5b9" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T12:53:09.928505029" + }, + "ampir": { + "content": [ + false, + false, + false, + false + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T12:53:09.879791305" + }, + "ampcombi_logfiles": { + "content": [ + "Ampcombi_cluster.log:md5,4c78f5f134edf566f39e04e3ab7d8558", + "Ampcombi_complete.log:md5,3dabfea4303bf94bd4f5d78c5b8c83c1", + "Ampcombi_parse_tables.log:md5,1e2b5abad7d17e03428066f345b91117" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T12:53:09.914363724" + }, + "amplify": { + "content": [ + false, + false + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T12:53:09.889521968" + }, + "macrel": { + "content": [ + "sample_1.macrel.smorfs.faa.gz:md5,1b5e2434860e635e95324d1804a3be7b", + "sample_2.macrel.smorfs.faa.gz:md5,38108b5cdfdc2196afe67418b9b04682", + "sample_1.macrel.all_orfs.faa.gz:md5,86f6b3b590d1b22d9c5aa164f8a14080", + "sample_2.macrel.all_orfs.faa.gz:md5,fdb384925af50ecade05dccaff68afd8", + "sample_1.macrel.prediction.gz:md5,0c4b16e0838be56e012b99169863a168", + "sample_2.macrel.prediction.gz:md5,440deffd6b6d9986ce098e44c66db9ae", + "README.md:md5,fa3706dfc95d0538a52c4d0d824be5fb", + "README.md:md5,fa3706dfc95d0538a52c4d0d824be5fb", + "sample_1.macrel_log.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "sample_2.macrel_log.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T12:53:09.892460736" + }, + "amrfinderplus": { + "content": [ + "sample_1.tsv:md5,b4d261ace9be7d013c19d1f5c0005bfe", + "sample_2.tsv:md5,b4d261ace9be7d013c19d1f5c0005bfe" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T12:53:09.952983609" + } +} \ No newline at end of file diff --git a/tests/test_taxonomy_bakta.nf.test b/tests/test_taxonomy_bakta.nf.test new file mode 100644 index 00000000..5a412fa9 --- /dev/null +++ b/tests/test_taxonomy_bakta.nf.test @@ -0,0 +1,101 @@ +nextflow_pipeline { + + name "Test pipeline: NFCORE_FUNCSCAN" + script "main.nf" + tag "pipeline" + tag "nfcore_funcscan" + tag "test_taxonomy_bakta" + + test("test_taxonomy_bakta") { + + when { + params { + outdir = "$outputDir" + run_taxa_classification = true + } + } + + then { + assertAll( + { assert workflow.success }, + { assert new File("$outputDir/pipeline_info/nf_core_pipeline_software_mqc_versions.yml").exists() }, + { assert new File("$outputDir/multiqc/multiqc_report.html").exists() }, + + // ampir + { assert snapshot( + file("$outputDir/amp/ampir/sample_1/sample_1.ampir.tsv").text.contains("KKEJHB_00005"), + file("$outputDir/amp/ampir/sample_1/sample_1.ampir.faa").text.contains("KKEJHB_00005"), + file("$outputDir/amp/ampir/sample_2/sample_2.ampir.tsv").text.contains("KDEMFK_00005"), + file("$outputDir/amp/ampir/sample_2/sample_2.ampir.faa").text.contains("KDEMFK_00005") + ).match("ampir") }, + + // AMPlify + { assert snapshot( + 
file("$outputDir/amp/amplify/sample_1/sample_1.amplify.tsv").text.contains("KKEJHB_00005"), + file("$outputDir/amp/amplify/sample_2/sample_2.amplify.tsv").text.contains("KDEMFK_00005") + ).match("amplify") }, + + // Macrel + { assert snapshot ( + path("$outputDir/amp/macrel/sample_1.macrel/sample_1.macrel.smorfs.faa.gz"), + path("$outputDir/amp/macrel/sample_2.macrel/sample_2.macrel.smorfs.faa.gz"), + path("$outputDir/amp/macrel/sample_1.macrel/sample_1.macrel.all_orfs.faa.gz"), + path("$outputDir/amp/macrel/sample_2.macrel/sample_2.macrel.all_orfs.faa.gz"), + path("$outputDir/amp/macrel/sample_1.macrel/sample_1.macrel.prediction.gz"), + path("$outputDir/amp/macrel/sample_2.macrel/sample_2.macrel.prediction.gz"), + path("$outputDir/amp/macrel/sample_1.macrel/README.md"), + path("$outputDir/amp/macrel/sample_2.macrel/README.md"), + path("$outputDir/amp/macrel/sample_1.macrel/sample_1.macrel_log.txt"), + path("$outputDir/amp/macrel/sample_2.macrel/sample_2.macrel_log.txt") + ).match("macrel") }, + + // hmmsearch (AMP) + { assert new File("$outputDir/amp/hmmer_hmmsearch/sample_1/sample_1_mybacteriocin.hmmer_hmmsearch.txt.gz").exists() }, + { assert new File("$outputDir/amp/hmmer_hmmsearch/sample_2/sample_2_mybacteriocin.hmmer_hmmsearch.txt.gz").exists() }, + + // AMPcombi + { assert snapshot ( + file("$outputDir/reports/ampcombi2/sample_2/sample_2_ampcombi.tsv").text.contains("KDEMFK_00575"), + ).match("ampcombi") }, + { assert new File("$outputDir/reports/ampcombi2/ampcombi_complete_summary_taxonomy.tsv.gz").exists() }, + + // RGI + { assert snapshot ( + path("$outputDir/arg/rgi/sample_1/sample_1.txt"), + path("$outputDir/arg/rgi/sample_2/sample_2.txt") + ).match("rgi") }, + + // ABRicate + { assert snapshot ( + file("$outputDir/arg/abricate/sample_1/sample_1.txt").text.contains("COVERAGE_MAP"), + file("$outputDir/arg/abricate/sample_2/sample_2.txt").text.contains("COVERAGE_MAP") + ).match("abricate") }, + + // fARGene + { assert snapshot ( + path("$outputDir/arg/fargene/sample_1/class_b_1_2/results_summary.txt"), + file("$outputDir/arg/fargene/sample_2/class_b_3/results_summary.txt").text.contains("class_B_3.hmm"), + file("$outputDir/arg/fargene/sample_2/tet_efflux/results_summary.txt").text.contains("tet_efflux.hmm") + ).match("fargene") }, + + // hAMRonization + { assert new File("$outputDir/reports/hamronization_summarize/hamronization_combined_report.tsv.gz").exists() }, + + // antiSMASH + { assert snapshot ( + file("$outputDir/bgc/antismash/sample_2/sample_2.gbk").text.contains("##antiSMASH-Data-START##"), + file("$outputDir/bgc/antismash/sample_2/sample_2.log").text.contains("antiSMASH status: SUCCESS") + ).match("antismash") }, + + // GECCO + { assert snapshot ( + path("$outputDir/bgc/gecco/sample_2/sample_2.genes.tsv").text.contains("sequence_id"), + path("$outputDir/bgc/gecco/sample_2/sample_2.features.tsv").text.contains("cluster_probability") + ).match("gecco") }, + + // hmmsearch (BGC) + { assert new File("$outputDir/bgc/hmmer_hmmsearch/sample_2/sample_2_ToyB.txt.gz").exists() } + ) + } + } +} diff --git a/tests/test_taxonomy_bakta.nf.test.snap b/tests/test_taxonomy_bakta.nf.test.snap new file mode 100644 index 00000000..5606db1e --- /dev/null +++ b/tests/test_taxonomy_bakta.nf.test.snap @@ -0,0 +1,111 @@ +{ + "abricate": { + "content": [ + true, + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T13:19:30.025771" + }, + "fargene": { + "content": [ + "results_summary.txt:md5,f69af9b0e18a0cfc934eb18b7e4bffe1", + true, + true + ], 
+ "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T13:19:30.025771" + }, + "rgi": { + "content": [ + "sample_1.txt:md5,fce130af51f93cccfc09ddaf9caf623f", + "sample_2.txt:md5,fce130af51f93cccfc09ddaf9caf623f" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T13:19:30.025771" + }, + "ampir": { + "content": [ + true, + true, + true, + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T13:19:30.025771" + }, + "gecco": { + "content": [ + true, + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T13:19:30.025771" + }, + "ampcombi": { + "content": [ + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T13:19:30.025771" + }, + "antismash": { + "content": [ + true, + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T13:19:30.025771" + }, + "amplify": { + "content": [ + true, + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T13:19:06.451885596" + }, + "macrel": { + "content": [ + "sample_1.macrel.smorfs.faa.gz:md5,1b5e2434860e635e95324d1804a3be7b", + "sample_2.macrel.smorfs.faa.gz:md5,38108b5cdfdc2196afe67418b9b04682", + "sample_1.macrel.all_orfs.faa.gz:md5,86f6b3b590d1b22d9c5aa164f8a14080", + "sample_2.macrel.all_orfs.faa.gz:md5,fdb384925af50ecade05dccaff68afd8", + "sample_1.macrel.prediction.gz:md5,0c4b16e0838be56e012b99169863a168", + "sample_2.macrel.prediction.gz:md5,440deffd6b6d9986ce098e44c66db9ae", + "README.md:md5,fa3706dfc95d0538a52c4d0d824be5fb", + "README.md:md5,fa3706dfc95d0538a52c4d0d824be5fb", + "sample_1.macrel_log.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "sample_2.macrel_log.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T13:19:30.025771" + } +} \ No newline at end of file diff --git a/tests/test_taxonomy_prokka.nf.test b/tests/test_taxonomy_prokka.nf.test new file mode 100644 index 00000000..e0992dbf --- /dev/null +++ b/tests/test_taxonomy_prokka.nf.test @@ -0,0 +1,101 @@ +nextflow_pipeline { + + name "Test pipeline: NFCORE_FUNCSCAN" + script "main.nf" + tag "pipeline" + tag "nfcore_funcscan" + tag "test_taxonomy_prokka" + + test("test_taxonomy_prokka") { + + when { + params { + outdir = "$outputDir" + run_taxa_classification = true + } + } + + then { + assertAll( + { assert workflow.success }, + { assert new File("$outputDir/pipeline_info/nf_core_pipeline_software_mqc_versions.yml").exists() }, + { assert new File("$outputDir/multiqc/multiqc_report.html").exists() }, + + // ampir + { assert snapshot( + file("$outputDir/amp/ampir/sample_1/sample_1.ampir.tsv").text.contains("PROKKA_00001"), + file("$outputDir/amp/ampir/sample_1/sample_1.ampir.faa").text.contains("PROKKA_00001"), + file("$outputDir/amp/ampir/sample_2/sample_2.ampir.tsv").text.contains("PROKKA_00001"), + file("$outputDir/amp/ampir/sample_2/sample_2.ampir.faa").text.contains("PROKKA_00001") + ).match("ampir") }, + + // AMPlify + { assert snapshot( + file("$outputDir/amp/amplify/sample_1/sample_1.amplify.tsv").text.contains("PROKKA_00001"), + file("$outputDir/amp/amplify/sample_2/sample_2.amplify.tsv").text.contains("PROKKA_00001") + ).match("amplify") }, + + // Macrel + { assert snapshot ( + path("$outputDir/amp/macrel/sample_1.macrel/sample_1.macrel.smorfs.faa.gz"), + 
path("$outputDir/amp/macrel/sample_2.macrel/sample_2.macrel.smorfs.faa.gz"), + path("$outputDir/amp/macrel/sample_1.macrel/sample_1.macrel.all_orfs.faa.gz"), + path("$outputDir/amp/macrel/sample_2.macrel/sample_2.macrel.all_orfs.faa.gz"), + path("$outputDir/amp/macrel/sample_1.macrel/sample_1.macrel.prediction.gz"), + path("$outputDir/amp/macrel/sample_2.macrel/sample_2.macrel.prediction.gz"), + path("$outputDir/amp/macrel/sample_1.macrel/README.md"), + path("$outputDir/amp/macrel/sample_2.macrel/README.md"), + path("$outputDir/amp/macrel/sample_1.macrel/sample_1.macrel_log.txt"), + path("$outputDir/amp/macrel/sample_2.macrel/sample_2.macrel_log.txt") + ).match("macrel") }, + + // hmmsearch (AMP) + { assert new File("$outputDir/amp/hmmer_hmmsearch/sample_1/sample_1_mybacteriocin.hmmer_hmmsearch.txt.gz").exists() }, + { assert new File("$outputDir/amp/hmmer_hmmsearch/sample_2/sample_2_mybacteriocin.hmmer_hmmsearch.txt.gz").exists() }, + + // AMPcombi + { assert snapshot ( + file("$outputDir/reports/ampcombi2/sample_2/sample_2_ampcombi.tsv").text.contains("PROKKA_00109"), + ).match("ampcombi") }, + { assert new File("$outputDir/reports/ampcombi2/ampcombi_complete_summary_taxonomy.tsv.gz").exists() }, + + // RGI + { assert snapshot ( + path("$outputDir/arg/rgi/sample_1/sample_1.txt"), + path("$outputDir/arg/rgi/sample_2/sample_2.txt") + ).match("rgi") }, + + // ABRicate + { assert snapshot ( + file("$outputDir/arg/abricate/sample_1/sample_1.txt").text.contains("COVERAGE_MAP"), + file("$outputDir/arg/abricate/sample_2/sample_2.txt").text.contains("COVERAGE_MAP") + ).match("abricate") }, + + // fARGene + { assert snapshot ( + path("$outputDir/arg/fargene/sample_1/class_b_1_2/results_summary.txt"), + file("$outputDir/arg/fargene/sample_2/class_b_3/results_summary.txt").text.contains("class_B_3.hmm"), + file("$outputDir/arg/fargene/sample_2/tet_efflux/results_summary.txt").text.contains("tet_efflux.hmm") + ).match("fargene") }, + + // hAMRonization + { assert new File("$outputDir/reports/hamronization_summarize/hamronization_combined_report.tsv.gz").exists() }, + + // antiSMASH + { assert snapshot ( + file("$outputDir/bgc/antismash/sample_2/sample_2.gbk").text.contains("##antiSMASH-Data-START##"), + file("$outputDir/bgc/antismash/sample_2/sample_2.log").text.contains("antiSMASH status: SUCCESS") + ).match("antismash") }, + + // GECCO + { assert snapshot ( + path("$outputDir/bgc/gecco/sample_2/sample_2.genes.tsv").text.contains("sequence_id"), + path("$outputDir/bgc/gecco/sample_2/sample_2.features.tsv").text.contains("cluster_probability") + ).match("gecco") }, + + // hmmsearch (BGC) + { assert new File("$outputDir/bgc/hmmer_hmmsearch/sample_2/sample_2_ToyB.txt.gz").exists() } + ) + } + } +} diff --git a/tests/test_taxonomy_prokka.nf.test.snap b/tests/test_taxonomy_prokka.nf.test.snap new file mode 100644 index 00000000..8e2e581a --- /dev/null +++ b/tests/test_taxonomy_prokka.nf.test.snap @@ -0,0 +1,111 @@ +{ + "abricate": { + "content": [ + true, + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T13:05:30.025771" + }, + "fargene": { + "content": [ + "results_summary.txt:md5,f69af9b0e18a0cfc934eb18b7e4bffe1", + true, + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T13:05:30.025771" + }, + "rgi": { + "content": [ + "sample_1.txt:md5,fce130af51f93cccfc09ddaf9caf623f", + "sample_2.txt:md5,fce130af51f93cccfc09ddaf9caf623f" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + 
"timestamp": "2024-07-24T13:05:30.025771" + }, + "ampir": { + "content": [ + true, + true, + true, + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T13:05:30.025771" + }, + "gecco": { + "content": [ + true, + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T13:05:30.025771" + }, + "ampcombi": { + "content": [ + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T13:05:30.025771" + }, + "antismash": { + "content": [ + true, + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T13:05:30.025771" + }, + "amplify": { + "content": [ + true, + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T13:05:06.451885596" + }, + "macrel": { + "content": [ + "sample_1.macrel.smorfs.faa.gz:md5,1b5e2434860e635e95324d1804a3be7b", + "sample_2.macrel.smorfs.faa.gz:md5,38108b5cdfdc2196afe67418b9b04682", + "sample_1.macrel.all_orfs.faa.gz:md5,86f6b3b590d1b22d9c5aa164f8a14080", + "sample_2.macrel.all_orfs.faa.gz:md5,fdb384925af50ecade05dccaff68afd8", + "sample_1.macrel.prediction.gz:md5,0c4b16e0838be56e012b99169863a168", + "sample_2.macrel.prediction.gz:md5,440deffd6b6d9986ce098e44c66db9ae", + "README.md:md5,fa3706dfc95d0538a52c4d0d824be5fb", + "README.md:md5,fa3706dfc95d0538a52c4d0d824be5fb", + "sample_1.macrel_log.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "sample_2.macrel_log.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T13:05:30.025771" + } +} \ No newline at end of file diff --git a/tests/test_taxonomy_pyrodigal.nf.test b/tests/test_taxonomy_pyrodigal.nf.test new file mode 100644 index 00000000..3cc5535e --- /dev/null +++ b/tests/test_taxonomy_pyrodigal.nf.test @@ -0,0 +1,101 @@ +nextflow_pipeline { + + name "Test pipeline: NFCORE_FUNCSCAN" + script "main.nf" + tag "pipeline" + tag "nfcore_funcscan" + tag "test_taxonomy_pyrodigal" + + test("test_taxonomy_pyrodigal") { + + when { + params { + outdir = "$outputDir" + run_taxa_classification = true + } + } + + then { + assertAll( + { assert workflow.success }, + { assert new File("$outputDir/pipeline_info/nf_core_pipeline_software_mqc_versions.yml").exists() }, + { assert new File("$outputDir/multiqc/multiqc_report.html").exists() }, + + // ampir + { assert snapshot( + file("$outputDir/amp/ampir/sample_1/sample_1.ampir.tsv").text.contains("NODE_882919_length_258_cov_0.935961_1"), + file("$outputDir/amp/ampir/sample_1/sample_1.ampir.faa").text.contains("NODE_882919_length_258_cov_0.935961_1"), + file("$outputDir/amp/ampir/sample_2/sample_2.ampir.tsv").text.contains("NODE_882919_length_258_cov_0.935961_1"), + file("$outputDir/amp/ampir/sample_2/sample_2.ampir.faa").text.contains("NODE_882919_length_258_cov_0.935961_1") + ).match("ampir") }, + + // AMPlify + { assert snapshot( + file("$outputDir/amp/amplify/sample_1/sample_1.amplify.tsv").text.contains("NODE_882919_length_258_cov_0.935961_1"), + file("$outputDir/amp/amplify/sample_2/sample_2.amplify.tsv").text.contains("NODE_882919_length_258_cov_0.935961_1") + ).match("amplify") }, + + // Macrel + { assert snapshot ( + path("$outputDir/amp/macrel/sample_1.macrel/sample_1.macrel.smorfs.faa.gz"), + path("$outputDir/amp/macrel/sample_2.macrel/sample_2.macrel.smorfs.faa.gz"), + path("$outputDir/amp/macrel/sample_1.macrel/sample_1.macrel.all_orfs.faa.gz"), + 
path("$outputDir/amp/macrel/sample_2.macrel/sample_2.macrel.all_orfs.faa.gz"), + path("$outputDir/amp/macrel/sample_1.macrel/sample_1.macrel.prediction.gz"), + path("$outputDir/amp/macrel/sample_2.macrel/sample_2.macrel.prediction.gz"), + path("$outputDir/amp/macrel/sample_1.macrel/README.md"), + path("$outputDir/amp/macrel/sample_2.macrel/README.md"), + path("$outputDir/amp/macrel/sample_1.macrel/sample_1.macrel_log.txt"), + path("$outputDir/amp/macrel/sample_2.macrel/sample_2.macrel_log.txt") + ).match("macrel") }, + + // hmmsearch (AMP) + { assert new File("$outputDir/amp/hmmer_hmmsearch/sample_1/sample_1_mybacteriocin.hmmer_hmmsearch.txt.gz").exists() }, + { assert new File("$outputDir/amp/hmmer_hmmsearch/sample_2/sample_2_mybacteriocin.hmmer_hmmsearch.txt.gz").exists() }, + + // AMPcombi + { assert snapshot ( + file("$outputDir/reports/ampcombi2/sample_2/sample_2_ampcombi.tsv").text.contains("NODE_515831_length_303_cov_1.532258_1"), + ).match("ampcombi") }, + { assert new File("$outputDir/reports/ampcombi2/ampcombi_complete_summary_taxonomy.tsv.gz").exists() }, + + // RGI + { assert snapshot ( + path("$outputDir/arg/rgi/sample_1/sample_1.txt"), + path("$outputDir/arg/rgi/sample_2/sample_2.txt") + ).match("rgi") }, + + // ABRicate + { assert snapshot ( + file("$outputDir/arg/abricate/sample_1/sample_1.txt").text.contains("COVERAGE_MAP"), + file("$outputDir/arg/abricate/sample_2/sample_2.txt").text.contains("COVERAGE_MAP") + ).match("abricate") }, + + // fARGene + { assert snapshot ( + path("$outputDir/arg/fargene/sample_1/class_b_1_2/results_summary.txt"), + file("$outputDir/arg/fargene/sample_2/class_b_3/results_summary.txt").text.contains("class_B_3.hmm"), + file("$outputDir/arg/fargene/sample_2/tet_efflux/results_summary.txt").text.contains("tet_efflux.hmm") + ).match("fargene") }, + + // hAMRonization + { assert new File("$outputDir/reports/hamronization_summarize/hamronization_combined_report.tsv.gz").exists() }, + + // antiSMASH + { assert snapshot ( + file("$outputDir/bgc/antismash/sample_2/sample_2.gbk").text.contains("##antiSMASH-Data-START##"), + file("$outputDir/bgc/antismash/sample_2/sample_2.log").text.contains("antiSMASH status: SUCCESS") + ).match("antismash") }, + + // GECCO + { assert snapshot ( + path("$outputDir/bgc/gecco/sample_2/sample_2.genes.tsv").text.contains("sequence_id"), + path("$outputDir/bgc/gecco/sample_2/sample_2.features.tsv").text.contains("cluster_probability") + ).match("gecco") }, + + // hmmsearch (BGC) + { assert new File("$outputDir/bgc/hmmer_hmmsearch/sample_2/sample_2_ToyB.txt.gz").exists() } + ) + } + } +} diff --git a/tests/test_taxonomy_pyrodigal.nf.test.snap b/tests/test_taxonomy_pyrodigal.nf.test.snap new file mode 100644 index 00000000..668aab92 --- /dev/null +++ b/tests/test_taxonomy_pyrodigal.nf.test.snap @@ -0,0 +1,111 @@ +{ + "abricate": { + "content": [ + true, + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T16:24:30.025771" + }, + "fargene": { + "content": [ + "results_summary.txt:md5,f69af9b0e18a0cfc934eb18b7e4bffe1", + true, + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T16:24:30.025771" + }, + "rgi": { + "content": [ + "sample_1.txt:md5,fce130af51f93cccfc09ddaf9caf623f", + "sample_2.txt:md5,fce130af51f93cccfc09ddaf9caf623f" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T16:24:30.025771" + }, + "ampir": { + "content": [ + true, + true, + true, + true + ], + "meta": { + 
"nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T16:24:30.025771" + }, + "gecco": { + "content": [ + true, + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T16:24:30.025771" + }, + "ampcombi": { + "content": [ + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T16:24:30.025771" + }, + "antismash": { + "content": [ + true, + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T16:24:30.025771" + }, + "amplify": { + "content": [ + true, + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T16:24:06.451885596" + }, + "macrel": { + "content": [ + "sample_1.macrel.smorfs.faa.gz:md5,1b5e2434860e635e95324d1804a3be7b", + "sample_2.macrel.smorfs.faa.gz:md5,38108b5cdfdc2196afe67418b9b04682", + "sample_1.macrel.all_orfs.faa.gz:md5,86f6b3b590d1b22d9c5aa164f8a14080", + "sample_2.macrel.all_orfs.faa.gz:md5,fdb384925af50ecade05dccaff68afd8", + "sample_1.macrel.prediction.gz:md5,0c4b16e0838be56e012b99169863a168", + "sample_2.macrel.prediction.gz:md5,440deffd6b6d9986ce098e44c66db9ae", + "README.md:md5,fa3706dfc95d0538a52c4d0d824be5fb", + "README.md:md5,fa3706dfc95d0538a52c4d0d824be5fb", + "sample_1.macrel_log.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "sample_2.macrel_log.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-24T16:24:30.025771" + } +} \ No newline at end of file diff --git a/workflows/funcscan.nf b/workflows/funcscan.nf index b5885478..07e0a3d8 100644 --- a/workflows/funcscan.nf +++ b/workflows/funcscan.nf @@ -1,46 +1,13 @@ /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - PRINT PARAMS SUMMARY + IMPORT MODULES / SUBWORKFLOWS / FUNCTIONS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { validateParameters; paramsHelp; paramsSummaryLog; paramsSummaryMap; fromSamplesheet } from 'plugin/nf-validation' -def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs) -def citation = '\n' + WorkflowMain.citation(workflow) + '\n' -def summary_params = paramsSummaryMap(workflow) - -// Print parameter summary log to screen -log.info logo + paramsSummaryLog(workflow) + citation - -WorkflowFuncscan.initialise(params, log) - -// Check input path parameters to see if they exist -/*def checkPathParamList = [ params.input, params.multiqc_config, params.annotation_bakta_db_localpath, - params.amp_hmmsearch_models, params.amp_ampcombi_db, - params.arg_amrfinderplus_db, params.arg_deeparg_data, - params.bgc_antismash_databases, params.bgc_antismash_installationdirectory, - params.bgc_deepbgc_database, params.bgc_hmmsearch_models ] -for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } - - -// Check mandatory parameters -if (params.input) { ch_input = file(params.input) } else { error("Input samplesheet not specified!") } -*/ - -// Validate antiSMASH inputs -// 1. 
Make sure that either both or none of the antiSMASH directories are supplied -if ( ( params.run_bgc_screening && !params.bgc_antismash_databases && params.bgc_antismash_installationdirectory && !params.bgc_skip_antismash) || ( params.run_bgc_screening && params.bgc_antismash_databases && !params.bgc_antismash_installationdirectory && !params.bgc_skip_antismash ) ) error("[nf-core/funcscan] ERROR: You supplied either the antiSMASH database or its installation directory, but not both. Please either supply both directories or none (letting the pipeline download them instead).") - -// 2. If both are supplied: Exit if we have a name collision error -else if ( params.run_bgc_screening && params.bgc_antismash_databases && params.bgc_antismash_installationdirectory && !params.bgc_skip_antismash ) { - antismash_database_dir = new File(params.bgc_antismash_databases) - antismash_install_dir = new File(params.bgc_antismash_installationdirectory) - if ( antismash_database_dir.name == antismash_install_dir.name ) error("[nf-core/funcscan] ERROR: Your supplied antiSMASH database and installation directories have identical names: \"" + antismash_install_dir.name + "\".\nPlease make sure to name them differently, for example:\n - Database directory: "+ antismash_database_dir.parent + "/antismash_db\n - Installation directory: " + antismash_install_dir.parent + "/antismash_dir") -} - -// 3. Give warning if not using container system assuming conda - -if ( params.run_bgc_screening && ( !params.bgc_antismash_databases || !params.bgc_antismash_installationdirectory ) && !params.bgc_skip_antismash && ( session.config.conda && session.config.conda.enabled ) ) { log.warn "[nf-core/funcscan] Running antiSMASH download database module, and detected conda has been enabled. Assuming using conda for pipeline run, check config if this is not expected!" } +include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_funcscan_pipeline' +include { paramsSummaryMap; validateParameters; paramsHelp; paramsSummaryLog; fromSamplesheet } from 'plugin/nf-validation' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -48,9 +15,9 @@ if ( params.run_bgc_screening && ( !params.bgc_antismash_databases || !params.bg ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) -ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config, checkIfExists: true ) : Channel.empty() -ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo, checkIfExists: true ) : Channel.empty() +ch_multiqc_config = Channel.fromPath( "$projectDir/assets/multiqc_config.yml", checkIfExists: true ) +ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config, checkIfExists: true ) : Channel.empty() +ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo, checkIfExists: true ) : Channel.empty() ch_multiqc_custom_methods_description = params.multiqc_methods_description ? 
file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) /* @@ -62,9 +29,11 @@ ch_multiqc_custom_methods_description = params.multiqc_methods_description ? fil // // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules // -include { AMP } from '../subworkflows/local/amp' -include { ARG } from '../subworkflows/local/arg' -include { BGC } from '../subworkflows/local/bgc' +include { ANNOTATION } from '../subworkflows/local/annotation' +include { AMP } from '../subworkflows/local/amp' +include { ARG } from '../subworkflows/local/arg' +include { BGC } from '../subworkflows/local/bgc' +include { TAXA_CLASS } from '../subworkflows/local/taxa_class' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -75,22 +44,9 @@ include { BGC } from '../subworkflows/local/bgc' // // MODULE: Installed directly from nf-core/modules // -include { MULTIQC } from '../modules/nf-core/multiqc/main' -include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' -include { GUNZIP as GUNZIP_FASTA_PREP } from '../modules/nf-core/gunzip/main' -include { GUNZIP as GUNZIP_PRODIGAL_FNA } from '../modules/nf-core/gunzip/main' -include { GUNZIP as GUNZIP_PRODIGAL_FAA } from '../modules/nf-core/gunzip/main' -include { GUNZIP as GUNZIP_PRODIGAL_GFF } from '../modules/nf-core/gunzip/main' -include { GUNZIP as GUNZIP_PYRODIGAL_FNA } from '../modules/nf-core/gunzip/main' -include { GUNZIP as GUNZIP_PYRODIGAL_FAA } from '../modules/nf-core/gunzip/main' -include { GUNZIP as GUNZIP_PYRODIGAL_GFF } from '../modules/nf-core/gunzip/main' -include { BIOAWK } from '../modules/nf-core/bioawk/main' -include { PROKKA } from '../modules/nf-core/prokka/main' -include { PRODIGAL as PRODIGAL_GFF } from '../modules/nf-core/prodigal/main' -include { PRODIGAL as PRODIGAL_GBK } from '../modules/nf-core/prodigal/main' -include { PYRODIGAL } from '../modules/nf-core/pyrodigal/main' -include { BAKTA_BAKTADBDOWNLOAD } from '../modules/nf-core/bakta/baktadbdownload/main' -include { BAKTA_BAKTA } from '../modules/nf-core/bakta/bakta/main' +include { MULTIQC } from '../modules/nf-core/multiqc/main' +include { GUNZIP as GUNZIP_INPUT_PREP } from '../modules/nf-core/gunzip/main' +include { SEQKIT_SEQ } from '../modules/nf-core/seqkit/seq/main' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -98,113 +54,128 @@ include { BAKTA_BAKTA } from '../modules/nf-core/bakta/bak ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// Info required for completion email and summary -def multiqc_report = [] - workflow FUNCSCAN { + take: + ch_samplesheet // channel: samplesheet read in from --input + + main: + ch_versions = Channel.empty() - ch_multiqc_logo = Channel.fromPath("$projectDir/docs/images/nf-core-funcscan_logo_flat_light.png") + ch_multiqc_files = Channel.empty() ch_input = Channel.fromSamplesheet("input") // Some tools require uncompressed input - fasta_prep = ch_input - .branch { - compressed: it[1].toString().endsWith('.gz') - uncompressed: it[1] - } + ch_input_prep = ch_input + .map{ meta, fasta, faa, gbk -> [meta + [category: 'all'], [fasta, faa, gbk]] } + .transpose() + .branch { + compressed: it[1].toString().endsWith('.gz') + uncompressed: it[1] + } - GUNZIP_FASTA_PREP ( fasta_prep.compressed ) - ch_versions = ch_versions.mix(GUNZIP_FASTA_PREP.out.versions) + GUNZIP_INPUT_PREP ( 
ch_input_prep.compressed )
+    ch_versions = ch_versions.mix( GUNZIP_INPUT_PREP.out.versions )
 
     // Merge all the already uncompressed and newly decompressed FASTAs here into
     // a single input channel for downstream
-    ch_prepped_fastas = GUNZIP_FASTA_PREP.out.gunzip
-        .mix(fasta_prep.uncompressed)
-
-    // Add to meta the length of longest contig for downstream filtering
-    BIOAWK ( ch_prepped_fastas )
-    ch_versions = ch_versions.mix(BIOAWK.out.versions)
-
-    ch_prepped_input = ch_prepped_fastas
-        .join( BIOAWK.out.longest )
-        .map{
-            meta, fasta, length ->
-                def meta_new = meta.clone()
-                meta['longest_contig'] = Integer.parseInt(length)
-            [ meta, fasta ]
-        }
+    ch_intermediate_input = GUNZIP_INPUT_PREP.out.gunzip
+        .mix( ch_input_prep.uncompressed )
+        .groupTuple()
+        .map{
+            meta, files ->
+                def fasta_found = files.find{it.toString().tokenize('.').last().matches('fasta|fas|fna|fa')}
+                def faa_found = files.find{it.toString().endsWith('.faa')}
+                def gbk_found = files.find{it.toString().tokenize('.').last().matches('gbk|gbff')}
+                def fasta = fasta_found != null ? fasta_found : []
+                def faa = faa_found != null ? faa_found : []
+                def gbk = gbk_found != null ? gbk_found : []
+
+                [meta, fasta, faa, gbk]
+        }
+        .branch {
+            meta, fasta, faa, gbk ->
+                preannotated: gbk != []
+                fastas: true
+        }
+
+    // Duplicate the assemblies and filter the copies for long contigs, for BGC screening only.
+    // This speeds up the BGC run and prevents 'no hits found' failures on short contigs.
+    if ( params.run_bgc_screening ){
+        SEQKIT_SEQ ( ch_intermediate_input.fastas.map{meta, fasta, faa, gbk -> [ meta, fasta ]} )
+        ch_input_for_annotation = ch_intermediate_input.fastas
+            .map { meta, fasta, protein, gbk -> [ meta, fasta ] }
+            .mix( SEQKIT_SEQ.out.fastx.map{ meta, fasta -> [ meta + [category: 'long'], fasta ] } )
+            .filter {
+                meta, fasta ->
+                    if ( fasta != [] && fasta.isEmpty() ) log.warn("[nf-core/funcscan] Sample ${meta.id} does not have contigs longer than ${params.bgc_mincontiglength} bp. Will not be screened for BGCs.")
+                    !fasta.isEmpty()
+            }
+        ch_versions = ch_versions.mix( SEQKIT_SEQ.out.versions )
+    } else {
+        ch_input_for_annotation = ch_intermediate_input.fastas.map { meta, fasta, protein, gbk -> [ meta, fasta ] }
+    }
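+
+    // Illustrative note (not part of the pipeline logic): SEQKIT_SEQ removes contigs below
+    // the BGC length cut-off, which is typically wired to `seqkit seq --min-len` through the
+    // module's ext.args (assumed to be set in conf/modules.config). Assuming the threshold
+    // comes from `params.bgc_mincontiglength`, a rough command-line equivalent would be:
+    //
+    //     seqkit seq --min-len <params.bgc_mincontiglength> sample.fasta > sample.long.fasta
+    //
+    // Only the length-filtered copies (tagged with `category: 'long'`) proceed to BGC
+    // screening, while the unfiltered assemblies are kept for AMP/ARG screening.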
 
     /*
         ANNOTATION
     */
 
     // Some tools require annotated FASTAs
-    // For prodigal: run twice, once for gff and once for gbk generation, (for parity with PROKKA which produces both)
-    if ( ( params.run_arg_screening && !params.arg_skip_deeparg ) || ( params.run_amp_screening && ( !params.amp_skip_hmmsearch || !params.amp_skip_amplify || !params.amp_skip_ampir ) ) || ( params.run_bgc_screening && ( !params.bgc_skip_hmmsearch || !params.bgc_skip_antismash ) ) ) {
-
-        if ( params.annotation_tool == "prodigal" ) {
-            PRODIGAL_GFF ( ch_prepped_input, "gff" )
-            GUNZIP_PRODIGAL_FAA ( PRODIGAL_GFF.out.amino_acid_fasta )
-            GUNZIP_PRODIGAL_FNA ( PRODIGAL_GFF.out.nucleotide_fasta)
-            GUNZIP_PRODIGAL_GFF ( PRODIGAL_GFF.out.gene_annotations )
-            ch_versions = ch_versions.mix(PRODIGAL_GFF.out.versions)
-            ch_annotation_faa = GUNZIP_PRODIGAL_FAA.out.gunzip
-            ch_annotation_fna = GUNZIP_PRODIGAL_FNA.out.gunzip
-            ch_annotation_gff = GUNZIP_PRODIGAL_GFF.out.gunzip
-            ch_annotation_gbk = Channel.empty() // Prodigal GBK and GFF output are mutually exclusive
-
-            if ( params.save_annotations == true ) {
-                PRODIGAL_GBK ( ch_prepped_input, "gbk" )
-                ch_versions = ch_versions.mix(PRODIGAL_GBK.out.versions)
-                ch_annotation_gbk = PRODIGAL_GBK.out.gene_annotations // Prodigal GBK output stays zipped because it is currently not used by any downstream subworkflow.
-            }
-        } else if ( params.annotation_tool == "pyrodigal" ) {
-            PYRODIGAL ( ch_prepped_input )
-            GUNZIP_PYRODIGAL_FAA ( PYRODIGAL.out.faa )
-            GUNZIP_PYRODIGAL_FNA ( PYRODIGAL.out.fna)
-            GUNZIP_PYRODIGAL_GFF ( PYRODIGAL.out.gff )
-            ch_versions = ch_versions.mix(PYRODIGAL.out.versions)
-            ch_annotation_faa = GUNZIP_PYRODIGAL_FAA.out.gunzip
-            ch_annotation_fna = GUNZIP_PYRODIGAL_FNA.out.gunzip
-            ch_annotation_gff = GUNZIP_PYRODIGAL_GFF.out.gunzip
-            ch_annotation_gbk = Channel.empty() // Pyrodigal doesn't produce GBK
-        } else if ( params.annotation_tool == "prokka" ) {
-            PROKKA ( ch_prepped_input, [], [] )
-            ch_versions = ch_versions.mix(PROKKA.out.versions)
-            ch_annotation_faa = PROKKA.out.faa
-            ch_annotation_fna = PROKKA.out.fna
-            ch_annotation_gff = PROKKA.out.gff
-            ch_annotation_gbk = PROKKA.out.gbk
-        } else if ( params.annotation_tool == "bakta" ) {
-
-            // BAKTA prepare download
-            if ( params.annotation_bakta_db_localpath ) {
-                ch_bakta_db = Channel
-                    .fromPath( params.annotation_bakta_db_localpath )
-                    .first()
-            } else {
-                BAKTA_BAKTADBDOWNLOAD ( )
-                ch_versions = ch_versions.mix( BAKTA_BAKTADBDOWNLOAD.out.versions )
-                ch_bakta_db = ( BAKTA_BAKTADBDOWNLOAD.out.db )
-            }
+    if ( ( params.run_arg_screening && !params.arg_skip_deeparg ) || ( params.run_amp_screening ) || ( params.run_bgc_screening ) ) {
+        ANNOTATION( ch_input_for_annotation )
+        ch_versions = ch_versions.mix( ANNOTATION.out.versions )
 
-            BAKTA_BAKTA ( ch_prepped_input, ch_bakta_db, [], [] )
-            ch_versions = ch_versions.mix(BAKTA_BAKTA.out.versions)
-            ch_annotation_faa = BAKTA_BAKTA.out.faa
-            ch_annotation_fna = BAKTA_BAKTA.out.fna
-            ch_annotation_gff = BAKTA_BAKTA.out.gff
-            ch_annotation_gbk = BAKTA_BAKTA.out.gbff
-        }
+        ch_new_annotation = ch_input_for_annotation
+            .join( ANNOTATION.out.faa )
+            .join( ANNOTATION.out.gbk )
     } else {
+        ch_new_annotation = ch_intermediate_input.fastas
+    }
+
+    // Mix back the preannotated samples with the newly annotated ones
+    ch_prepped_input = ch_new_annotation
+        .filter { meta, fasta, faa, gbk -> meta.category != 'long' }
+        .mix( ch_intermediate_input.preannotated )
+        .multiMap {
+            meta, fasta, faa, gbk ->
+                fastas: [meta, fasta]
+                faas: [meta, faa]
+                gbks: [meta, gbk]
+        }
+
+    if ( params.run_bgc_screening ){
+
+        ch_prepped_input_long = ch_new_annotation
+            .filter { meta, fasta, faa, gbk -> meta.category == 'long' }
+            .mix( ch_intermediate_input.preannotated )
+            .multiMap {
+                meta, fasta, faa, gbk ->
+                    fastas: [meta, fasta]
+                    faas: [meta, faa]
+                    gbks: [meta, gbk]
+            }
+    }
+
+    /*
+        TAXONOMIC CLASSIFICATION
+    */
+
+    // The final subworkflow reports require taxonomic classification of the contigs.
+    // This can be done either on NT or AA level, depending on the annotation.
+    // TODO: Only NT-level classification at the moment; AA-level classification will be added once its PR is merged.
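+    // Illustrative sketch (assumes TAXA_CLASS emits `sample_taxonomy` as [ meta, tsv ] tuples,
+    // one classification table per sample). For debugging, the channel contents can be
+    // inspected with Nextflow's standard `view` operator:
+    //
+    //     ch_taxonomy_tsv.view { meta, tsv -> "taxonomy for ${meta.id}: ${tsv}" }
+    //
+    // The else branch below deliberately emits empty channels, so the downstream AMP/ARG/BGC
+    // subworkflow calls keep the same signature whether or not classification is enabled.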
+    if ( params.run_taxa_classification ) {
+        TAXA_CLASS ( ch_prepped_input.fastas )
+        ch_versions = ch_versions.mix( TAXA_CLASS.out.versions )
+        ch_taxonomy_tsv = TAXA_CLASS.out.sample_taxonomy
 
-        ch_annotation_faa = Channel.empty()
-        ch_annotation_fna = Channel.empty()
-        ch_annotation_gff = Channel.empty()
-        ch_annotation_gbk = Channel.empty()
+    } else {
+        ch_mmseqs_db = Channel.empty()
+        ch_taxonomy_querydb = Channel.empty()
+        ch_taxonomy_querydb_taxdb = Channel.empty()
+        ch_taxonomy_tsv = Channel.empty()
     }
 
 /*
@@ -214,110 +185,200 @@ workflow FUNCSCAN {
 
     /*
         AMPs
    */
-    if ( params.run_amp_screening ) {
+    if ( params.run_amp_screening && !params.run_taxa_classification ) {
        AMP (
-            ch_prepped_input,
-            ch_annotation_faa
+            ch_prepped_input.fastas,
+            ch_prepped_input.faas
                .filter {
                    meta, file ->
-                        if ( file.isEmpty() ) log.warn("Annotation of following sample produced produced an empty FAA file. AMP screening tools requiring this file will not be executed: ${meta.id}")
+                        if ( file != [] && file.isEmpty() ) log.warn("[nf-core/funcscan] Annotation of the following sample produced an empty FAA file. AMP screening tools requiring this file will not be executed: ${meta.id}")
                        !file.isEmpty()
-                }
+
+                },
+            ch_taxonomy_tsv,
+            ch_prepped_input.gbks
        )
        ch_versions = ch_versions.mix(AMP.out.versions)
+    } else if ( params.run_amp_screening && params.run_taxa_classification ) {
+        AMP (
+            ch_prepped_input.fastas,
+            ch_prepped_input.faas
+                .filter {
+                    meta, file ->
+                        if ( file != [] && file.isEmpty() ) log.warn("[nf-core/funcscan] Annotation of the following sample produced an empty FAA file. AMP screening tools requiring this file will not be executed: ${meta.id}")
+                        !file.isEmpty()
+                },
+            ch_taxonomy_tsv
+                .filter {
+                    meta, file ->
+                        if ( file != [] && file.isEmpty() ) log.warn("[nf-core/funcscan] Taxonomy classification of the following sample produced an empty TSV file. Taxonomy merging will not be executed: ${meta.id}")
+                        !file.isEmpty()
+                },
+            ch_prepped_input.gbks
+        )
+        ch_versions = ch_versions.mix( AMP.out.versions )
    }
 
    /*
        ARGs
    */
-    if ( params.run_arg_screening ) {
-        if (params.arg_skip_deeparg) {
-            ARG ( ch_prepped_input, [] )
+    if ( params.run_arg_screening && !params.run_taxa_classification ) {
+        if ( params.arg_skip_deeparg ) {
+            ARG (
+                ch_prepped_input.fastas,
+                [],
+                ch_taxonomy_tsv
+            )
        } else {
            ARG (
-                ch_prepped_input,
-                ch_annotation_faa
+                ch_prepped_input.fastas,
+                ch_prepped_input.faas
                    .filter {
                        meta, file ->
-                            if ( file.isEmpty() ) log.warn("Annotation of following sample produced produced an empty FAA file. AMP screening tools requiring this file will not be executed: ${meta.id}")
+                            if ( file.isEmpty() ) log.warn("[nf-core/funcscan] Annotation of the following sample produced an empty FAA file. ARG screening tools requiring this file will not be executed: ${meta.id}")
                            !file.isEmpty()
+                        },
+                ch_taxonomy_tsv
+            )
+        }
+        ch_versions = ch_versions.mix( ARG.out.versions )
+    } else if ( params.run_arg_screening && params.run_taxa_classification ) {
+        if ( params.arg_skip_deeparg ) {
+            ARG (
+                ch_prepped_input.fastas,
+                [],
+                ch_taxonomy_tsv
+                    .filter {
+                        meta, file ->
+                            if ( file.isEmpty() ) log.warn("[nf-core/funcscan] Taxonomy classification of the following sample produced an empty TSV file. Taxonomy merging will not be executed: ${meta.id}")
+                            !file.isEmpty()
                        }
+            )
+        } else {
+            ARG (
+                ch_prepped_input.fastas,
+                ch_prepped_input.faas
+                    .filter {
+                        meta, file ->
+                            if ( file.isEmpty() ) log.warn("[nf-core/funcscan] Annotation of the following sample produced an empty FAA file. 
ARG screening tools requiring this file will not be executed: ${meta.id}")
+                            !file.isEmpty()
+                    },
+                ch_taxonomy_tsv
+                    .filter {
+                        meta, file ->
+                            if ( file.isEmpty() ) log.warn("[nf-core/funcscan] Taxonomy classification of the following sample produced an empty TSV file. Taxonomy merging will not be executed: ${meta.id}")
+                            !file.isEmpty()
+                    }
            )
        }
-        ch_versions = ch_versions.mix(ARG.out.versions)
+        ch_versions = ch_versions.mix( ARG.out.versions )
    }
 
    /*
        BGCs
    */
-    if ( params.run_bgc_screening ) {
+    if ( params.run_bgc_screening && !params.run_taxa_classification ) {
        BGC (
-            ch_prepped_input,
-            ch_annotation_gff
+            ch_prepped_input_long.fastas,
+            ch_prepped_input_long.faas
                .filter {
                    meta, file ->
-                        if ( file.isEmpty() ) log.warn("Annotation of following sample produced produced an empty GFF file. AMP screening tools requiring this file will not be executed: ${meta.id}")
+                        if ( file != [] && file.isEmpty() ) log.warn("[nf-core/funcscan] Annotation of the following sample produced an empty FAA file. BGC screening tools requiring this file will not be executed: ${meta.id}")
                        !file.isEmpty()
                },
-            ch_annotation_faa
+            ch_prepped_input_long.gbks
                .filter {
                    meta, file ->
-                        if ( file.isEmpty() ) log.warn("Annotation of following sample produced produced an empty FAA file. AMP screening tools requiring this file will not be executed: ${meta.id}")
+                        if ( file != [] && file.isEmpty() ) log.warn("[nf-core/funcscan] Annotation of the following sample produced an empty GBK file. BGC screening tools requiring this file will not be executed: ${meta.id}")
                        !file.isEmpty()
                },
-            ch_annotation_gbk
+            ch_taxonomy_tsv
+        )
+        ch_versions = ch_versions.mix( BGC.out.versions )
+    } else if ( params.run_bgc_screening && params.run_taxa_classification ) {
+        BGC (
+            ch_prepped_input_long.fastas,
+            ch_prepped_input_long.faas
                .filter {
                    meta, file ->
-                        if ( file.isEmpty() ) log.warn("Annotation of following sample produced produced an empty GBK file. AMP screening tools requiring this file will not be executed: ${meta.id}")
+                        if ( file.isEmpty() ) log.warn("[nf-core/funcscan] Annotation of the following sample produced an empty FAA file. BGC screening tools requiring this file will not be executed: ${meta.id}")
+                        !file.isEmpty()
+                },
+            ch_prepped_input_long.gbks
+                .filter {
+                    meta, file ->
+                        if ( file.isEmpty() ) log.warn("[nf-core/funcscan] Annotation of the following sample produced an empty GBK file. BGC screening tools requiring this file will not be executed: ${meta.id}")
+                        !file.isEmpty()
+                },
+            ch_taxonomy_tsv
+                .filter {
+                    meta, file ->
+                        if ( file.isEmpty() ) log.warn("[nf-core/funcscan] Taxonomy classification of the following sample produced an empty TSV file. 
Taxonomy merging will not be executed: ${meta.id}") !file.isEmpty() } ) - ch_versions = ch_versions.mix(BGC.out.versions) + ch_versions = ch_versions.mix( BGC.out.versions ) } - CUSTOM_DUMPSOFTWAREVERSIONS ( - ch_versions.unique().collectFile(name: 'collated_versions.yml') - ) + // + // Collate and save software versions + // + softwareVersionsToYAML(ch_versions) + .collectFile( + storeDir: "${params.outdir}/pipeline_info", + name: 'nf_core_pipeline_software_mqc_versions.yml', + sort: true, + newLine: true + ).set { ch_collated_versions } // // MODULE: MultiQC // - workflow_summary = WorkflowFuncscan.paramsSummaryMultiqc(workflow, summary_params) - ch_workflow_summary = Channel.value(workflow_summary) - - methods_description = WorkflowFuncscan.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description, params) - ch_methods_description = Channel.value(methods_description) + ch_multiqc_config = Channel.fromPath( + "$projectDir/assets/multiqc_config.yml", checkIfExists: true) + ch_multiqc_custom_config = params.multiqc_config ? + Channel.fromPath(params.multiqc_config, checkIfExists: true) : + Channel.empty() + ch_multiqc_logo = params.multiqc_logo ? + Channel.fromPath(params.multiqc_logo, checkIfExists: true) : + Channel.fromPath("${workflow.projectDir}/docs/images/nf-core-funcscan_logo_light.png", checkIfExists: true) + + summary_params = paramsSummaryMap( + workflow, parameters_schema: "nextflow_schema.json") + ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params)) + + ch_multiqc_custom_methods_description = params.multiqc_methods_description ? + file(params.multiqc_methods_description, checkIfExists: true) : + file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) + ch_methods_description = Channel.value( + methodsDescriptionText(ch_multiqc_custom_methods_description)) + + ch_multiqc_files = ch_multiqc_files.mix( + ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) + ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) + ch_multiqc_files = ch_multiqc_files.mix( + ch_methods_description.collectFile( + name: 'methods_description_mqc.yaml', + sort: true + ) + ) - ch_multiqc_files = Channel.empty() - ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) - ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml')) - ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect()) - if(params.annotation_tool=='prokka'){ch_multiqc_files = ch_multiqc_files.mix( PROKKA.out.txt.collect{it[1]}.ifEmpty([])) } + if ( ( params.run_arg_screening && !params.arg_skip_deeparg ) || ( params.run_amp_screening && ( params.amp_run_hmmsearch || !params.amp_skip_amplify || !params.amp_skip_ampir ) ) || ( params.run_bgc_screening ) ) { + ch_multiqc_files = ch_multiqc_files.mix( ANNOTATION.out.multiqc_files.collect{it[1]} ) + } MULTIQC ( ch_multiqc_files.collect(), ch_multiqc_config.toList(), ch_multiqc_custom_config.toList(), - ch_multiqc_logo.toList() + ch_multiqc_logo.toList(), + [], + [] ) - multiqc_report = MULTIQC.out.report.toList() -} - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - COMPLETION EMAIL AND SUMMARY -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ -workflow.onComplete { - if (params.email || params.email_on_fail) { - NfcoreTemplate.email(workflow, params, summary_params, projectDir, log, 
multiqc_report) - } - NfcoreTemplate.dump_parameters(workflow, params) - NfcoreTemplate.summary(workflow, params, log) - if (params.hook_url) { - NfcoreTemplate.IM_notification(workflow, params, summary_params, projectDir, log) - } + emit: + multiqc_report = MULTIQC.out.report.toList() // channel: /path/to/multiqc_report.html + versions = ch_versions // channel: [ path(versions.yml) ] } /*