From 1e4ac4aa2c612f9547f79f02ef7c651ccc9f657b Mon Sep 17 00:00:00 2001 From: eeaunin <34213553+eeaunin@users.noreply.github.com> Date: Tue, 14 Nov 2023 10:33:52 +0000 Subject: [PATCH] VecScreen (#4304) * vecscreen module * VecScreen module edit * VecScreen module edit * VecScreen module edit * VecScreen module update * VecScreen module update * VecScreen module minor edit * VecScreen module edit * VecScreen module edit * VecScreen module edit * VecScreen module update: switched to using new BioContainers image * Changes to the VecScreen module as requested by mahesh-panchal * Removing a line from modules/nf-core/ncbitools/vecscreen/main.nf, suggested by mahesh-panchal Co-authored-by: Mahesh Binzer-Panchal * Added the missing final end-of-line * Added the missing final end-of-line * Edited the VecScreen module and its test workflow to make it compatible with the recent change in the BLAST_MAKEBLASTDB module (which now requires a meta in its input) * The convention is to skip the md5sum of `versions.yml` * Fix for Docker * Making this an array --------- Co-authored-by: Mahesh Binzer-Panchal Co-authored-by: Matthieu Muffato --- .github/workflows/test.yml | 2 + modules/nf-core/ncbitools/vecscreen/main.nf | 46 +++++++++++++++++ modules/nf-core/ncbitools/vecscreen/meta.yml | 50 +++++++++++++++++++ tests/config/pytest_modules.yml | 4 ++ .../nf-core/ncbitools/vecscreen/main.nf | 12 +++++ .../ncbitools/vecscreen/nextflow.config | 17 +++++++ .../nf-core/ncbitools/vecscreen/test.yml | 9 ++++ 7 files changed, 140 insertions(+) create mode 100644 modules/nf-core/ncbitools/vecscreen/main.nf create mode 100644 modules/nf-core/ncbitools/vecscreen/meta.yml create mode 100644 tests/modules/nf-core/ncbitools/vecscreen/main.nf create mode 100644 tests/modules/nf-core/ncbitools/vecscreen/nextflow.config create mode 100644 tests/modules/nf-core/ncbitools/vecscreen/test.yml diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 4104aaf7e66..526127d4534 100644 --- 
a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -259,6 +259,8 @@ jobs: tags: mitohifi/findmitoreference - profile: "conda" tags: nanoplot + - profile: "conda" + tags: ncbitools/vecscreen - profile: "conda" tags: parabricks/applybqsr - profile: "conda" diff --git a/modules/nf-core/ncbitools/vecscreen/main.nf b/modules/nf-core/ncbitools/vecscreen/main.nf new file mode 100644 index 00000000000..a277ecaee66 --- /dev/null +++ b/modules/nf-core/ncbitools/vecscreen/main.nf @@ -0,0 +1,46 @@ +process NCBITOOLS_VECSCREEN { + tag "$meta.id" + label 'process_single' + + container "docker.io/biocontainers/ncbi-tools-bin:6.1.20170106-6-deb_cv2" + + input: + tuple val(meta), path(fasta_file) + tuple val(adapters_database_meta), path(adapters_database_directory) + + output: + tuple val(meta), path("*.vecscreen.out") , emit: vecscreen_output + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "The VecScreen module does not support Conda. Please use Docker / Singularity / Podman instead." + } + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + // WARN: VecScreen doesn't output a version number and doesn't appear to have a GitHub repository. 1.0 is arbitrarily used here as the version number + """ + DB=`find -L ${adapters_database_directory} -maxdepth 1 -name "*.nin" | sed 's/\\.nin\$//'` + vecscreen -d \$DB ${args} -i ${fasta_file} -o ${prefix}.vecscreen.out + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + vecscreen: 1.0 + END_VERSIONS + """ + + stub: + // WARN: VecScreen doesn't output a version number and doesn't appear to have a GitHub repository. 
1.0 is arbitrarily used here as the version number. The `prefix` defined in the script block is not in scope in the stub, so it is resolved inline below + """ + touch ${task.ext.prefix ?: meta.id}.vecscreen.out + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + vecscreen: 1.0 + END_VERSIONS + """ +} diff --git a/modules/nf-core/ncbitools/vecscreen/meta.yml b/modules/nf-core/ncbitools/vecscreen/meta.yml new file mode 100644 index 00000000000..45f0f7e2780 --- /dev/null +++ b/modules/nf-core/ncbitools/vecscreen/meta.yml @@ -0,0 +1,50 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json +name: "NCBITOOLS_VECSCREEN" +description: NCBI tool for detecting vector contamination in nucleic acid sequences. This tool is older than NCBI's FCS-adaptor, which is for the same purpose +keywords: + - assembly + - genomics + - quality control + - contamination + - vector + - NCBI +tools: + - "ncbitools": + description: | + "NCBI libraries for biology applications (text-based utilities)" + homepage: "https://www.ncbi.nlm.nih.gov/tools/vecscreen/" + documentation: "https://www.ncbi.nlm.nih.gov/tools/vecscreen/interpretation/" + tool_dev_url: "https://www.ncbi.nlm.nih.gov/tools/vecscreen/" + licence: ["The Open Database License"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', taxid:'6973' ] + - fasta_file: + type: file + description: FASTA file that will be screened for contaminants + - adapters_database_directory: + type: directory + description: Directory containing a nucleotide BLAST database (e.g. as produced by makeblastdb) with vector sequences + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', taxid:'9606' ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - vecscreen_output: + type: file + description: VecScreen report file. This can be in different formats depending on the value of the optional -f parameter. 0 = HTML format, with alignments. 1 = HTML format, no alignments. 
2 = Text list, with alignments. 3 = Text list, no alignments. default = 0 + pattern: "*.vecscreen.out" + +authors: + - "@eeaunin" diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index cb56a844596..774b1c40ac4 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -2474,6 +2474,10 @@ ncbigenomedownload: - modules/nf-core/ncbigenomedownload/** - tests/modules/nf-core/ncbigenomedownload/** +ncbitools/vecscreen: + - modules/nf-core/ncbitools/vecscreen/** + - tests/modules/nf-core/ncbitools/vecscreen/** + nextclade/datasetget: - modules/nf-core/nextclade/datasetget/** - tests/modules/nf-core/nextclade/datasetget/** diff --git a/tests/modules/nf-core/ncbitools/vecscreen/main.nf b/tests/modules/nf-core/ncbitools/vecscreen/main.nf new file mode 100644 index 00000000000..3260bc24c04 --- /dev/null +++ b/tests/modules/nf-core/ncbitools/vecscreen/main.nf @@ -0,0 +1,12 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { BLAST_MAKEBLASTDB } from '../../../../../modules/nf-core/blast/makeblastdb/main.nf' +include { NCBITOOLS_VECSCREEN } from '../../../../../modules/nf-core/ncbitools/vecscreen/main.nf' + +workflow test_ncbitools_vecscreen { + input = [ file(params.test_data['homo_sapiens']['pacbio']['primers'], checkIfExists: true) ] + BLAST_MAKEBLASTDB ( [[id:'test'], input] ) + NCBITOOLS_VECSCREEN ( [ [id:'test'], input ], BLAST_MAKEBLASTDB.out.db) +} diff --git a/tests/modules/nf-core/ncbitools/vecscreen/nextflow.config b/tests/modules/nf-core/ncbitools/vecscreen/nextflow.config new file mode 100644 index 00000000000..1d0d070c0c2 --- /dev/null +++ b/tests/modules/nf-core/ncbitools/vecscreen/nextflow.config @@ -0,0 +1,17 @@ +process { + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + withName: BLAST_MAKEBLASTDB { + ext.args = '-dbtype nucl -blastdb_version 4' + } + withName: NCBITOOLS_VECSCREEN { + ext.args = '-f3' + } +} + +// The BioContainer 
runs things as the user `biodocker`, which has no write +// access to the directory and can't even touch `.command.trace`. +// Setting `docker.userEmulation` to true fixes this, but only if this config file is actually loaded! +// The nextflow command in test.yml therefore has to pass an explicit +// `-c ./tests/modules/nf-core/ncbitools/vecscreen/nextflow.config` +docker.userEmulation = true + diff --git a/tests/modules/nf-core/ncbitools/vecscreen/test.yml b/tests/modules/nf-core/ncbitools/vecscreen/test.yml new file mode 100644 index 00000000000..fbe6831b735 --- /dev/null +++ b/tests/modules/nf-core/ncbitools/vecscreen/test.yml @@ -0,0 +1,9 @@ +- name: "ncbitools vecscreen" + command: nextflow run ./tests/modules/nf-core/ncbitools/vecscreen -entry test_ncbitools_vecscreen -c ./tests/config/nextflow.config -c ./tests/modules/nf-core/ncbitools/vecscreen/nextflow.config + tags: + - "ncbitools" + - "ncbitools/vecscreen" + files: + - path: "output/ncbitools/test.vecscreen.out" + md5sum: b1e5bff88cbc6195bf730e13dc7f5873 + - path: "output/ncbitools/versions.yml"