Skip to content

Commit

Permalink
VecScreen (nf-core#4304)
Browse files Browse the repository at this point in the history
* vecscreen module

* VecScreen module edit

* VecScreen module edit

* VecScreen module edit

* VecScreen module update

* VecScreen module update

* VecScreen module minor edit

* VecScreen module edit

* VecScreen module edit

* VecScreen module edit

* VecScreen module update: switched to using new BioContainers image

* Changes to the VecScreen module as requested by mahesh-panchal

* Removing a line from modules/nf-core/ncbitools/vecscreen/main.nf, suggested by mahesh-panchal

Co-authored-by: Mahesh Binzer-Panchal <[email protected]>

* Added the missing final end-of-line

* Added the missing final end-of-line

* Edited the VecScreen module and its test workflow to make it compatible with the recent change in the BLAST_MAKEBLASTDB module (which now requires a meta in its input)

* The convention is to skip the md5sum of `versions.yml`

* Fix for Docker

* Making this an array

---------

Co-authored-by: Mahesh Binzer-Panchal <[email protected]>
Co-authored-by: Matthieu Muffato <[email protected]>
  • Loading branch information
3 people authored Nov 14, 2023
1 parent bba7e36 commit 1e4ac4a
Show file tree
Hide file tree
Showing 7 changed files with 140 additions and 0 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,8 @@ jobs:
tags: mitohifi/findmitoreference
- profile: "conda"
tags: nanoplot
- profile: "conda"
tags: ncbitools/vecscreen
- profile: "conda"
tags: parabricks/applybqsr
- profile: "conda"
Expand Down
46 changes: 46 additions & 0 deletions modules/nf-core/ncbitools/vecscreen/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
// Screens a FASTA file for vector/adapter contamination with NCBI VecScreen.
//
// Inputs:
//   meta / fasta_file             - sample metadata map and the FASTA file to screen
//   adapters_database_meta /
//   adapters_database_directory   - metadata map and a directory holding a nucleotide
//                                   BLAST database (located via its "*.nin" index file)
// Outputs:
//   vecscreen_output - the VecScreen report, named "<prefix>.vecscreen.out"
//   versions         - versions.yml with the (arbitrary, see below) tool version
process NCBITOOLS_VECSCREEN {
    tag "$meta.id"
    label 'process_single'

    container "docker.io/biocontainers/ncbi-tools-bin:6.1.20170106-6-deb_cv2"

    input:
    tuple val(meta), path(fasta_file)
    tuple val(adapters_database_meta), path(adapters_database_directory)

    output:
    // Glob rather than "${meta.id}.vecscreen.out": the report is written as
    // "${prefix}.vecscreen.out", and prefix can be overridden through task.ext.prefix.
    tuple val(meta), path("*.vecscreen.out"), emit: vecscreen_output
    path "versions.yml"                     , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // Exit if running this module with -profile conda / -profile mamba
    if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
        error "The VecScreen module does not support Conda. Please use Docker / Singularity / Podman instead."
    }
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // WARN: VecScreen doesn't output a version number and doesn't appear to have a GitHub repository. 1.0 is arbitrarily used here as the version number
    // The database name handed to `vecscreen -d` is the path of the "*.nin" index file
    // found directly inside the database directory, with the ".nin" suffix removed.
    """
    DB=`find -L ${adapters_database_directory} -maxdepth 1 -name "*.nin" | sed 's/\\.nin\$//'`
    vecscreen -d \$DB ${args} -i ${fasta_file} -o ${prefix}.vecscreen.out

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        vecscreen: 1.0
    END_VERSIONS
    """

    stub:
    // prefix must be defined here as well: variables declared in the script: section
    // are not visible in the stub: section.
    def prefix = task.ext.prefix ?: "${meta.id}"
    // WARN: VecScreen doesn't output a version number and doesn't appear to have a GitHub repository. 1.0 is arbitrarily used here as the version number
    """
    touch ${prefix}.vecscreen.out

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        vecscreen: 1.0
    END_VERSIONS
    """
}
50 changes: 50 additions & 0 deletions modules/nf-core/ncbitools/vecscreen/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json
name: "NCBITOOLS_VECSCREEN"
description: NCBI tool for detecting vector contamination in nucleic acid sequences. This tool is older than NCBI's FCS-adaptor, which is for the same purpose
keywords:
  - assembly
  - genomics
  - quality control
  - contamination
  - vector
  - NCBI
tools:
  - "ncbitools":
      description: |
        "NCBI libraries for biology applications (text-based utilities)"
      homepage: "https://www.ncbi.nlm.nih.gov/tools/vecscreen/"
      documentation: "https://www.ncbi.nlm.nih.gov/tools/vecscreen/interpretation/"
      tool_dev_url: "https://www.ncbi.nlm.nih.gov/tools/vecscreen/"
      licence: ["The Open Database License"]

# The input entries mirror the two input tuples of the process, in order:
#   tuple val(meta), path(fasta_file)
#   tuple val(adapters_database_meta), path(adapters_database_directory)
input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', taxid:'6973' ]
  - fasta_file:
      type: file
      description: FASTA file that will be screened for contaminants
  - adapters_database_meta:
      type: map
      description: |
        Groovy Map containing information about the adapters database
        e.g. [ id:'test' ]
  - adapters_database_directory:
      type: directory
      description: |
        Directory containing a nucleotide BLAST database with vector sequences
        (e.g. the output of BLAST_MAKEBLASTDB). The database is located through
        its "*.nin" index file at the top level of the directory

output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', taxid:'6973' ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - vecscreen_output:
      type: file
      description: VecScreen report file. This can be in different formats depending on the value of the optional -f parameter. 0 = HTML format, with alignments. 1 = HTML format, no alignments. 2 = Text list, with alignments. 3 = Text list, no alignments. default = 0
      pattern: "*.vecscreen.out"

authors:
  - "@eeaunin"
4 changes: 4 additions & 0 deletions tests/config/pytest_modules.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2474,6 +2474,10 @@ ncbigenomedownload:
- modules/nf-core/ncbigenomedownload/**
- tests/modules/nf-core/ncbigenomedownload/**

ncbitools/vecscreen:
- modules/nf-core/ncbitools/vecscreen/**
- tests/modules/nf-core/ncbitools/vecscreen/**

nextclade/datasetget:
- modules/nf-core/nextclade/datasetget/**
- tests/modules/nf-core/nextclade/datasetget/**
Expand Down
12 changes: 12 additions & 0 deletions tests/modules/nf-core/ncbitools/vecscreen/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#!/usr/bin/env nextflow

nextflow.enable.dsl = 2

include { BLAST_MAKEBLASTDB } from '../../../../../modules/nf-core/blast/makeblastdb/main.nf'
include { NCBITOOLS_VECSCREEN } from '../../../../../modules/nf-core/ncbitools/vecscreen/main.nf'

// Test workflow: builds a BLAST database from the PacBio primers FASTA and then
// screens that same FASTA against it with VecScreen.
workflow test_ncbitools_vecscreen {
    // Single-element list holding the primers file; used both as the database
    // source and as the query sequences.
    primers_fasta = [
        file(params.test_data['homo_sapiens']['pacbio']['primers'], checkIfExists: true)
    ]

    // BLAST_MAKEBLASTDB takes a [ meta, fasta ] tuple.
    BLAST_MAKEBLASTDB (
        [ [id:'test'], primers_fasta ]
    )

    // First argument: [ meta, fasta ] to screen; second: the database channel.
    NCBITOOLS_VECSCREEN (
        [ [id:'test'], primers_fasta ],
        BLAST_MAKEBLASTDB.out.db
    )
}
17 changes: 17 additions & 0 deletions tests/modules/nf-core/ncbitools/vecscreen/nextflow.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
process {
    // Publish under <outdir>/<tool>, where <tool> is the last ':'-separated part of
    // the process name, cut at the first '_' and lower-cased
    // (NCBITOOLS_VECSCREEN -> "ncbitools").
    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }

    // Build a nucleotide database in BLAST database format version 4.
    withName: 'BLAST_MAKEBLASTDB' {
        ext.args = '-dbtype nucl -blastdb_version 4'
    }

    // -f3 selects the "text list, no alignments" report format.
    withName: 'NCBITOOLS_VECSCREEN' {
        ext.args = '-f3'
    }
}

// The BioContainer runs as the user `biodocker`, which has no write access to
// the task directory and cannot even touch `.command.trace`.
// Enabling `userEmulation` fixes this, but only when this config file is
// actually loaded: the command in test.yml must pass an explicit
// `-c ./tests/modules/nf-core/ncbitools/vecscreen/nextflow.config`.
docker {
    userEmulation = true
}

9 changes: 9 additions & 0 deletions tests/modules/nf-core/ncbitools/vecscreen/test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# pytest-workflow test definition for the ncbitools/vecscreen module.
# The second `-c` loads the module-specific config (ext.args and the Docker
# user-emulation setting).
- name: "ncbitools vecscreen"
  command: nextflow run ./tests/modules/nf-core/ncbitools/vecscreen -entry test_ncbitools_vecscreen -c ./tests/config/nextflow.config -c ./tests/modules/nf-core/ncbitools/vecscreen/nextflow.config
  tags:
    - "ncbitools"
    - "ncbitools/vecscreen"
  files:
    - path: "output/ncbitools/test.vecscreen.out"
      md5sum: b1e5bff88cbc6195bf730e13dc7f5873
    # Existence check only: by convention the md5sum of versions.yml is skipped.
    - path: "output/ncbitools/versions.yml"

0 comments on commit 1e4ac4a

Please sign in to comment.