diff --git a/README.md b/README.md index 6ae0ff05e..7787921e0 100644 --- a/README.md +++ b/README.md @@ -283,7 +283,7 @@ To learn more about the docker pull rate limits and the open source software pro | [VIBRANT](https://hub.docker.com/r/staphb/vibrant/)
[![docker pulls](https://badgen.net/docker/pulls/staphb/vibrant)](https://hub.docker.com/r/staphb/vibrant) | | https://github.com/AnantharamanLab/VIBRANT | | [VIGOR4](https://hub.docker.com/r/staphb/vigor4/)
[![docker pulls](https://badgen.net/docker/pulls/staphb/vigor4)](https://hub.docker.com/r/staphb/vigor4) | | https://github.com/JCVenterInstitute/VIGOR4 | | [VirSorter2](https://hub.docker.com/r/staphb/virsorter2/)
[![docker pulls](https://badgen.net/docker/pulls/staphb/virsorter2)](https://hub.docker.com/r/staphb/virsorter2/) | | https://github.com/jiarong/VirSorter2 | -| [VirulenceFinder](https://hub.docker.com/r/staphb/virulencefinder/)
[![docker pulls](https://badgen.net/docker/pulls/staphb/virulencefinder)](https://hub.docker.com/r/staphb/virulencefinder/) | | https://bitbucket.org/genomicepidemiology/virulencefinder/src/master/
https://bitbucket.org/genomicepidemiology/virulencefinder_db/src/master/ | +| [VirulenceFinder](https://hub.docker.com/r/staphb/virulencefinder/)
[![docker pulls](https://badgen.net/docker/pulls/staphb/virulencefinder)](https://hub.docker.com/r/staphb/virulencefinder/) | | https://bitbucket.org/genomicepidemiology/virulencefinder/src/master/
https://bitbucket.org/genomicepidemiology/virulencefinder_db/src/master/ | | [wtdbg2](https://hub.docker.com/r/staphb/wtdbg2/)
[![docker pulls](https://badgen.net/docker/pulls/staphb/wtdbg2)](https://hub.docker.com/r/staphb/wtdbg2) | | https://github.com/ruanjue/wtdbg2 | You can also view the list of images on Docker hub here: https://hub.docker.com/r/staphb/
diff --git a/virulencefinder/3.0.0/Dockerfile b/virulencefinder/3.0.0/Dockerfile
new file mode 100644
index 000000000..3588a1873
--- /dev/null
+++ b/virulencefinder/3.0.0/Dockerfile
@@ -0,0 +1,129 @@
+ARG VIRULENCEFINDER_VER="3.0.0"
+ARG VIRULENCEFINDER_DB_VER="2.0.0"
+# Database sometimes is not properly versioned; pinning to the most recent commit (made on 2024-04-06) instead would look something like the ARG below
+# see here: https://bitbucket.org/genomicepidemiology/virulencefinder_db/commits/bcf7f0b26271a59ca85715fa2ab8a0c380e5357b
+# ARG VIRULENCEFINDER_DB_COMMIT_HASH="bcf7f0b26271a59ca85715fa2ab8a0c380e5357b"
+
+FROM ubuntu:jammy AS app
+
+# re-instantiating for use in the app layer (VIRULENCEFINDER_DB_COMMIT_HASH has no default and is not used by any step below)
+ARG VIRULENCEFINDER_VER
+ARG VIRULENCEFINDER_DB_COMMIT_HASH
+ARG VIRULENCEFINDER_DB_VER
+ARG KMA_VER="1.4.15"
+
+# metadata
+LABEL base.image="ubuntu:jammy"
+LABEL dockerfile.version="1"
+LABEL software="VirulenceFinder"
+LABEL software.version="${VIRULENCEFINDER_VER}"
+LABEL description="Tool for identifying the virulence genes in E. coli, Enterococcus, Staphylococcus aureus, & Listeria from reads or assemblies"
+LABEL website="https://bitbucket.org/genomicepidemiology/virulencefinder"
+LABEL license="https://bitbucket.org/genomicepidemiology/virulencefinder/src/master/"
+LABEL maintainer="Curtis Kapsak"
+LABEL maintainer.email="kapsakcj@gmail.com"
+LABEL maintainer1="Erin Young"
+LABEL maintainer1.email="eriny@utah.gov"
+
+# install dependencies; cleanup apt garbage
+# ncbi-blast+ v2.12.0 (ubuntu:jammy), min required version is 2.8.1
+# python3 v3.10.12, min required version is 3.10
+RUN apt-get update && apt-get install -y --no-install-recommends \
+  wget \
+  ca-certificates \
+  procps \
+  git \
+  ncbi-blast+ \
+  python3 \
+  python3-pip \
+  python3-setuptools \
+  python3-dev \
+  gcc \
+  make \
+  libz-dev \
+  dos2unix \
+  unzip \
+  python-is-python3 && \
+  apt-get autoclean && rm -rf /var/lib/apt/lists/* && \
+  update-alternatives --install /usr/bin/python python /usr/bin/python3 10
+
+# Install kma at the ${KMA_VER} tag; the compiled binaries are moved onto the PATH
+# apt deps: libz-dev (for compiling)
+RUN git clone --branch ${KMA_VER} --depth 1 https://bitbucket.org/genomicepidemiology/kma.git && \
+  cd kma &&\
+  make &&\
+  mv kma kma_index kma_shm kma_update /usr/local/bin/
+
+# download VIRULENCEFINDER database at the ${VIRULENCEFINDER_DB_VER} tag; index database w/ kma
+# 'git clone --branch <tag>' is what actually checks the tag out; a default-branch clone followed by
+# 'git fetch ... tag <tag>' only downloads the tag and leaves the default-branch files in the working tree
+# NOTE: the database lives in '/databases' (CGE_VIRULENCEFINDER_DB below points there); '/database' is a symlink to it
+# dos2unix on the FASTA files to ensure they have LF line endings
+RUN git clone --branch ${VIRULENCEFINDER_DB_VER} --depth 1 https://bitbucket.org/genomicepidemiology/virulencefinder_db.git /databases && \
+  ln -s /databases /database && \
+  cd /databases && \
+  rm -rf .git && \
+  dos2unix *.fsa && \
+  python3 INSTALL.py kma_index
+
+# install virulencefinder to specific tag/version (no pip cache kept in the layer); make /data
+RUN git clone --branch ${VIRULENCEFINDER_VER} --depth 1 https://bitbucket.org/genomicepidemiology/virulencefinder.git && \
+  rm -rf /virulencefinder/.git && \
+  cd /virulencefinder && \
+  pip3 install --no-cache-dir . && \
+  mkdir /data
+
+# set $PATH and locale settings for singularity compatibility; point the tool at blastn and the database
+ENV PATH="/virulencefinder:${PATH}" \
+  LC_ALL=C.UTF-8 \
+  CGE_BLASTN=/usr/bin/blastn \
+  CGE_VIRULENCEFINDER_DB=/databases
+
+# set final working directory for production docker image (app layer only)
+WORKDIR /data
+
+# force bash shell so the 'echo -e' below, which writes the wrapper script, runs properly
+SHELL ["/bin/bash", "-c"]
+
+# setting a janky alias for everyone that uses the "latest" tag
+RUN echo -e '#!/bin/bash\npython -m virulencefinder "$@"' > /usr/bin/virulencefinder.py && \
+  chmod +x /usr/bin/virulencefinder.py
+
+# default command is to pull up help options for virulencefinder
+CMD [ "python", "-m", "virulencefinder", "-h" ]
+
+### START OF TEST STAGE ###
+FROM app AS test
+
+RUN python -m virulencefinder -h && /usr/bin/virulencefinder.py -h
+
+# set working directory for test layer
+WORKDIR /test
+
+# download an example assembly; test with VirulenceFinder
+# Escherichia coli complete genome (Unicycler assembly)
+# GenBank Nucleotide entry: https://www.ncbi.nlm.nih.gov/nuccore/CP113091.1/
+# BioSample:SAMN08799860
+RUN mkdir asm-input && \
+  wget -q https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/012/224/845/GCA_012224845.2_ASM1222484v2/GCA_012224845.2_ASM1222484v2_genomic.fna.gz && \
+  gunzip GCA_012224845.2_ASM1222484v2_genomic.fna.gz && \
+  python -m virulencefinder -h && \
+  which blastn && \
+  head -n 5 /test/GCA_012224845.2_ASM1222484v2_genomic.fna && \
+  python -m virulencefinder -ifa /test/GCA_012224845.2_ASM1222484v2_genomic.fna --extented_output -o asm-input && \
+  ls asm-input && \
+  cat asm-input/results_tab.tsv
+
+# download Illumina reads for the same sample ^ and test reads as input into VirulenceFinder
+RUN mkdir /test/reads-input && \
+  wget -q ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR690/006/SRR6903006/SRR6903006_1.fastq.gz && \
+  wget -q ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR690/006/SRR6903006/SRR6903006_2.fastq.gz && \
+  python -m virulencefinder -ifq SRR6903006_1.fastq.gz SRR6903006_2.fastq.gz --extented_output -o /test/reads-input && \
+  cat /test/reads-input/results_tab.tsv
+
+# test using FASTA supplied with VirulenceFinder code; list the files it writes
+# expect to see hits to astA and 2 stx genes; unfortunately it finds astA and 3 stx genes (that don't match)
+# issue created here: https://bitbucket.org/genomicepidemiology/virulencefinder/issues/11/test-results-do-not-match-expected-results
+RUN cd /virulencefinder/tests && \
+  python -m virulencefinder -ifa data/test.fsa -o . && \
+  ls
diff --git a/virulencefinder/3.0.0/README.md b/virulencefinder/3.0.0/README.md
new file mode 100644
index 000000000..393ad9adb
--- /dev/null
+++ b/virulencefinder/3.0.0/README.md
@@ -0,0 +1,115 @@
+# VirulenceFinder 3.0.0 Docker Image
+
+A docker image that contains VirulenceFinder, a tool for identifying virulence factors in E. coli, Enterococcus, S. aureus, and Listeria isolates from reads or assemblies
+
+[Link to StaPH-B DockerHub repository](https://hub.docker.com/r/staphb/virulencefinder)
+
+Main tool:
+
+- Main Code Repo: [https://bitbucket.org/genomicepidemiology/virulencefinder/src/master/](https://bitbucket.org/genomicepidemiology/virulencefinder/src/master/)
+- VirulenceFinder database: [https://bitbucket.org/genomicepidemiology/virulencefinder_db/src/master/](https://bitbucket.org/genomicepidemiology/virulencefinder_db/src/master/)
+- You may be familiar with the web version of VirulenceFinder: [https://cge.food.dtu.dk/services/VirulenceFinder/](https://cge.food.dtu.dk/services/VirulenceFinder/)
+
+Additional tools:
+
+- python 3.10.12
+- cgecore 1.5.6
+- biopython 1.84
+- numpy 2.1.2
+- pandas 2.2.3
+- tabulate 0.9.0
+- [kma](https://bitbucket.org/genomicepidemiology/kma/src/master/) 1.4.15
+- ncbi-blast+ 2.12.0
+
+Database version:
+
+VirulenceFinder database version: tag 2.0.0 made on 2024-05-29.
[Link to database tags](https://bitbucket.org/genomicepidemiology/virulencefinder_db/downloads/?tab=tags) + +## Usage + +```bash +usage: virulencefinder.py [-h] -i INFILE [INFILE ...] [-o OUTDIR] [-tmp TMP_DIR] [-mp METHOD_PATH] [-p DB_PATH] [-d DATABASES] [-l MIN_COV] [-t THRESHOLD] [-x] [-q] + +optional arguments: + -h, --help show this help message and exit + -i INFILE [INFILE ...], --infile INFILE [INFILE ...] + FASTA or FASTQ input files. + -o OUTDIR, --outputPath OUTDIR + Path to blast output + -tmp TMP_DIR, --tmp_dir TMP_DIR + Temporary directory for storage of the results from the external software. + -mp METHOD_PATH, --methodPath METHOD_PATH + Path to method to use (kma or blastn) + -p DB_PATH, --databasePath DB_PATH + Path to the databases + -d DATABASES, --databases DATABASES + Databases chosen to search in - if non is specified all is used + -l MIN_COV, --mincov MIN_COV + Minimum coverage + -t THRESHOLD, --threshold THRESHOLD + Minimum threshold for identity + -x, --extented_output + Give extented output with allignment files, template and query hits in fasta and a tab seperated file with gene profile results + -q, --quiet +``` + +## Notes and Recommendations + +- You do not need to supply a database or use the `-p` or `-d` flags + - Database is included in the image at `/databases`, and the `CGE_VIRULENCEFINDER_DB` environment variable is set to that path so VirulenceFinder finds it without any flags + - (*NOT RECOMMENDED*) If you do need to use your own database, you will need to first index it with `kma` and use the `virulencefinder.py -p` flag. You can find instructions for this on the VirulenceFinder Bitbucket README. `kma` is included in this docker image for database indexing. + - VirulenceFinder does **NOT** create an output directory when you use the `-o` flag. You MUST create it beforehand or it will throw an error. + - **Default % Identity threshold: 90%**. Adjust with `-t 0.95` + - **Default % coverage threshold: 60%**.
Adjust with `-l 0.70` + - Use the `-x` flag (extended output) if you want the traditional/legacy VirulenceFinder output files `results_tab.tsv results.txt Virulence_genes.fsa Hit_in_genome_seq.fsa`. Otherwise you will need to parse the default output file `data.json` for results + - (*RECOMMENDED*) Use raw reads due to the increased sensitivity (without loss of specificity) and the additional information gleaned from KMA output (specifically the depth metric). You also save the time needed to assemble the genome first. [CITATION NEEDED, PROBABLY THE KMA PAPER] +- Querying reads: + - This will run VirulenceFinder with `kma` (instead of ncbi-blast+) + - Only one of the PE read files is necessary. There is likely little benefit to using both R1 and R2. It will take longer to run if you use both R1 and R2 files. +- Querying assemblies: + - This will run VirulenceFinder with `ncbi-blast+` + - VirulenceFinder does not clean up after itself. `tmp/` (which contains 7 different `.xml` files) will exist in the specified output directory + +## Example Usage: Docker + +```bash +# download the image +$ docker pull staphb/virulencefinder:latest + +# input files are in my PWD +$ ls +E-coli.skesa.fasta E-coli.R1.fastq.gz E-coli.R2.fastq.gz + +# make an output directory +$ mkdir output-dir-reads output-dir-asm + +# query reads, mount PWD to /data inside container (broken into two lines for readability) +$ docker run --rm -u $(id -u):$(id -g) -v $PWD:/data staphb/virulencefinder:latest \ + virulencefinder.py -ifq /data/E-coli.R1.fastq.gz -o /data/output-dir-reads + +# query assembly +$ docker run --rm -u $(id -u):$(id -g) -v $PWD:/data staphb/virulencefinder:latest \ + virulencefinder.py -ifa /data/E-coli.skesa.fasta -o /data/output-dir-asm +``` + +## Example Usage: Singularity + +```bash +# download the image +$ singularity build virulencefinder.latest.sif docker://staphb/virulencefinder:latest + +# files are in my PWD +$ ls +E-coli.skesa.fasta E-coli.R1.fastq.gz E-coli.R2.fastq.gz +
+# make an output directory +$ mkdir output-dir-reads output-dir-asm + +# query reads; mount PWD to /data inside container +$ singularity exec --no-home -B $PWD:/data virulencefinder.latest.sif \ + virulencefinder.py -ifq /data/E-coli.R1.fastq.gz -o /data/output-dir-reads + +# query assembly +$ singularity exec --no-home -B $PWD:/data virulencefinder.latest.sif \ + virulencefinder.py -ifa /data/E-coli.skesa.fasta -o /data/output-dir-asm +```