From edc39c132802970341014def0155cf2a9c76a12d Mon Sep 17 00:00:00 2001 From: Erin Young Date: Thu, 21 Mar 2024 17:06:54 +0000 Subject: [PATCH 01/10] adding virulencefinder version 2.0.5 --- README.md | 2 +- virulencefinder/2.0.5/Dockerfile | 108 ++++++++++++++++++++++++++++ virulencefinder/2.0.5/README.md | 119 +++++++++++++++++++++++++++++++ 3 files changed, 228 insertions(+), 1 deletion(-) create mode 100644 virulencefinder/2.0.5/Dockerfile create mode 100644 virulencefinder/2.0.5/README.md diff --git a/README.md b/README.md index 6ae0ff05e..60c026b93 100644 --- a/README.md +++ b/README.md @@ -283,7 +283,7 @@ To learn more about the docker pull rate limits and the open source software pro | [VIBRANT](https://hub.docker.com/r/staphb/vibrant/)
[![docker pulls](https://badgen.net/docker/pulls/staphb/vibrant)](https://hub.docker.com/r/staphb/vibrant) | | https://github.com/AnantharamanLab/VIBRANT | | [VIGOR4](https://hub.docker.com/r/staphb/vigor4/)
[![docker pulls](https://badgen.net/docker/pulls/staphb/vigor4)](https://hub.docker.com/r/staphb/vigor4) | | https://github.com/JCVenterInstitute/VIGOR4 | | [VirSorter2](https://hub.docker.com/r/staphb/virsorter2/)
[![docker pulls](https://badgen.net/docker/pulls/staphb/virsorter2)](https://hub.docker.com/r/staphb/virsorter2/) | | https://github.com/jiarong/VirSorter2 | -| [VirulenceFinder](https://hub.docker.com/r/staphb/virulencefinder/)
[![docker pulls](https://badgen.net/docker/pulls/staphb/virulencefinder)](https://hub.docker.com/r/staphb/virulencefinder/) | | https://bitbucket.org/genomicepidemiology/virulencefinder/src/master/
https://bitbucket.org/genomicepidemiology/virulencefinder_db/src/master/ | +| [VirulenceFinder](https://hub.docker.com/r/staphb/virulencefinder/)
[![docker pulls](https://badgen.net/docker/pulls/staphb/virulencefinder)](https://hub.docker.com/r/staphb/virulencefinder/) | | https://bitbucket.org/genomicepidemiology/virulencefinder/src/master/
https://bitbucket.org/genomicepidemiology/virulencefinder_db/src/master/ | | [wtdbg2](https://hub.docker.com/r/staphb/wtdbg2/)
[![docker pulls](https://badgen.net/docker/pulls/staphb/wtdbg2)](https://hub.docker.com/r/staphb/wtdbg2) | | https://github.com/ruanjue/wtdbg2 | You can also view the list of images on Docker hub here: https://hub.docker.com/r/staphb/ diff --git a/virulencefinder/2.0.5/Dockerfile b/virulencefinder/2.0.5/Dockerfile new file mode 100644 index 000000000..1ea63dcfa --- /dev/null +++ b/virulencefinder/2.0.5/Dockerfile @@ -0,0 +1,108 @@ +ARG VIRULENCEFINDER_VER="2.0.5" +# Database not properly versioned, so using most recent commit made on 2024-01-02 +# see here: https://bitbucket.org/genomicepidemiology/virulencefinder_db/commits/2b705359191a24f6db64f891ab07c93b0281e685 +ARG VIRULENCEFINDER_DB_COMMIT_HASH="2b705359191a24f6db64f891ab07c93b0281e685" + +FROM ubuntu:focal as app + +# re-instantiating for use in the app layer +ARG VIRULENCEFINDER_VER +ARG VIRULENCEFINDER_DB_COMMIT_HASH +ARG KMA_VER="1.4.14" + +# metadata +LABEL base.image="ubuntu:focal" +LABEL dockerfile.version="1" +LABEL software="VirulenceFinder" +LABEL software.version="${VIRULENCEFINDER_VER}" +LABEL description="Tool for identifying the virulence genes in E. 
coli, Enterococcus, Staphylococcus aureus, & Listeria from reads or assemblies" +LABEL website="https://bitbucket.org/genomicepidemiology/virulencefinder/src/master/" +LABEL license="https://bitbucket.org/genomicepidemiology/virulencefinder/src/master/" +LABEL maintainer="Curtis Kapsak" +LABEL maintainer.email="kapsakcj@gmail.com" + +# install dependencies; cleanup apt garbage +# ncbi-blast+ v2.9.0-2 (ubuntu:focal), min required version is 2.8.1 +# python3 v3.8.10, min required version is 3.5 +RUN apt-get update && apt-get install -y --no-install-recommends \ + wget \ + ca-certificates \ + procps \ + git \ + ncbi-blast+ \ + python3 \ + python3-pip \ + python3-setuptools \ + python3-dev \ + gcc \ + make \ + libz-dev \ + dos2unix \ + unzip && \ + apt-get autoclean && rm -rf /var/lib/apt/lists/* + +# install python dependencies +RUN pip3 install biopython==1.73 tabulate==0.7.7 cgecore==1.5.5 + +# Install kma +# apt deps: libz-dev (for compiling) +RUN git clone --branch ${KMA_VER} --depth 1 https://bitbucket.org/genomicepidemiology/kma.git && \ + cd kma &&\ + make &&\ + mv kma kma_index kma_shm kma_update /usr/local/bin/ + +# download VIRULENCEFINDER database using a specific commit hash to aid in reproducibility +# index database w/ kma +# NOTE: files HAVE to go into '/database' since that is the default location expected by serotyperfinder.py +# dos2unix on the FASTA files to ensure they have LF line endings +RUN mkdir /database && \ + git clone https://bitbucket.org/genomicepidemiology/virulencefinder_db.git /database && \ + cd /database && \ + git checkout ${VIRULENCEFINDER_DB_COMMIT_HASH} && \ + dos2unix *.fsa && \ + python3 INSTALL.py kma_index + +# install virulencefinder to specific tag/version; make /data +RUN git clone --branch ${VIRULENCEFINDER_VER} https://bitbucket.org/genomicepidemiology/virulencefinder.git && \ + mkdir /data + +# set $PATH and locale settings for singularity compatibility +ENV PATH="/virulencefinder:${PATH}" \ + LC_ALL=C.UTF-8 + +# set 
final working directory for production docker image (app layer only) +WORKDIR /data + +# default command is to pull up help options for virulencefinder +CMD [ "virulencefinder.py", "-h"] + +### START OF TEST STAGE ### +FROM app as test + +RUN virulencefinder.py -h + +# set working directory for test layer +WORKDIR /test + +# download an example assembly; test with VirulenceFinder +# Escherichia coli complete genome (Unicycler assembly) +# GenBank Nucleotide entry: https://www.ncbi.nlm.nih.gov/nuccore/CP113091.1/ +# BioSample:SAMN08799860 +RUN mkdir -v /test/asm-input && \ + wget https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/012/224/845/GCA_012224845.2_ASM1222484v2/GCA_012224845.2_ASM1222484v2_genomic.fna.gz && \ + gunzip GCA_012224845.2_ASM1222484v2_genomic.fna.gz && \ + virulencefinder.py -i /test/GCA_012224845.2_ASM1222484v2_genomic.fna -x -o /test/asm-input && \ + cat /test/asm-input/results_tab.tsv + +# download Illumina reads for the same sample ^ and test reads as input into VirulenceFinder +RUN mkdir /test/reads-input && \ + wget ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR690/006/SRR6903006/SRR6903006_1.fastq.gz && \ + virulencefinder.py -i SRR6903006_1.fastq.gz -mp kma -x -o /test/reads-input && \ + cat /test/reads-input/results_tab.tsv + +# test using FASTA supplied with VirulenceFinder code; print help options +# expect to see hits to astA and 2 stx genes; unfortunately it finds astA and 3 stx genes (that don't match) +# issue created here: https://bitbucket.org/genomicepidemiology/virulencefinder/issues/11/test-results-do-not-match-expected-results +RUN cd /virulencefinder/test && \ + virulencefinder.py -i test.fsa -o . 
-mp blastn -x -q && \ + virulencefinder.py --help diff --git a/virulencefinder/2.0.5/README.md b/virulencefinder/2.0.5/README.md new file mode 100644 index 000000000..a9831fc7f --- /dev/null +++ b/virulencefinder/2.0.5/README.md @@ -0,0 +1,119 @@ +# VirulenceFinder Docker Image + +A docker image that contains VirulenceFinder, a tool for identifying virulence factors in E. coli isolates from reads or assemblies + +[Link to StaPH-B DockerHub repository](https://hub.docker.com/r/staphb/virulencefinder) + +Main tool: + +- Main Code Repo: [https://bitbucket.org/genomicepidemiology/virulencefinder/src/master/](https://bitbucket.org/genomicepidemiology/virulencefinder/src/master/) +- VirulenceFinder database: [https://bitbucket.org/genomicepidemiology/virulencefinder_db/src/master/](https://bitbucket.org/genomicepidemiology/virulencefinder_db/src/master/) +- You may be familiar with the web version of VirulenceFinder: [https://cge.food.dtu.dk/services/VirulenceFinder/](https://cge.food.dtu.dk/services/VirulenceFinder/) + +Additional tools: + +- python 3.8.10 +- biopython 1.73 +- [kma](https://bitbucket.org/genomicepidemiology/kma/src/master/) 1.0.0 +- ncbi-blast+ 2.9.0 + +## Version information + +VirulenceFinder version: 2.0.4 [https://bitbucket.org/genomicepidemiology/virulencefinder/src/2.0.4/](https://bitbucket.org/genomicepidemiology/virulencefinder/src/2.0.4/) made on 2020-02-06 + +VirulenceFinder database version: commit `f678bdc15283aed3a45f66050d2eb3a6c9651f3f` made on 2023‑05‑03. [Link to commit history](https://bitbucket.org/genomicepidemiology/virulencefinder_db/commits/) + +## Requirements + +- Docker or Singularity +- E. coli raw reads (fastq.gz) or assembly (fasta) + - Illumina, Ion Torrent, Roche 454, SOLiD, Oxford Nanopore, and PacBio reads are supported. (I've only tested Illumina reads) + +## Usage + +```bash +usage: virulencefinder.py [-h] -i INFILE [INFILE ...] 
[-o OUTDIR] [-tmp TMP_DIR] [-mp METHOD_PATH] [-p DB_PATH] [-d DATABASES] [-l MIN_COV] [-t THRESHOLD] [-x] [-q] + +optional arguments: + -h, --help show this help message and exit + -i INFILE [INFILE ...], --infile INFILE [INFILE ...] + FASTA or FASTQ input files. + -o OUTDIR, --outputPath OUTDIR + Path to blast output + -tmp TMP_DIR, --tmp_dir TMP_DIR + Temporary directory for storage of the results from the external software. + -mp METHOD_PATH, --methodPath METHOD_PATH + Path to method to use (kma or blastn) + -p DB_PATH, --databasePath DB_PATH + Path to the databases + -d DATABASES, --databases DATABASES + Databases chosen to search in - if non is specified all is used + -l MIN_COV, --mincov MIN_COV + Minimum coverage + -t THRESHOLD, --threshold THRESHOLD + Minimum threshold for identity + -x, --extented_output + Give extented output with allignment files, template and query hits in fasta and a tab seperated file with gene profile results + -q, --quiet +``` + +## Notes and Recommendations + +- You do not need to supply a database or use the `-p` or `-d` flags + - Database is included in the image and is in the default/expected location within the image filesystem: `/database` + - (*NOT RECOMMENDED*) If you do need to use your own database, you will need to first index it with `kma` and use the `virulencefinder.py -p` flag. You can find instructions for this on the VirulenceFinder Bitbucket README. `kma` is included in this docker image for database indexing. + - VirulenceFinder does **NOT** create an output directory when you use the `-o` flag. You MUST create it beforehand or it will throw an error. + - **Default % Identity threshold: 90%**. Adjust with `-t 0.95` + - **Default % coverage threshold: 60%**. Adjust with `-l 0.70` + - Use the `-x` flag (extended output) if you want the traditional/legacy VirulenceFinder output files `results_tab.tsv results.txt Virulence_genes.fsa Hit_in_genome_seq.fsa`. 
Otherwise you will need to parse the default output file `data.json` for results + - (*RECOMMENDED*) Use raw reads due to the increased sensitivity (without loss of specificity) and the additional information gleaned from KMA output (specifically the depth metric). You also save time from having to assemble the genome first. [CITATION NEEDED, PROBABLY THE KMA PAPER] +- Querying reads: + - This will run VirulenceFinder with `kma` (instead of ncbi-blast+) + - Only one of the PE read files is necessary. There is likely little benefit to using both R1 and R2. It will take longer to run if you use both R1 and R2 files. +- Querying assemblies: + - This will run VirulenceFinder with `ncbi-blast+` + - VirulenceFinder does not clean up after itself. `tmp/` (which contains 7 different `.xml` files) will exist in the specified output directory + +## Example Usage: Docker + +```bash +# download the image +$ docker pull staphb/virulencefinder:2.0.4 + +# input files are in my PWD +$ ls +E-coli.skesa.fasta E-coli.R1.fastq.gz E-coli.R2.fastq.gz + +# make an output directory +$ mkdir output-dir-reads output-dir-asm + +# query reads, mount PWD to /data inside container (broken into two lines for readabilty) +$ docker run --rm -u $(id -u):$(id -g) -v $PWD:/data staphb/virulencefinder:2.0.1 \ + virulencefinder.py -i /data/E-coli.R1.fastq.gz -o /data/output-dir-reads + +# query assembly +$ docker run --rm -u $(id -u):$(id -g) -v $PWD:/data staphb/virulencefinder:2.0.1 \ + virulencefinder.py -i /data/E-coli.skesa.fasta -o /data/output-dir-asm +``` + +## Example Usage: Singularity + +```bash +# download the image +$ singularity build virulencefinder.2.0.4.sif docker://staphb/virulencefinder:2.0.4 + +# files are in my PWD +$ ls +E-coli.skesa.fasta E-coli.R1.fastq.gz E-coli.R2.fastq.gz + +# make an output directory +$ mkdir output-dir-reads output-dir-asm + +# query reads; mount PWD to /data inside container +$ singularity exec --no-home -B $PWD:/data virulencefinder.2.0.4.sif \ + 
virulencefinder.py -i /data/E-coli.R1.fastq.gz -o /data/output-dir-reads + +# assembly +$ singularity exec --no-home -B $PWD:/data virulencefinder.2.0.4.sif \ + virulencefinder.py -i /data/E-coli.skesa.fasta -o /data/output-dir-asm +``` From 25c21766d74e9026a1728d8d0a4c0f633d6b7cba Mon Sep 17 00:00:00 2001 From: Erin Young Date: Thu, 21 Mar 2024 17:13:53 +0000 Subject: [PATCH 02/10] updated versions --- virulencefinder/2.0.5/README.md | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/virulencefinder/2.0.5/README.md b/virulencefinder/2.0.5/README.md index a9831fc7f..e60f0a915 100644 --- a/virulencefinder/2.0.5/README.md +++ b/virulencefinder/2.0.5/README.md @@ -14,14 +14,12 @@ Additional tools: - python 3.8.10 - biopython 1.73 -- [kma](https://bitbucket.org/genomicepidemiology/kma/src/master/) 1.0.0 +- [kma](https://bitbucket.org/genomicepidemiology/kma/src/master/) 1.4.14 - ncbi-blast+ 2.9.0 -## Version information +Database version: -VirulenceFinder version: 2.0.4 [https://bitbucket.org/genomicepidemiology/virulencefinder/src/2.0.4/](https://bitbucket.org/genomicepidemiology/virulencefinder/src/2.0.4/) made on 2020-02-06 - -VirulenceFinder database version: commit `f678bdc15283aed3a45f66050d2eb3a6c9651f3f` made on 2023‑05‑03. [Link to commit history](https://bitbucket.org/genomicepidemiology/virulencefinder_db/commits/) +VirulenceFinder database version: commit `2b705359191a24f6db64f891ab07c93b0281e685` made on 2024-01-02. 
[Link to commit history](https://bitbucket.org/genomicepidemiology/virulencefinder_db/commits/) ## Requirements @@ -78,7 +76,7 @@ optional arguments: ```bash # download the image -$ docker pull staphb/virulencefinder:2.0.4 +$ docker pull staphb/virulencefinder:latest # input files are in my PWD $ ls @@ -88,11 +86,11 @@ E-coli.skesa.fasta E-coli.R1.fastq.gz E-coli.R2.fastq.gz $ mkdir output-dir-reads output-dir-asm # query reads, mount PWD to /data inside container (broken into two lines for readabilty) -$ docker run --rm -u $(id -u):$(id -g) -v $PWD:/data staphb/virulencefinder:2.0.1 \ +$ docker run --rm -u $(id -u):$(id -g) -v $PWD:/data staphb/virulencefinder:latest \ virulencefinder.py -i /data/E-coli.R1.fastq.gz -o /data/output-dir-reads # query assembly -$ docker run --rm -u $(id -u):$(id -g) -v $PWD:/data staphb/virulencefinder:2.0.1 \ +$ docker run --rm -u $(id -u):$(id -g) -v $PWD:/data staphb/virulencefinder:latest \ virulencefinder.py -i /data/E-coli.skesa.fasta -o /data/output-dir-asm ``` @@ -100,7 +98,7 @@ $ docker run --rm -u $(id -u):$(id -g) -v $PWD:/data staphb/virulencefinder:2.0. 
```bash # download the image -$ singularity build virulencefinder.2.0.4.sif docker://staphb/virulencefinder:2.0.4 +$ singularity build virulencefinder.latest.sif docker://staphb/virulencefinder:latest # files are in my PWD $ ls @@ -110,10 +108,10 @@ E-coli.skesa.fasta E-coli.R1.fastq.gz E-coli.R2.fastq.gz $ mkdir output-dir-reads output-dir-asm # query reads; mount PWD to /data inside container -$ singularity exec --no-home -B $PWD:/data virulencefinder.2.0.4.sif \ +$ singularity exec --no-home -B $PWD:/data virulencefinder.latest.sif \ virulencefinder.py -i /data/E-coli.R1.fastq.gz -o /data/output-dir-reads # assembly -$ singularity exec --no-home -B $PWD:/data virulencefinder.2.0.4.sif \ +$ singularity exec --no-home -B $PWD:/data virulencefinder.latest.sif \ virulencefinder.py -i /data/E-coli.skesa.fasta -o /data/output-dir-asm ``` From 1d2550e622e38e5e891e601e0d0bd893b0f3b7b6 Mon Sep 17 00:00:00 2001 From: Erin Young Date: Tue, 14 May 2024 21:41:22 +0000 Subject: [PATCH 03/10] updated db commit --- virulencefinder/2.0.5/Dockerfile | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/virulencefinder/2.0.5/Dockerfile b/virulencefinder/2.0.5/Dockerfile index 1ea63dcfa..0fb6e87cb 100644 --- a/virulencefinder/2.0.5/Dockerfile +++ b/virulencefinder/2.0.5/Dockerfile @@ -1,7 +1,7 @@ ARG VIRULENCEFINDER_VER="2.0.5" -# Database not properly versioned, so using most recent commit made on 2024-01-02 -# see here: https://bitbucket.org/genomicepidemiology/virulencefinder_db/commits/2b705359191a24f6db64f891ab07c93b0281e685 -ARG VIRULENCEFINDER_DB_COMMIT_HASH="2b705359191a24f6db64f891ab07c93b0281e685" +# Database not properly versioned, so using most recent commit made on 2024-04-06 +# see here: https://bitbucket.org/genomicepidemiology/virulencefinder_db/commits/bcf7f0b26271a59ca85715fa2ab8a0c380e5357b +ARG VIRULENCEFINDER_DB_COMMIT_HASH="bcf7f0b26271a59ca85715fa2ab8a0c380e5357b" FROM ubuntu:focal as app @@ -16,10 +16,12 @@ LABEL dockerfile.version="1" 
LABEL software="VirulenceFinder" LABEL software.version="${VIRULENCEFINDER_VER}" LABEL description="Tool for identifying the virulence genes in E. coli, Enterococcus, Staphylococcus aureus, & Listeria from reads or assemblies" -LABEL website="https://bitbucket.org/genomicepidemiology/virulencefinder/src/master/" +LABEL website="https://bitbucket.org/genomicepidemiology/virulencefinder" LABEL license="https://bitbucket.org/genomicepidemiology/virulencefinder/src/master/" LABEL maintainer="Curtis Kapsak" LABEL maintainer.email="kapsakcj@gmail.com" +LABEL maintainer1="Erin Young" +LABEL maintainer1.email="eriny@utah.gov" # install dependencies; cleanup apt garbage # ncbi-blast+ v2.9.0-2 (ubuntu:focal), min required version is 2.8.1 From a8c7ad27e4b954a31fb31de92e2aa055c136f0e7 Mon Sep 17 00:00:00 2001 From: Erin Young Date: Tue, 25 Jun 2024 22:30:44 +0000 Subject: [PATCH 04/10] adding virulencefinder version 3.0.0 --- README.md | 2 +- virulencefinder/{2.0.5 => 3.0.0}/Dockerfile | 71 +++++++++++++-------- virulencefinder/{2.0.5 => 3.0.0}/README.md | 10 +-- 3 files changed, 48 insertions(+), 35 deletions(-) rename virulencefinder/{2.0.5 => 3.0.0}/Dockerfile (56%) rename virulencefinder/{2.0.5 => 3.0.0}/README.md (92%) diff --git a/README.md b/README.md index 60c026b93..7787921e0 100644 --- a/README.md +++ b/README.md @@ -283,7 +283,7 @@ To learn more about the docker pull rate limits and the open source software pro | [VIBRANT](https://hub.docker.com/r/staphb/vibrant/)
[![docker pulls](https://badgen.net/docker/pulls/staphb/vibrant)](https://hub.docker.com/r/staphb/vibrant) |
  • 1.2.1
| https://github.com/AnantharamanLab/VIBRANT | | [VIGOR4](https://hub.docker.com/r/staphb/vigor4/)
[![docker pulls](https://badgen.net/docker/pulls/staphb/vigor4)](https://hub.docker.com/r/staphb/vigor4) |
  • 4.1.20190131
| https://github.com/JCVenterInstitute/VIGOR4 | | [VirSorter2](https://hub.docker.com/r/staphb/virsorter2/)
[![docker pulls](https://badgen.net/docker/pulls/staphb/virsorter2)](https://hub.docker.com/r/staphb/virsorter2/) |
  • 2.1
| https://github.com/jiarong/VirSorter2 | -| [VirulenceFinder](https://hub.docker.com/r/staphb/virulencefinder/)
[![docker pulls](https://badgen.net/docker/pulls/staphb/virulencefinder)](https://hub.docker.com/r/staphb/virulencefinder/) |
  • [2.0.4](virulencefinder/2.0.4/)
  • [2.0.5](virulencefinder/2.0.5/)
| https://bitbucket.org/genomicepidemiology/virulencefinder/src/master/
https://bitbucket.org/genomicepidemiology/virulencefinder_db/src/master/ | +| [VirulenceFinder](https://hub.docker.com/r/staphb/virulencefinder/)
[![docker pulls](https://badgen.net/docker/pulls/staphb/virulencefinder)](https://hub.docker.com/r/staphb/virulencefinder/) |
  • [2.0.4](virulencefinder/2.0.4/)
  • [3.0.0](virulencefinder/3.0.0/)
| https://bitbucket.org/genomicepidemiology/virulencefinder/src/master/
https://bitbucket.org/genomicepidemiology/virulencefinder_db/src/master/ | | [wtdbg2](https://hub.docker.com/r/staphb/wtdbg2/)
[![docker pulls](https://badgen.net/docker/pulls/staphb/wtdbg2)](https://hub.docker.com/r/staphb/wtdbg2) |
  • 2.5
| https://github.com/ruanjue/wtdbg2 | You can also view the list of images on Docker hub here: https://hub.docker.com/r/staphb/ diff --git a/virulencefinder/2.0.5/Dockerfile b/virulencefinder/3.0.0/Dockerfile similarity index 56% rename from virulencefinder/2.0.5/Dockerfile rename to virulencefinder/3.0.0/Dockerfile index 0fb6e87cb..71abff3a4 100644 --- a/virulencefinder/2.0.5/Dockerfile +++ b/virulencefinder/3.0.0/Dockerfile @@ -1,17 +1,19 @@ -ARG VIRULENCEFINDER_VER="2.0.5" -# Database not properly versioned, so using most recent commit made on 2024-04-06 +ARG VIRULENCEFINDER_VER="3.0.0" +ARG VIRULENCEFINDER_DB_VER="2.0.0" +# Database sometimes is not properly versioned, so using most recent commit made on 2024-04-06 would be something like # see here: https://bitbucket.org/genomicepidemiology/virulencefinder_db/commits/bcf7f0b26271a59ca85715fa2ab8a0c380e5357b -ARG VIRULENCEFINDER_DB_COMMIT_HASH="bcf7f0b26271a59ca85715fa2ab8a0c380e5357b" +# ARG VIRULENCEFINDER_DB_COMMIT_HASH="bcf7f0b26271a59ca85715fa2ab8a0c380e5357b" -FROM ubuntu:focal as app +FROM ubuntu:jammy as app # re-instantiating for use in the app layer ARG VIRULENCEFINDER_VER ARG VIRULENCEFINDER_DB_COMMIT_HASH -ARG KMA_VER="1.4.14" +ARG VIRULENCEFINDER_DB_VER +ARG KMA_VER="1.4.15" # metadata -LABEL base.image="ubuntu:focal" +LABEL base.image="ubuntu:jammy" LABEL dockerfile.version="1" LABEL software="VirulenceFinder" LABEL software.version="${VIRULENCEFINDER_VER}" @@ -40,8 +42,10 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ make \ libz-dev \ dos2unix \ - unzip && \ - apt-get autoclean && rm -rf /var/lib/apt/lists/* + unzip \ + python-is-python3 && \ + apt-get autoclean && rm -rf /var/lib/apt/lists/* && \ + update-alternatives --install /usr/bin/python python /usr/bin/python3 10 # install python dependencies RUN pip3 install biopython==1.73 tabulate==0.7.7 cgecore==1.5.5 @@ -53,35 +57,45 @@ RUN git clone --branch ${KMA_VER} --depth 1 https://bitbucket.org/genomicepidemi make &&\ 
mv kma kma_index kma_shm kma_update /usr/local/bin/ -# download VIRULENCEFINDER database using a specific commit hash to aid in reproducibility +# download VIRULENCEFINDER database at the tag given by VIRULENCEFINDER_DB_VER (checked out explicitly for reproducibility) # index database w/ kma -# NOTE: files HAVE to go into '/database' since that is the default location expected by serotyperfinder.py +# NOTE: files go into '/databases'; virulencefinder is pointed at that path via the CGE_VIRULENCEFINDER_DB env variable # dos2unix on the FASTA files to ensure they have LF line endings RUN mkdir /database && \ - git clone https://bitbucket.org/genomicepidemiology/virulencefinder_db.git /database && \ - cd /database && \ - git checkout ${VIRULENCEFINDER_DB_COMMIT_HASH} && \ + git clone --depth 1 https://bitbucket.org/genomicepidemiology/virulencefinder_db.git /databases && \ + cd /databases && \ + git fetch --depth 1 origin tag ${VIRULENCEFINDER_DB_VER} && git checkout tags/${VIRULENCEFINDER_DB_VER} && \ + rm -rf .git && \ dos2unix *.fsa && \ python3 INSTALL.py kma_index # install virulencefinder to specific tag/version; make /data -RUN git clone --branch ${VIRULENCEFINDER_VER} https://bitbucket.org/genomicepidemiology/virulencefinder.git && \ +RUN git clone --branch ${VIRULENCEFINDER_VER} --depth 1 https://bitbucket.org/genomicepidemiology/virulencefinder.git && \ + rm -rf /virulencefinder/.git && \ + cd /virulencefinder && \ + pip3 install .
&& \ mkdir /data # set $PATH and locale settings for singularity compatibility ENV PATH="/virulencefinder:${PATH}" \ - LC_ALL=C.UTF-8 + LC_ALL=C.UTF-8 \ + CGE_BLASTN=/usr/bin/blastn \ + CGE_VIRULENCEFINDER_DB=/databases # set final working directory for production docker image (app layer only) WORKDIR /data +# setting a janky alias for everyone that uses the "latest" tag +RUN echo -e '#!/bin/bash\npython -m virulencefinder "$@"' > /usr/bin/virulencefinder.py && \ + chmod +x /usr/bin/virulencefinder.py + # default command is to pull up help options for virulencefinder -CMD [ "virulencefinder.py", "-h"] +CMD python -m virulencefinder -h ### START OF TEST STAGE ### FROM app as test -RUN virulencefinder.py -h +RUN python -m virulencefinder -h && /usr/bin/virulencefinder.py -h # set working directory for test layer WORKDIR /test @@ -90,21 +104,26 @@ WORKDIR /test # Escherichia coli complete genome (Unicycler assembly) # GenBank Nucleotide entry: https://www.ncbi.nlm.nih.gov/nuccore/CP113091.1/ # BioSample:SAMN08799860 -RUN mkdir -v /test/asm-input && \ - wget https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/012/224/845/GCA_012224845.2_ASM1222484v2/GCA_012224845.2_ASM1222484v2_genomic.fna.gz && \ +RUN mkdir asm-input && \ + wget -q https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/012/224/845/GCA_012224845.2_ASM1222484v2/GCA_012224845.2_ASM1222484v2_genomic.fna.gz && \ gunzip GCA_012224845.2_ASM1222484v2_genomic.fna.gz && \ - virulencefinder.py -i /test/GCA_012224845.2_ASM1222484v2_genomic.fna -x -o /test/asm-input && \ - cat /test/asm-input/results_tab.tsv + python -m virulencefinder -h && \ + which blastn && \ + head -n 5 /test/GCA_012224845.2_ASM1222484v2_genomic.fna && \ + python -m virulencefinder -ifa /test/GCA_012224845.2_ASM1222484v2_genomic.fna --extented_output -o asm-input && \ + ls asm-input && \ + cat asm-input/results_tab.tsv # download Illumina reads for the same sample ^ and test reads as input into VirulenceFinder RUN mkdir /test/reads-input && \ - wget 
ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR690/006/SRR6903006/SRR6903006_1.fastq.gz && \ - virulencefinder.py -i SRR6903006_1.fastq.gz -mp kma -x -o /test/reads-input && \ + wget -q ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR690/006/SRR6903006/SRR6903006_1.fastq.gz && \ + wget -q ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR690/006/SRR6903006/SRR6903006_2.fastq.gz && \ + python -m virulencefinder -ifq SRR6903006_1.fastq.gz SRR6903006_2.fastq.gz --extented_output -o /test/reads-input && \ cat /test/reads-input/results_tab.tsv # test using FASTA supplied with VirulenceFinder code; print help options # expect to see hits to astA and 2 stx genes; unfortunately it finds astA and 3 stx genes (that don't match) # issue created here: https://bitbucket.org/genomicepidemiology/virulencefinder/issues/11/test-results-do-not-match-expected-results -RUN cd /virulencefinder/test && \ - virulencefinder.py -i test.fsa -o . -mp blastn -x -q && \ - virulencefinder.py --help +RUN cd /virulencefinder/tests && \ + python -m virulencefinder -ifa data/test.fsa -o . && \ + ls diff --git a/virulencefinder/2.0.5/README.md b/virulencefinder/3.0.0/README.md similarity index 92% rename from virulencefinder/2.0.5/README.md rename to virulencefinder/3.0.0/README.md index e60f0a915..9f3c53542 100644 --- a/virulencefinder/2.0.5/README.md +++ b/virulencefinder/3.0.0/README.md @@ -14,18 +14,12 @@ Additional tools: - python 3.8.10 - biopython 1.73 -- [kma](https://bitbucket.org/genomicepidemiology/kma/src/master/) 1.4.14 +- [kma](https://bitbucket.org/genomicepidemiology/kma/src/master/) 1.4.15 - ncbi-blast+ 2.9.0 Database version: -VirulenceFinder database version: commit `2b705359191a24f6db64f891ab07c93b0281e685` made on 2024-01-02. [Link to commit history](https://bitbucket.org/genomicepidemiology/virulencefinder_db/commits/) - -## Requirements - -- Docker or Singularity -- E. coli raw reads (fastq.gz) or assembly (fasta) - - Illumina, Ion Torrent, Roche 454, SOLiD, Oxford Nanopore, and PacBio reads are supported. 
(I've only tested Illumina reads) +VirulenceFinder database version: tag 2.0.0 made on 2024-05-29. [Link to commit history](https://bitbucket.org/genomicepidemiology/virulencefinder_db/downloads/?tab=tags) ## Usage From 422b196029cb40625f4de2d32f927d7c99495f2d Mon Sep 17 00:00:00 2001 From: Curtis Kapsak Date: Sat, 12 Oct 2024 16:11:30 -0400 Subject: [PATCH 05/10] as -> AS in FROM lines want to silence warning and trigger automation to build --- virulencefinder/3.0.0/Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/virulencefinder/3.0.0/Dockerfile b/virulencefinder/3.0.0/Dockerfile index 71abff3a4..997554745 100644 --- a/virulencefinder/3.0.0/Dockerfile +++ b/virulencefinder/3.0.0/Dockerfile @@ -4,7 +4,7 @@ ARG VIRULENCEFINDER_DB_VER="2.0.0" # see here: https://bitbucket.org/genomicepidemiology/virulencefinder_db/commits/bcf7f0b26271a59ca85715fa2ab8a0c380e5357b # ARG VIRULENCEFINDER_DB_COMMIT_HASH="bcf7f0b26271a59ca85715fa2ab8a0c380e5357b" -FROM ubuntu:jammy as app +FROM ubuntu:jammy AS app # re-instantiating for use in the app layer ARG VIRULENCEFINDER_VER @@ -93,7 +93,7 @@ RUN echo -e '#!/bin/bash\npython -m virulencefinder "$@"' > /usr/bin/virulencefi CMD python -m virulencefinder -h ### START OF TEST STAGE ### -FROM app as test +FROM app AS test RUN python -m virulencefinder -h && /usr/bin/virulencefinder.py -h From 3132cb7bc7b2f46c0765b0ac81156ce86d322dd7 Mon Sep 17 00:00:00 2001 From: Curtis Kapsak Date: Sat, 12 Oct 2024 16:14:45 -0400 Subject: [PATCH 06/10] added mentions of Enterococcus, S. 
aureus, and Listeria to virulencefinder readme --- virulencefinder/3.0.0/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/virulencefinder/3.0.0/README.md b/virulencefinder/3.0.0/README.md index 9f3c53542..fcd855a30 100644 --- a/virulencefinder/3.0.0/README.md +++ b/virulencefinder/3.0.0/README.md @@ -1,6 +1,6 @@ # VirulenceFinder Docker Image -A docker image that contains VirulenceFinder, a tool for identifying virulence factors in E. coli isolates from reads or assemblies +A docker image that contains VirulenceFinder, a tool for identifying virulence factors in E. coli, Enterococcus, S. aureus, and Listeria isolates from reads or assemblies [Link to StaPH-B DockerHub repository](https://hub.docker.com/r/staphb/virulencefinder) From 72db8e4f028a73521a4dbb8ef6a8d239a98119cc Mon Sep 17 00:00:00 2001 From: kapsakcj Date: Sat, 12 Oct 2024 16:29:56 -0400 Subject: [PATCH 07/10] updated CMD to recommended JSON argument syntax --- virulencefinder/3.0.0/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/virulencefinder/3.0.0/Dockerfile b/virulencefinder/3.0.0/Dockerfile index 997554745..99f0deb90 100644 --- a/virulencefinder/3.0.0/Dockerfile +++ b/virulencefinder/3.0.0/Dockerfile @@ -90,7 +90,7 @@ RUN echo -e '#!/bin/bash\npython -m virulencefinder "$@"' > /usr/bin/virulencefi chmod +x /usr/bin/virulencefinder.py # default command is to pull up help options for virulencefinder -CMD python -m virulencefinder -h +CMD [ "python", "-m", "virulencefinder", "-h" ] ### START OF TEST STAGE ### FROM app AS test From fdaf2fcf3a8ebd767f9aee4522df5a4377a72e29 Mon Sep 17 00:00:00 2001 From: Curtis Kapsak Date: Sat, 12 Oct 2024 16:51:13 -0400 Subject: [PATCH 08/10] Update virulencefinder 3.0.0 readme with updated dependency versions --- virulencefinder/3.0.0/README.md | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/virulencefinder/3.0.0/README.md b/virulencefinder/3.0.0/README.md index 
fcd855a30..393ad9adb 100644 --- a/virulencefinder/3.0.0/README.md +++ b/virulencefinder/3.0.0/README.md @@ -1,4 +1,4 @@ -# VirulenceFinder Docker Image +# VirulenceFinder 3.0.0 Docker Image A docker image that contains VirulenceFinder, a tool for identifying virulence factors in E. coli, Enterococcus, S. aureus, and Listeria isolates from reads or assemblies @@ -12,10 +12,14 @@ Main tool: Additional tools: -- python 3.8.10 -- biopython 1.73 +- python 3.10.12 +- cgecore 1.5.6 +- biopython 1.84 +- numpy 2.1.2 +- pandas 2.2.3 +- tabulate 0.9.0 - [kma](https://bitbucket.org/genomicepidemiology/kma/src/master/) 1.4.15 -- ncbi-blast+ 2.9.0 +- ncbi-blast+ 2.12.0 Database version: From 986384b839eee5f685440b2815f78d6b702d9e2a Mon Sep 17 00:00:00 2001 From: kapsakcj Date: Sat, 12 Oct 2024 16:51:50 -0400 Subject: [PATCH 09/10] removed unnecessary and confusing pip3 install line from virulencefinder 3.0.0 dockerfile --- virulencefinder/3.0.0/Dockerfile | 3 --- 1 file changed, 3 deletions(-) diff --git a/virulencefinder/3.0.0/Dockerfile b/virulencefinder/3.0.0/Dockerfile index 99f0deb90..f55722ace 100644 --- a/virulencefinder/3.0.0/Dockerfile +++ b/virulencefinder/3.0.0/Dockerfile @@ -47,9 +47,6 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ apt-get autoclean && rm -rf /var/lib/apt/lists/* && \ update-alternatives --install /usr/bin/python python /usr/bin/python3 10 -# install python dependencies -RUN pip3 install biopython==1.73 tabulate==0.7.7 cgecore==1.5.5 - # Install kma # apt deps: libz-dev (for compiling) RUN git clone --branch ${KMA_VER} --depth 1 https://bitbucket.org/genomicepidemiology/kma.git && \ From 97286a87c4a438709e67df87a1896d24bb7f797e Mon Sep 17 00:00:00 2001 From: kapsakcj Date: Sat, 12 Oct 2024 17:07:14 -0400 Subject: [PATCH 10/10] updated comments w correct blast and python versions. 
also forced bash shell so alias is set properly (I was getting a weird error when running the virulencefinder.py command manually --- virulencefinder/3.0.0/Dockerfile | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/virulencefinder/3.0.0/Dockerfile b/virulencefinder/3.0.0/Dockerfile index f55722ace..3588a1873 100644 --- a/virulencefinder/3.0.0/Dockerfile +++ b/virulencefinder/3.0.0/Dockerfile @@ -26,8 +26,8 @@ LABEL maintainer1="Erin Young" LABEL maintainer1.email="eriny@utah.gov" # install dependencies; cleanup apt garbage -# ncbi-blast+ v2.9.0-2 (ubuntu:focal), min required version is 2.8.1 -# python3 v3.8.10, min required version is 3.5 +# ncbi-blast+ v2.12.0 (ubuntu:jammy), min required version is 2.8.1 +# python3 v3.10.12, min required version is 3.10 RUN apt-get update && apt-get install -y --no-install-recommends \ wget \ ca-certificates \ @@ -82,6 +82,9 @@ ENV PATH="/virulencefinder:${PATH}" \ # set final working directory for production docker image (app layer only) WORKDIR /data +# force the bash shell so that the `echo -e` step below, which writes the virulencefinder.py wrapper script, runs properly +SHELL ["/bin/bash", "-c"] + # setting a janky alias for everyone that uses the "latest" tag RUN echo -e '#!/bin/bash\npython -m virulencefinder "$@"' > /usr/bin/virulencefinder.py && \ chmod +x /usr/bin/virulencefinder.py