-
Notifications
You must be signed in to change notification settings - Fork 119
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add abricate v1.0.1 database for vibrio cholera (#963)
* add docker file, test files and database for vibrio database * fix tests * add maintainer2 * renamed database to include .fasta suffix; changed ADD to COPY as per docker recommendation; updated test to download test FASTA instead of keeping copy in this repo; updated vibrio test commands to grep individually for expected genes * add README specific to abricate + Vcholerae DB; updated main README.md with relative links --------- Co-authored-by: kapsakcj <[email protected]>
- Loading branch information
Showing
4 changed files
with
202 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,121 @@ | ||
ARG ABRICATE_VER="1.0.1" | ||
|
||
FROM ubuntu:focal AS app | ||
|
||
# For easy upgrade later. ARG variables only persist during docker image build time | ||
# The global ARG above is re-declared here (without a value) so it is visible inside this stage | ||
ARG ABRICATE_VER | ||
ARG ANY2FASTA_VERSION="0.4.2" | ||
|
||
LABEL base.image="ubuntu:focal" | ||
LABEL dockerfile.version="1" | ||
LABEL software="Abricate" | ||
# derived from the build ARG so the label cannot drift from the version that is actually installed | ||
LABEL software.version="${ABRICATE_VER}" | ||
LABEL description="Mass screening of contigs for AMR or virulence genes" | ||
LABEL website="https://github.com/tseemann/abricate" | ||
LABEL license="https://github.com/tseemann/abricate/blob/master/LICENSE" | ||
LABEL maintainer="Curtis Kapsak" | ||
LABEL maintainer.email="[email protected]" | ||
LABEL maintainer2="Inês Mendes" | ||
LABEL maintainer2.email="[email protected]" | ||
|
||
# OS-level dependencies for abricate, listed alphabetically (emboss was removed from this list) | ||
# ncbi-blast+ version in apt for ubuntu:focal = v2.9.0 | ||
# the apt package lists are deleted in the same layer so they do not add to the image size | ||
RUN apt-get update \ | ||
&& apt-get install -y --no-install-recommends \ | ||
bioperl \ | ||
git \ | ||
gzip \ | ||
libfile-slurp-perl \ | ||
libjson-perl \ | ||
liblist-moreutils-perl \ | ||
liblwp-protocol-https-perl \ | ||
libpath-tiny-perl \ | ||
libtext-csv-perl \ | ||
libwww-perl \ | ||
ncbi-blast+ \ | ||
unzip \ | ||
wget \ | ||
&& apt-get autoclean \ | ||
&& rm -rf /var/lib/apt/lists/* | ||
|
||
# get any2fasta | ||
# only the any2fasta script itself is installed (into /usr/local/bin); the downloaded tarball and the | ||
# unpacked source tree are both removed in this same layer so neither is kept in the image | ||
RUN wget https://github.com/tseemann/any2fasta/archive/refs/tags/v${ANY2FASTA_VERSION}.tar.gz && \ | ||
tar -xvf v${ANY2FASTA_VERSION}.tar.gz && \ | ||
rm v${ANY2FASTA_VERSION}.tar.gz && \ | ||
chmod +x any2fasta-${ANY2FASTA_VERSION}/any2fasta && \ | ||
cp any2fasta-${ANY2FASTA_VERSION}/any2fasta /usr/local/bin && \ | ||
rm -rf any2fasta-${ANY2FASTA_VERSION} | ||
|
||
# create the /data directory, then fetch and unpack the abricate release tarball | ||
# the tarball is deleted once unpacked; `abricate --check` is run by full path because PATH is not set until later | ||
RUN mkdir /data && \ | ||
wget https://github.com/tseemann/abricate/archive/v${ABRICATE_VER}.tar.gz && \ | ||
tar -xzvf v${ABRICATE_VER}.tar.gz && \ | ||
rm -rf v${ABRICATE_VER}.tar.gz && \ | ||
/abricate-${ABRICATE_VER}/bin/abricate --check | ||
|
||
# set $PATH | ||
# set perl locale settings for singularity compatibility | ||
# each assignment is kept whole on its own line; the space before the trailing backslash keeps | ||
# PATH and LC_ALL as two separate assignments once the continuation lines are joined | ||
ENV PATH="/abricate-${ABRICATE_VER}/bin:${PATH}" \ | ||
LC_ALL=C | ||
|
||
# add the custom Vibrio database: | ||
# - the FASTA is copied into the image root, then placed in its own db directory under the name 'sequences' | ||
# - `abricate --setupdb` indexes the databases and `abricate --list` prints them in the build log | ||
COPY vibrio_v1.0.0.fasta / | ||
RUN DB_DIR=/abricate-${ABRICATE_VER}/db/vibrio && \ | ||
mkdir -v "${DB_DIR}" && \ | ||
cp -v /vibrio_v1.0.0.fasta "${DB_DIR}/sequences" && \ | ||
abricate --setupdb && \ | ||
abricate --list | ||
|
||
# containers start in /data | ||
WORKDIR /data | ||
|
||
# testing layer starts here | ||
FROM app AS test | ||
|
||
# Grab test Vcholerae genome, uncompress, and save to /data | ||
# The archive is downloaded to a file and then decompressed, rather than piping wget into gunzip: | ||
# this RUN executes under the default /bin/sh without pipefail, so a failed download inside a | ||
# pipeline would not reliably fail the build. gunzip replaces the .gz file with the .fna file. | ||
RUN wget -O /data/GCA_018083565.2_PDT000793508.2_genomic.fna.gz https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/018/083/565/GCA_018083565.2_PDT000793508.2/GCA_018083565.2_PDT000793508.2_genomic.fna.gz && \ | ||
gunzip /data/GCA_018083565.2_PDT000793508.2_genomic.fna.gz | ||
|
||
# to ensure this env variable is set in test layer | ||
# (ARG values are scoped to a build stage, so it is re-declared here without a value) | ||
ARG ABRICATE_VER | ||
|
||
# so that the below commands are run with /bin/bash shell and not /bin/sh - needed for bash-specific tricks below | ||
# NOTE(review): pipefail is not enabled here. Some tests below pipe a command that appears intended to fail | ||
# into grep (e.g. `abricate not_exist.embl |& grep ERROR`), so enabling pipefail would presumably break them - confirm before changing | ||
SHELL ["/bin/bash", "-c"] | ||
|
||
# tests shamelessly stolen and modified from: https://github.com/tseemann/abricate/blob/master/.travis.yml | ||
# How to read this RUN: | ||
# - every step is chained with &&, so the first failing step fails the build; `set -x` echoes each command | ||
# - a leading `!` inverts the exit status, i.e. that command (or subshell) is expected to fail | ||
# - `|&`, `<( ... )` and `{1,2,3,4}` are bash syntax, which is why SHELL is switched to /bin/bash above | ||
# - the test/ inputs are referenced relative to /abricate-${ABRICATE_VER}/, hence the `cd` | ||
# - NOTE(review): `abricate-get_db --db ncbi --dbdir .` presumably downloads the database and so needs network access at build time - confirm | ||
RUN set -x && \ | ||
cd /abricate-${ABRICATE_VER}/ && \ | ||
abricate --version && \ | ||
abricate --help && \ | ||
abricate --check && \ | ||
abricate --list && \ | ||
! abricate --doesnotexist && \ | ||
! abricate --threads 0 && \ | ||
! (abricate test/assembly.fa | grep '~~~') && \ | ||
abricate test/assembly.fa > 1.tab && \ | ||
abricate test/assembly.fa.gz > 2.tab && \ | ||
abricate test/assembly.gbk > 3.tab && \ | ||
abricate test/assembly.gbk.gz > 4.tab && \ | ||
abricate --nopath test/assembly.gbk.gz | grep '^assembly.gbk.gz' && \ | ||
abricate --summary {1,2,3,4}.tab > summary.tab && \ | ||
abricate --summary 1.tab 2.tab 1.tab 2>&1 | grep 'duplicate' && \ | ||
abricate --summary <(cat 1.tab 2.tab 3.tab) | wc -l | grep -w 4 && \ | ||
abricate test/assembly.txt |& grep ERROR && \ | ||
abricate not_exist.embl |& grep ERROR && \ | ||
abricate --threads `nproc` test/assembly.fa.bz2 | grep -i FOSFOMYCIN && \ | ||
abricate --threads `nproc` test/assembly.fa.bz2 | grep -i lactam && \ | ||
for DB in `abricate --list | cut -f1 | tail -n +2`; do abricate --db ${DB} test/assembly.fa > /dev/null ; done && \ | ||
abricate-get_db --help && \ | ||
abricate-get_db --db ncbi --dbdir . && \ | ||
! grep 'FUSIDIC ACID' ncbi/sequences && \ | ||
abricate --threads `nproc` --fofn test/fofn.txt | ||
|
||
# vibrio-specific abricate test | ||
# screen the test genome against the custom db, keeping a copy of the TSV in /data, | ||
# then require each of the 5 expected genes to appear in it; the first gene not found fails the build | ||
RUN abricate --db vibrio /data/GCA_018083565.2_PDT000793508.2_genomic.fna | tee /data/abricate_vibrio.tsv && \ | ||
echo && \ | ||
echo "grepping for expected genes in abricate output now..." && \ | ||
for GENE in ctxA toxR tcpA_ElTor ompW wbeN_O1; do \ | ||
grep "${GENE}" /data/abricate_vibrio.tsv || exit $?; \ | ||
done |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
# ABRicate v1.0.1 docker image + Vibrio cholerae db | ||
|
||
> ABRicate: Mass screening of contigs for antimicrobial resistance or virulence genes. It comes bundled with multiple databases: NCBI, CARD, ARG-ANNOT, Resfinder, MEGARES, EcOH, PlasmidFinder, Ecoli_VF and VFDB. | ||
Main tool : [ABRicate](https://github.com/tseemann/abricate) | ||
|
||
Additional tools: | ||
|
||
- any2fasta 0.4.2 | ||
- ncbi-blast+ 2.9.0 | ||
- perl 5.30.0 | ||
- bioperl 1.7.7-1 | ||
|
||
## Custom *Vibrio cholerae* database info | ||
|
||
This docker image includes a *Vibrio cholerae-specific* database of gene targets (traditionally used in PCR methods) for detecting O1 & O139 serotypes, toxin-production markers, and Biotype markers within the O1 serogroup ("El Tor" or "Classical" biotypes). These sequences were shared via personal communication with Dr. Christine Lee, of the National Listeria, Yersinia, Vibrio and Enterobacterales Reference Laboratory within the Enteric Diseases Laboratory Branch at CDC. | ||
|
||
The genes included in the database (and their purposes) are as follows: | ||
|
||
- `ctxA` - Cholera toxin, an indication of toxigenic cholerae | ||
- `ompW` - outer membrane protein, a *V. cholerae* species marker (presence of any allele of this gene distinguishes *V. cholerae* from *V. parahaemolyticus* and *V. vulnificus*) | ||
- `tcpA` - toxin co-pilus A, used to infer Biotype, either "El Tor" or "Classical" | ||
- database includes an allele for each Biotype. `tcpA_classical` and `tcpA_ElTor` | ||
- `toxR` - transcriptional activator (controls cholera toxin, pilus, and outer-membrane protein expression) - Species marker (allele distinguishes *V. cholerae* from *V. parahaemolyticus* and *V. vulnificus*) | ||
- `wbeN` - O antigen encoding region - used to identify the O1 serogroup | ||
- `wbfR` - O antigen encoding region - used to identify the O139 serogroup | ||
|
||
:warning: The database's FASTA file & index files are located within `/abricate-1.0.1/db/vibrio/` in the container's file system (the FASTA file itself is named `sequences`) and can be utilized via the example command below. | ||
|
||
:warning: This database is identical in nucleotide sequence content to the `vibrio_230224.fasta` database included in the SRST2 container (located at `srst2/0.2.0-vibrio-230224/vibrio_230224.fasta`), but the FASTA headers were formatted for use with ABRicate. | ||
|
||
## Example Usage | ||
|
||
```bash | ||
# list out the available databases | ||
$ abricate --list | ||
DATABASE SEQUENCES DBTYPE DATE | ||
card 2631 nucl 2024-Apr-30 | ||
ncbi 5386 nucl 2024-Apr-30 | ||
vfdb 2597 nucl 2024-Apr-30 | ||
megares 6635 nucl 2024-Apr-30 | ||
ecoli_vf 2701 nucl 2024-Apr-30 | ||
argannot 2223 nucl 2024-Apr-30 | ||
ecoh 597 nucl 2024-Apr-30 | ||
plasmidfinder 460 nucl 2024-Apr-30 | ||
resfinder 3077 nucl 2024-Apr-30 | ||
vibrio 8 nucl 2024-Apr-30 | ||
|
||
# run ABRicate on a Vibrio cholerae assembly using the custom database, saving results to file "/data/abricate_vibrio.tsv" | ||
$ abricate --nopath --db vibrio /data/GCA_018083565.2_PDT000793508.2_genomic.fna | tee /data/abricate_vibrio.tsv | ||
Using nucl database vibrio: 8 sequences - 2024-Apr-30 | ||
Processing: /data/GCA_018083565.2_PDT000793508.2_genomic.fna | ||
#FILE SEQUENCE START END STRAND GENE COVERAGE COVERAGE_MAP GAPS %COVERAGE %IDENTITY DATABASE ACCESSION PRODUCT RESISTANCE | ||
Found 5 genes in /data/GCA_018083565.2_PDT000793508.2_genomic.fna | ||
Tip: did you know? abricate was named after 'A'nti 'B'acterial 'R'esistiance | ||
Done. | ||
GCA_018083565.2_PDT000793508.2_genomic.fna DADXRP020000001.1 3562 4338 + ctxA 1-777/777 =============== 0/0 100.00 100.00 Vibriov1.0.0 CP000627.1 ctxA_O395 cholera_toxin_gene | ||
GCA_018083565.2_PDT000793508.2_genomic.fna DADXRP020000001.1 520818 521702 + toxR 1-885/885 =============== 0/0 100.00 98.42 Vibriov1.0.0 CP000627.1 toxR_O395 Vcholerae_species_marker | ||
GCA_018083565.2_PDT000793508.2_genomic.fna DADXRP020000002.1 439805 440458 + ompW 1-654/654 =============== 0/0 100.00 99.08 Vibriov1.0.0 CP000626.1 ompW_O395 Vcholerae_species_marker | ||
GCA_018083565.2_PDT000793508.2_genomic.fna DADXRP020000008.1 101343 103820 + wbeN_O1 1-2478/2478 =============== 0/0 100.00 100.00 Vibriov1.0.0 NA wbeN_O1_INDRE O1_serotype_marker | ||
GCA_018083565.2_PDT000793508.2_genomic.fna DADXRP020000033.1 3271 3945 - tcpA_ElTor 1-675/675 =============== 0/0 100.00 100.00 Vibriov1.0.0 CP064350.1 tcpA_ElTor_C6706 ElTor_biotype | ||
|
||
``` |
Oops, something went wrong.