Skip to content

Commit

Permalink
Merge pull request #984 from StaPH-B/erin-artic
Browse files Browse the repository at this point in the history
Update artic for new medaka models
  • Loading branch information
jessicarowell authored May 31, 2024
2 parents 165c8d8 + 0e8075c commit 9b1cd3c
Show file tree
Hide file tree
Showing 3 changed files with 168 additions and 1 deletion.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ To learn more about the docker pull rate limits and the open source software pro
| [ANIclustermap](https://hub.docker.com/r/staphb/aniclustermap/) <br/> [![docker pulls](https://badgen.net/docker/pulls/staphb/aniclustermap)](https://hub.docker.com/r/staphb/aniclustermap) | <ul><li>[1.3.0](aniclusteramp/1.3.0/)</li></ul> | https://github.com/moshi4/ANIclustermap |
| [any2fasta](https://hub.docker.com/r/staphb/any2fasta/) <br/> [![docker pulls](https://badgen.net/docker/pulls/staphb/any2fasta)](https://hub.docker.com/r/staphb/any2fasta) | <ul><li>0.4.2</li></ul> | https://github.com/tseemann/any2fasta |
| [ARIBA](https://hub.docker.com/r/staphb/ariba/) <br/> [![docker pulls](https://badgen.net/docker/pulls/staphb/ariba)](https://hub.docker.com/r/staphb/ariba) | <ul><li>[2.14.4](./ariba/2.14.4/)</li><li>[2.14.6](./ariba/2.14.6/)</li><li>[2.14.7](./ariba/2.14.7/)</li></ul> | https://github.com/sanger-pathogens/ariba |
| [artic](https://hub.docker.com/r/staphb/artic) <br/> [![docker pulls](https://badgen.net/docker/pulls/staphb/artic)](https://hub.docker.com/r/staphb/artic) | <ul><li>[1.2.4-1.11.1 (artic-medaka)](artic/1.2.4-1.11.1/)</ul> | https://github.com/artic-network/fieldbioinformatics |
| [artic](https://hub.docker.com/r/staphb/artic) <br/> [![docker pulls](https://badgen.net/docker/pulls/staphb/artic)](https://hub.docker.com/r/staphb/artic) | <ul><li>[1.2.4-1.11.1 (artic-medaka)](artic/1.2.4-1.11.1/)</li><li>[1.2.4-1.12.0 (artic-medaka)](artic/1.2.4-1.12.0/)</li></ul> | https://github.com/artic-network/fieldbioinformatics |
| [artic-ncov2019](https://hub.docker.com/r/staphb/artic-ncov2019) <br/> [![docker pulls](https://badgen.net/docker/pulls/staphb/artic-ncov2019)](https://hub.docker.com/r/staphb/artic-ncov2019) | <ul><li>1.3.0</li></ul> | https://github.com/artic-network/fieldbioinformatics |
| [artic-ncov2019-epi2me](https://hub.docker.com/r/staphb/artic-ncov2019-epi2me) <br/> [![docker pulls](https://badgen.net/docker/pulls/staphb/artic-ncov2019-epi2me)](https://hub.docker.com/r/staphb/artic-ncov2019-epi2me) | <ul><li>0.3.10</li></ul> | https://github.com/epi2me-labs/wf-artic |
| [artic-ncov2019-medaka](https://hub.docker.com/r/staphb/artic-ncov2019-medaka) <br/> [![docker pulls](https://badgen.net/docker/pulls/staphb/artic-ncov2019-medaka)](https://hub.docker.com/r/staphb/artic-ncov2019-medaka) | <ul><li>1.1.0</li></ul> | https://github.com/artic-network/artic-ncov2019 |
Expand Down
99 changes: 99 additions & 0 deletions artic/1.2.4-1.12.0/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
# artic (fieldbioinformatics) version to install. Declared before the first
# FROM so it is a global build argument; each stage that needs it re-imports
# it with a bare `ARG ARTIC_VER`.
ARG ARTIC_VER=1.2.4

# `AS` is upper-cased to match `FROM` (Docker build check: FromAsCasing).
FROM mambaorg/micromamba:1.4.9 AS app

# Bring the global ARTIC_VER default into this stage.
ARG ARTIC_VER
# medaka version; used by the medaka label below and by the pip install later
# in this stage.
ARG MEDAKA_VER=1.12.0

# Image metadata. The two *.version labels are filled from the build arguments
# above so they track whatever versions the image was built with.
LABEL base.image="mambaorg/micromamba:1.4.9"
LABEL dockerfile.version="1"
LABEL software="artic"
LABEL software.version="${ARTIC_VER}"
LABEL software1="medaka"
LABEL software1.version="${MEDAKA_VER}"
LABEL description="A bioinformatics pipeline for working with virus sequencing data sequenced with nanopore"
LABEL website="https://github.com/artic-network/fieldbioinformatics"
LABEL license="https://github.com/artic-network/fieldbioinformatics/blob/master/LICENSE"
LABEL sop="https://artic.network/ncov-2019/ncov2019-bioinformatics-sop.html"
LABEL maintainer="Erin Young"
LABEL maintainer.email="[email protected]"

# Run the remaining build steps as root, starting from the filesystem root.
USER root
WORKDIR /

# System packages, one per line in alphabetical order so future diffs stay
# small. The apt package lists are removed in the same layer that fetched them.
RUN apt-get update \
 && apt-get install -y --no-install-recommends \
      ca-certificates \
      gcc \
      libbz2-dev \
      libcurl4-gnutls-dev \
      liblzma-dev \
      libssl-dev \
      make \
      pkg-config \
      procps \
      python-is-python3 \
      python3-dev \
      python3-pip \
      wget \
      zlib1g-dev \
 && apt-get autoclean \
 && rm -rf /var/lib/apt/lists/*

# Create a dedicated 'artic' environment containing the pinned artic release,
# clear micromamba's caches in the same layer, and create /data.
# Channel order is kept as conda-forge, bioconda, defaults.
RUN micromamba create --yes --name artic \
      --channel conda-forge \
      --channel bioconda \
      --channel defaults \
      "artic=${ARTIC_VER}" \
 && micromamba clean --all --yes \
 && mkdir /data

# Append the environment's bin directory to PATH (existing PATH entries keep
# precedence) and set the locale to C.UTF-8.
ENV LC_ALL=C.UTF-8 \
    PATH="${PATH}:/opt/conda/envs/artic/bin/"

# Smoke test: the build fails here if artic cannot be run from PATH.
RUN artic --help

# Install medaka (pinned to MEDAKA_VER) and pyabpoa with pip, then print
# medaka's version as a smoke test.
# --no-cache-dir stops pip from writing its download/wheel cache into this
# layer, where it would only add to the image size.
RUN pip install --no-cache-dir medaka==${MEDAKA_VER} && \
    pip install --no-cache-dir pyabpoa && \
    medaka --version

# Default command: print artic's help text. Exec (JSON-array) form starts
# artic directly instead of wrapping it in `/bin/sh -c`.
CMD ["artic", "--help"]

# Default working directory for containers started from this image.
WORKDIR /data

##### ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- #####
##### Step 2. Set up the testing stage. #####
##### The docker image is built to the 'test' stage before merging, but #####
##### the test stage (or any stage after 'app') will be lost. #####
##### ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- #####

# A second FROM instruction creates a new stage
# new base for testing
# `AS` is upper-cased to match `FROM` (Docker build check: FromAsCasing).
FROM app AS test
# Re-import the global build argument so ${ARTIC_VER} is available in this stage.
ARG ARTIC_VER

# Print help and version info; check dependencies (not all software has these
# options available). Mostly this ensures the tools are in PATH and executable.
RUN artic --help && \
    artic --version && \
    medaka --version

# List the medaka models available in the image.
# The subcommand is written as plain `list_models`: the previous `list\_models`
# carried a stray backslash that the shell silently dropped.
RUN medaka tools list_models

WORKDIR /

# Download and unpack the artic source release matching ARTIC_VER; it provides
# the test-runner.sh script used below.
RUN wget -q https://github.com/artic-network/fieldbioinformatics/archive/refs/tags/v${ARTIC_VER}.tar.gz && \
    tar -vxf v${ARTIC_VER}.tar.gz

# Enter the unpacked source tree with WORKDIR rather than `cd` inside RUN.
WORKDIR /fieldbioinformatics-${ARTIC_VER}

# Run the tests that ship with artic: first the medaka workflow, then nanopolish.
RUN ls && \
    bash ./test-runner.sh medaka && \
    bash ./test-runner.sh nanopolish

WORKDIR /test

# End-to-end run on "real" data (the sample files were not sequenced with
# version 5.3.2 primers):
#   1. download the reads and length-filter them with `artic guppyplex`
#   2. lay out the V5.3.2 primer scheme as dir/name/V5/name.* and index the reference
#   3. run `artic minion` with medaka and list the resulting final* files
# `|| exit 1` inside the loop makes a failed scheme download fail the build.
RUN scheme_url=https://raw.githubusercontent.com/artic-network/primer-schemes/master/nCoV-2019/V5.3.2 && \
    wget -q ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR224/050/SRR22452250/SRR22452250_1.fastq.gz && \
    artic guppyplex --min-length 400 --max-length 700 --directory . --prefix SRR22452250_1.fastq.gz --output SRR22452250_1_filtered.fastq.gz && \
    mkdir -p dir/name/V5 && \
    for suffix in primer.bed reference.fasta scheme.bed ; do \
        wget -q "${scheme_url}/SARS-CoV-2.${suffix}" -O "dir/name/V5/name.${suffix}" || exit 1 ; \
    done && \
    samtools faidx dir/name/V5/name.reference.fasta && \
    artic minion --normalise 200 --skip-nanopolish --medaka --medaka-model r941_min_high_g360 --threads 4 --read-file SRR22452250_1_filtered.fastq.gz --scheme-directory ./dir --scheme-version 5 name final && \
    ls final*

68 changes: 68 additions & 0 deletions artic/1.2.4-1.12.0/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# artic fieldbioinformatics container

Main tool : [artic](https://github.com/artic-network/fieldbioinformatics)

Additional tools:

- medaka=1.12.0

Full documentation: [https://github.com/artic-network/fieldbioinformatics](https://github.com/artic-network/fieldbioinformatics)

There is also a very useful SOP: [https://artic.network/ncov-2019/ncov2019-bioinformatics-sop.html](https://artic.network/ncov-2019/ncov2019-bioinformatics-sop.html)
And additional documentation: [https://artic.readthedocs.io/en/latest/](https://artic.readthedocs.io/en/latest/)

> A bioinformatics pipeline for working with virus sequencing data sequenced with nanopore.

WARNING: This container does not contain the primer schemes found at [https://github.com/artic-network/primer-schemes](https://github.com/artic-network/primer-schemes). Those will have to be downloaded and mounted separately.

## Example Usage with the artic primers

```bash
# get primers
git clone https://github.com/artic-network/primer-schemes

# download reads for example
wget -q ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR224/050/SRR22452250/SRR22452250_1.fastq.gz

# read filtering
docker run -v $(pwd):/data staphb/artic:latest artic guppyplex --min-length 400 --max-length 700 --directory . --prefix SRR22452250_1.fastq.gz --output SRR22452250_1_filtered.fastq.gz

# running the artic minion workflow with medaka
docker run -v $(pwd):/data staphb/artic:latest artic minion --normalise 200 --skip-nanopolish --medaka --medaka-model r941_min_high_g360 --threads 4 --read-file SRR22452250_1_filtered.fastq.gz --scheme-directory primer-schemes --scheme-version 5.3.2 nCoV-2019 test

# the result files will all start with test*
```

In general, any primer scheme can be used as long as it meets [artic's requirements](https://github.com/artic-network/primer-schemes).

This is the recommended directory structure with corresponding files:

```bash
${directory}/${name}/V${version}/${name}.primer.bed
${directory}/${name}/V${version}/${name}.scheme.bed
${directory}/${name}/V${version}/${name}.reference.fasta
${directory}/${name}/V${version}/${name}.reference.fasta.fai
```

The command to use this primer scheme would be

```bash
artic minion --normalise 200 --skip-nanopolish --medaka --medaka-model r941_min_high_g360 --threads 4 --read-file input.fastq.gz --scheme-directory ${directory} --scheme-version ${version} ${name} outputprefix
```

Different primer schemes can be validated with artic-tools (already in PATH):

```bash
artic-tools validate_scheme ${basename}.primer.bed --outputInserts ${basename}.insert.bed
```

## Medaka models

Medaka updates frequently, and artic can throw errors when corresponding ONT models are not found.

These are the medaka models in this image:
```
Available: r103_fast_g507, r103_fast_snp_g507, r103_fast_variant_g507, r103_hac_g507, r103_hac_snp_g507, r103_hac_variant_g507, r103_min_high_g345, r103_min_high_g360, r103_prom_high_g360, r103_prom_snp_g3210, r103_prom_variant_g3210, r103_sup_g507, r103_sup_snp_g507, r103_sup_variant_g507, r1041_e82_260bps_fast_g632, r1041_e82_260bps_fast_variant_g632, r1041_e82_260bps_hac_g632, r1041_e82_260bps_hac_v4.0.0, r1041_e82_260bps_hac_v4.1.0, r1041_e82_260bps_hac_variant_g632, r1041_e82_260bps_hac_variant_v4.1.0, r1041_e82_260bps_joint_apk_ulk_v5.0.0, r1041_e82_260bps_sup_g632, r1041_e82_260bps_sup_v4.0.0, r1041_e82_260bps_sup_v4.1.0, r1041_e82_260bps_sup_variant_g632, r1041_e82_260bps_sup_variant_v4.1.0, r1041_e82_400bps_fast_g615, r1041_e82_400bps_fast_g632, r1041_e82_400bps_fast_variant_g615, r1041_e82_400bps_fast_variant_g632, r1041_e82_400bps_hac_g615, r1041_e82_400bps_hac_g632, r1041_e82_400bps_hac_v4.0.0, r1041_e82_400bps_hac_v4.1.0, r1041_e82_400bps_hac_v4.2.0, r1041_e82_400bps_hac_v4.3.0, r1041_e82_400bps_hac_v5.0.0, r1041_e82_400bps_hac_variant_g615, r1041_e82_400bps_hac_variant_g632, r1041_e82_400bps_hac_variant_v4.1.0, r1041_e82_400bps_hac_variant_v4.2.0, r1041_e82_400bps_hac_variant_v4.3.0, r1041_e82_400bps_hac_variant_v5.0.0, r1041_e82_400bps_sup_g615, r1041_e82_400bps_sup_v4.0.0, r1041_e82_400bps_sup_v4.1.0, r1041_e82_400bps_sup_v4.2.0, r1041_e82_400bps_sup_v4.3.0, r1041_e82_400bps_sup_v5.0.0, r1041_e82_400bps_sup_variant_g615, r1041_e82_400bps_sup_variant_v4.1.0, r1041_e82_400bps_sup_variant_v4.2.0, r1041_e82_400bps_sup_variant_v4.3.0, r1041_e82_400bps_sup_variant_v5.0.0, r104_e81_fast_g5015, r104_e81_fast_variant_g5015, r104_e81_hac_g5015, r104_e81_hac_variant_g5015, r104_e81_sup_g5015, r104_e81_sup_g610, r104_e81_sup_variant_g610, r10_min_high_g303, r10_min_high_g340, r941_e81_fast_g514, r941_e81_fast_variant_g514, r941_e81_hac_g514, r941_e81_hac_variant_g514, r941_e81_sup_g514, r941_e81_sup_variant_g514, r941_min_fast_g303, r941_min_fast_g507, 
r941_min_fast_snp_g507, r941_min_fast_variant_g507, r941_min_hac_g507, r941_min_hac_snp_g507, r941_min_hac_variant_g507, r941_min_high_g303, r941_min_high_g330, r941_min_high_g340_rle, r941_min_high_g344, r941_min_high_g351, r941_min_high_g360, r941_min_sup_g507, r941_min_sup_snp_g507, r941_min_sup_variant_g507, r941_prom_fast_g303, r941_prom_fast_g507, r941_prom_fast_snp_g507, r941_prom_fast_variant_g507, r941_prom_hac_g507, r941_prom_hac_snp_g507, r941_prom_hac_variant_g507, r941_prom_high_g303, r941_prom_high_g330, r941_prom_high_g344, r941_prom_high_g360, r941_prom_high_g4011, r941_prom_snp_g303, r941_prom_snp_g322, r941_prom_snp_g360, r941_prom_sup_g507, r941_prom_sup_snp_g507, r941_prom_sup_variant_g507, r941_prom_variant_g303, r941_prom_variant_g322, r941_prom_variant_g360, r941_sup_plant_g610, r941_sup_plant_variant_g610
Default consensus: r1041_e82_400bps_sup_v5.0.0
Default variant: r1041_e82_400bps_sup_variant_v5.0.0
```

0 comments on commit 9b1cd3c

Please sign in to comment.