-
Notifications
You must be signed in to change notification settings - Fork 119
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #984 from StaPH-B/erin-artic
Update artic for new medaka models
- Loading branch information
Showing
3 changed files
with
168 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
# Global build argument: declared before the first FROM so each stage can re-declare it and inherit this default | ||
ARG ARTIC_VER=1.2.4 | ||
|
||
# Stage keyword written as AS so its casing matches FROM | ||
FROM mambaorg/micromamba:1.4.9 AS app | ||
|
||
# An ARG declared before FROM is not visible inside a stage until it is re-declared there | ||
ARG ARTIC_VER | ||
# medaka version installed with pip further down in this stage | ||
ARG MEDAKA_VER=1.12.0 | ||
|
||
# Image metadata, written as a single LABEL instruction with one key per line | ||
LABEL base.image="mambaorg/micromamba:1.4.9" \ | ||
      dockerfile.version="1" \ | ||
      software="artic" \ | ||
      software.version="${ARTIC_VER}" \ | ||
      software1="medaka" \ | ||
      software1.version="${MEDAKA_VER}" \ | ||
      description="A bioinformatics pipeline for working with virus sequencing data sequenced with nanopore" \ | ||
      website="https://github.com/artic-network/fieldbioinformatics" \ | ||
      license="https://github.com/artic-network/fieldbioinformatics/blob/master/LICENSE" \ | ||
      sop="https://artic.network/ncov-2019/ncov2019-bioinformatics-sop.html" \ | ||
      maintainer="Erin Young" \ | ||
      maintainer.email="[email protected]" | ||
|
||
# Switch to root for the package installation below and work from the filesystem root | ||
USER root | ||
WORKDIR / | ||
|
||
# OS-level packages, one per line in alphabetical order. | ||
# apt-get autoclean and removal of the package lists happen in the same layer as the install. | ||
RUN apt-get update && \ | ||
apt-get install -y --no-install-recommends \ | ||
ca-certificates \ | ||
gcc \ | ||
libbz2-dev \ | ||
libcurl4-gnutls-dev \ | ||
liblzma-dev \ | ||
libssl-dev \ | ||
make \ | ||
pkg-config \ | ||
procps \ | ||
python-is-python3 \ | ||
python3-dev \ | ||
python3-pip \ | ||
wget \ | ||
zlib1g-dev && \ | ||
apt-get autoclean && \ | ||
rm -rf /var/lib/apt/lists/* | ||
|
||
# Create a micromamba environment named "artic" holding artic at the pinned ARTIC_VER | ||
# (channels: conda-forge, bioconda, defaults), clean micromamba's caches in the same layer, | ||
# and create the /data directory. | ||
RUN micromamba create -n artic -y -c conda-forge -c bioconda -c defaults \ | ||
artic=${ARTIC_VER} && \ | ||
micromamba clean -a -y && \ | ||
mkdir /data | ||
|
||
# Append the artic environment's bin directory to PATH so its executables can be called | ||
# by name, and set the locale to C.UTF-8. | ||
ENV PATH="${PATH}:/opt/conda/envs/artic/bin/" \ | ||
LC_ALL=C.UTF-8 | ||
|
||
# Smoke test: the build fails here if artic is not on PATH or cannot start | ||
RUN artic --help | ||
|
||
# Install medaka at the pinned MEDAKA_VER plus pyabpoa, then confirm medaka is callable. | ||
# --no-cache-dir stops pip from storing its download cache in this image layer. | ||
RUN pip install --no-cache-dir medaka==${MEDAKA_VER} && \ | ||
pip install --no-cache-dir pyabpoa && \ | ||
medaka --version | ||
|
||
# Default command prints artic's help text. | ||
# Exec (JSON-array) form passes the command as an argument vector instead of wrapping it in /bin/sh -c. | ||
CMD ["artic", "--help"] | ||
|
||
# Working directory for containers started from this image | ||
WORKDIR /data | ||
|
||
##### ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ##### | ||
##### Step 2. Set up the testing stage. ##### | ||
##### The docker image is built to the 'test' stage before merging, but ##### | ||
##### the test stage (or any stage after 'app') will be lost. ##### | ||
##### ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ##### | ||
|
||
# A second FROM instruction creates a new stage | ||
# new base for testing | ||
FROM app AS test | ||
# Re-declare ARTIC_VER so this stage can use it when downloading the matching source release | ||
ARG ARTIC_VER | ||
|
||
# print help and version info; check dependencies (not all software has these options available) | ||
# Mostly this ensures the tool of choice is in path and is executable | ||
RUN artic --help && \ | ||
artic --version && \ | ||
medaka --version | ||
|
||
# listing the medaka models available in this image | ||
# (subcommand spelled list_models, without the stray Markdown-escape backslash) | ||
RUN medaka tools list_models | ||
|
||
# Run the upstream test suite from the filesystem root | ||
WORKDIR / | ||
|
||
# test that came with artic: | ||
# download the fieldbioinformatics source tarball tagged v${ARTIC_VER}, unpack it, | ||
# list the extracted directory, then run test-runner.sh for the medaka and nanopolish workflows. | ||
# Every step is chained with &&, so any failure fails the build of this stage. | ||
RUN wget -q https://github.com/artic-network/fieldbioinformatics/archive/refs/tags/v${ARTIC_VER}.tar.gz && \ | ||
tar -vxf v${ARTIC_VER}.tar.gz && \ | ||
cd /fieldbioinformatics-${ARTIC_VER} && ls && \ | ||
bash ./test-runner.sh medaka && bash ./test-runner.sh nanopolish | ||
|
||
# Scratch directory for the real-data test | ||
WORKDIR /test | ||
|
||
# using on "real" data (sample files were not sequenced with version 5.3.2 primers) | ||
# Steps, in order: | ||
#   1. download the public FASTQ for SRR22452250 | ||
#   2. keep reads of length 400-700 with artic guppyplex | ||
#   3. build the scheme layout dir/name/V5/ from the nCoV-2019 V5.3.2 primer bed, reference fasta and scheme bed | ||
#   4. index the reference with samtools faidx | ||
#   5. run artic minion (medaka workflow, model r941_min_high_g360) with scheme "name" version 5 and output prefix "final" | ||
#   6. list the final* output files | ||
RUN wget -q ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR224/050/SRR22452250/SRR22452250_1.fastq.gz && \ | ||
artic guppyplex --min-length 400 --max-length 700 --directory . --prefix SRR22452250_1.fastq.gz --output SRR22452250_1_filtered.fastq.gz && \ | ||
mkdir -p dir/name/V5 && \ | ||
wget -q https://raw.githubusercontent.com/artic-network/primer-schemes/master/nCoV-2019/V5.3.2/SARS-CoV-2.primer.bed -O dir/name/V5/name.primer.bed && \ | ||
wget -q https://raw.githubusercontent.com/artic-network/primer-schemes/master/nCoV-2019/V5.3.2/SARS-CoV-2.reference.fasta -O dir/name/V5/name.reference.fasta && \ | ||
wget -q https://raw.githubusercontent.com/artic-network/primer-schemes/master/nCoV-2019/V5.3.2/SARS-CoV-2.scheme.bed -O dir/name/V5/name.scheme.bed && \ | ||
samtools faidx dir/name/V5/name.reference.fasta && \ | ||
artic minion --normalise 200 --skip-nanopolish --medaka --medaka-model r941_min_high_g360 --threads 4 --read-file SRR22452250_1_filtered.fastq.gz --scheme-directory ./dir --scheme-version 5 name final && \ | ||
ls final* | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
# artic fieldbioinformatics container | ||
|
||
Main tool : [artic](https://github.com/artic-network/fieldbioinformatics) | ||
|
||
Additional tools: | ||
|
||
- medaka=1.12.0 | ||
|
||
Full documentation: [https://github.com/artic-network/fieldbioinformatics](https://github.com/artic-network/fieldbioinformatics) | ||
|
||
There is also a very useful SOP: [https://artic.network/ncov-2019/ncov2019-bioinformatics-sop.html](https://artic.network/ncov-2019/ncov2019-bioinformatics-sop.html) | ||
And additional documentation: [https://artic.readthedocs.io/en/latest/](https://artic.readthedocs.io/en/latest/) | ||
|
||
> A bioinformatics pipeline for working with virus sequencing data sequenced with nanopore. | ||
WARNING: This container does not contain the primer schemes found at [https://github.com/artic-network/primer-schemes](https://github.com/artic-network/primer-schemes). They must be downloaded and mounted separately. | ||
|
||
## Example Usage with the artic primers | ||
|
||
```bash | ||
# get primers | ||
git clone https://github.com/artic-network/primer-schemes | ||
|
||
# download reads for example | ||
wget -q ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR224/050/SRR22452250/SRR22452250_1.fastq.gz | ||
|
||
# read filtering | ||
docker run -v $(pwd):/data staphb/artic:latest artic guppyplex --min-length 400 --max-length 700 --directory . --prefix SRR22452250_1.fastq.gz --output SRR22452250_1_filtered.fastq.gz | ||
|
||
# running the artic minion workflow with medaka | ||
docker run -v $(pwd):/data staphb/artic:latest artic minion --normalise 200 --skip-nanopolish --medaka --medaka-model r941_min_high_g360 --threads 4 --read-file SRR22452250_1_filtered.fastq.gz --scheme-directory primer-schemes --scheme-version 5.3.2 nCoV-2019 test | ||
|
||
# the result files will all start with test* | ||
``` | ||
|
||
In general, any primer scheme can be used as long as it meets [artic's requirements](https://github.com/artic-network/primer-schemes). | ||
|
||
This is the recommended directory structure with corresponding files: | ||
|
||
```bash | ||
${directory}/${name}/V${version}/${name}.primer.bed | ||
${directory}/${name}/V${version}/${name}.scheme.bed | ||
${directory}/${name}/V${version}/${name}.reference.fasta | ||
${directory}/${name}/V${version}/${name}.reference.fasta.fai | ||
``` | ||
|
||
The command to use this primer scheme would be | ||
|
||
```bash | ||
artic minion --normalise 200 --skip-nanopolish --medaka --medaka-model r941_min_high_g360 --threads 4 --read-file input.fastq.gz --scheme-directory ${directory} --scheme-version ${version} ${name} outputprefix | ||
``` | ||
|
||
Different primer schemes can be validated with artic-tools (already in PATH): | ||
|
||
```bash | ||
artic-tools validate_scheme ${basename}.primer.bed --outputInserts ${basename}.insert.bed | ||
``` | ||
|
||
## Medaka models | ||
|
||
Medaka updates frequently, and artic can throw errors when corresponding ONT models are not found. | ||
|
||
These are the medaka models in this image: | ||
``` | ||
Available: r103_fast_g507, r103_fast_snp_g507, r103_fast_variant_g507, r103_hac_g507, r103_hac_snp_g507, r103_hac_variant_g507, r103_min_high_g345, r103_min_high_g360, r103_prom_high_g360, r103_prom_snp_g3210, r103_prom_variant_g3210, r103_sup_g507, r103_sup_snp_g507, r103_sup_variant_g507, r1041_e82_260bps_fast_g632, r1041_e82_260bps_fast_variant_g632, r1041_e82_260bps_hac_g632, r1041_e82_260bps_hac_v4.0.0, r1041_e82_260bps_hac_v4.1.0, r1041_e82_260bps_hac_variant_g632, r1041_e82_260bps_hac_variant_v4.1.0, r1041_e82_260bps_joint_apk_ulk_v5.0.0, r1041_e82_260bps_sup_g632, r1041_e82_260bps_sup_v4.0.0, r1041_e82_260bps_sup_v4.1.0, r1041_e82_260bps_sup_variant_g632, r1041_e82_260bps_sup_variant_v4.1.0, r1041_e82_400bps_fast_g615, r1041_e82_400bps_fast_g632, r1041_e82_400bps_fast_variant_g615, r1041_e82_400bps_fast_variant_g632, r1041_e82_400bps_hac_g615, r1041_e82_400bps_hac_g632, r1041_e82_400bps_hac_v4.0.0, r1041_e82_400bps_hac_v4.1.0, r1041_e82_400bps_hac_v4.2.0, r1041_e82_400bps_hac_v4.3.0, r1041_e82_400bps_hac_v5.0.0, r1041_e82_400bps_hac_variant_g615, r1041_e82_400bps_hac_variant_g632, r1041_e82_400bps_hac_variant_v4.1.0, r1041_e82_400bps_hac_variant_v4.2.0, r1041_e82_400bps_hac_variant_v4.3.0, r1041_e82_400bps_hac_variant_v5.0.0, r1041_e82_400bps_sup_g615, r1041_e82_400bps_sup_v4.0.0, r1041_e82_400bps_sup_v4.1.0, r1041_e82_400bps_sup_v4.2.0, r1041_e82_400bps_sup_v4.3.0, r1041_e82_400bps_sup_v5.0.0, r1041_e82_400bps_sup_variant_g615, r1041_e82_400bps_sup_variant_v4.1.0, r1041_e82_400bps_sup_variant_v4.2.0, r1041_e82_400bps_sup_variant_v4.3.0, r1041_e82_400bps_sup_variant_v5.0.0, r104_e81_fast_g5015, r104_e81_fast_variant_g5015, r104_e81_hac_g5015, r104_e81_hac_variant_g5015, r104_e81_sup_g5015, r104_e81_sup_g610, r104_e81_sup_variant_g610, r10_min_high_g303, r10_min_high_g340, r941_e81_fast_g514, r941_e81_fast_variant_g514, r941_e81_hac_g514, r941_e81_hac_variant_g514, r941_e81_sup_g514, r941_e81_sup_variant_g514, r941_min_fast_g303, r941_min_fast_g507, 
r941_min_fast_snp_g507, r941_min_fast_variant_g507, r941_min_hac_g507, r941_min_hac_snp_g507, r941_min_hac_variant_g507, r941_min_high_g303, r941_min_high_g330, r941_min_high_g340_rle, r941_min_high_g344, r941_min_high_g351, r941_min_high_g360, r941_min_sup_g507, r941_min_sup_snp_g507, r941_min_sup_variant_g507, r941_prom_fast_g303, r941_prom_fast_g507, r941_prom_fast_snp_g507, r941_prom_fast_variant_g507, r941_prom_hac_g507, r941_prom_hac_snp_g507, r941_prom_hac_variant_g507, r941_prom_high_g303, r941_prom_high_g330, r941_prom_high_g344, r941_prom_high_g360, r941_prom_high_g4011, r941_prom_snp_g303, r941_prom_snp_g322, r941_prom_snp_g360, r941_prom_sup_g507, r941_prom_sup_snp_g507, r941_prom_sup_variant_g507, r941_prom_variant_g303, r941_prom_variant_g322, r941_prom_variant_g360, r941_sup_plant_g610, r941_sup_plant_variant_g610 | ||
Default consensus: r1041_e82_400bps_sup_v5.0.0 | ||
Default variant: r1041_e82_400bps_sup_variant_v5.0.0 | ||
``` |