From 7925d4d597dc993ed4564c4db1eb78b8fae3a5fe Mon Sep 17 00:00:00 2001 From: Abhinav Sharma Date: Fri, 20 Dec 2024 15:19:51 +0200 Subject: [PATCH] add experimental processes fix imports Signed-off-by: Abhinav Sharma fix output channel name fix qualifier name bundle the rdanalyzer fasta file minor tweaks update the config as per new aliases remove the mention of "EXP" since the new options are disabled by default tweak the publishdirs fix the spotyping tag update the output format fix output paths fix the output names tweak tbprofiler outputs tweak spotyping outputs [ci skip] update outputs disable output for version fix channel inputs for collate process implement a concatenation for spotyping enable spotyping cat process generate txt output is not useful add the spotyping cat process fix invocation iter on process definition iterate on spotyping fix the spotyping command iter spotyping channel patch the spotyping txt file work with patched results dev collect xls make the workflow unique make the channel contents unique revert to previous collect -> unique publish and overwrite the previous results from spotyping update the pattern publish excel sheet separately add ntmprofiler Signed-off-by: Abhinav Sharma # Conflicts: # conf/docker.config --- CHANGELOG.md | 2 + README.md | 48 +-- conf/apptainer.config | 65 ++++ conf/docker.config | 14 +- conf/singularity.config | 12 + default_params.config | 59 ++- modules/rdanalyzer/main.nf | 43 +++ modules/spotyping/main.nf | 45 +++ modules/tbprofiler/fastq_profile.nf | 72 ++++ modules/utils/cat_spotyping.nf | 44 +++ nextflow.config | 1 + resources/rdanalyzer/RDs30.fasta | 550 ++++++++++++++++++++++++++++ workflows/quality_check_wf.nf | 51 ++- 13 files changed, 972 insertions(+), 34 deletions(-) create mode 100644 conf/apptainer.config create mode 100644 modules/rdanalyzer/main.nf create mode 100644 modules/spotyping/main.nf create mode 100644 modules/tbprofiler/fastq_profile.nf create mode 100644 
modules/utils/cat_spotyping.nf create mode 100644 resources/rdanalyzer/RDs30.fasta diff --git a/CHANGELOG.md b/CHANGELOG.md index 6a042faa..afcc05fd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,6 @@ # CHANGELOG FOR THE MAGMA PIPELINE VERSIONS + + ## v2.0.0 diff --git a/README.md b/README.md index 7e6f685f..54938686 100644 --- a/README.md +++ b/README.md @@ -33,7 +33,7 @@ The `java` version should NOT be an `internal jdk` release! You can check the re Notice the `LTS` next to `OpenJDK` line. -```bash +```bash $ java -version openjdk version "17.0.7" 2023-04-18 LTS @@ -90,7 +90,7 @@ S0002,/full_path_to_directory_of_fastq_files/S0002_01_R1.fastq.gz,full_path_to_d S0003,/full_path_to_directory_of_fastq_files/S0003_01_R1.fastq.gz, ``` -If you have the metadata from sequencing instrument, you can specify further information in the samplesheet +If you have the metadata from sequencing instrument, you can specify further information in the samplesheet ```csv Study,Sample,Library,Attempt,R1,R2,Flowcell,Lane,Index Sequence @@ -156,15 +156,15 @@ Which could be provided to the pipeline using `-params-file` parameter as shown ```console nextflow run 'https://github.com/TORCH-Consortium/MAGMA' \ - -profile conda_local, server \ - -r v1.1.1 \ - -params-file my_parameters_1.yml + -profile conda_local, server \ + -r v1.1.1 \ + -params-file my_parameters_1.yml ``` # Analysis -## Running MAGMA using Nextflow Tower +## Running MAGMA using Nextflow Tower You can also use Seqera Platform (aka Nextflow Tower) to run the pipeline on any of the supported cloud platforms and monitoring the pipeline execution. @@ -181,11 +181,11 @@ You can run the pipeline using Conda, Mamba or Micromamba package managers to in You can find out the location of conda environments using `conda env list`. [Here's](https://docs.conda.io/projects/conda/en/4.6.0/_downloads/52a95608c49671267e40c689e0bc00ca/conda-cheatsheet.pdf) a useful cheatsheet for conda operations. 
-You can use the `conda` based setup for the pipeline for running MAGMA +You can use the `conda` based setup for the pipeline for running MAGMA - On a local linux machine(e.g. your laptop or a university server) -- On an HPC cluster (e.g. SLURM, PBS) in case you don't have access to container systems like Singularity, Podman or Docker +- On an HPC cluster (e.g. SLURM, PBS) in case you don't have access to container systems like Singularity, Podman or Docker -All the requisite softwares have been provided as a `conda` recipe (i.e. `yml` files) +All the requisite softwares have been provided as a `conda` recipe (i.e. `yml` files) - [magma-env-1.yml](./conda_envs/magma-env-1.yml) - [magma-env-2.yml](./conda_envs/magma-env-2.yml) @@ -208,7 +208,7 @@ $ conda env create -n magma-env-2 --file magma-env-2.yml Once the environments are created, you can make use of the pipeline parameter `conda_envs_location` to inform the pipeline of the names and location of the conda envs. -Next, you need to load the WHO Resistance Catalog within `tb-profiler`; basically the [instructions](https://github.com/TORCH-Consortium/MAGMA/blob/master/conda_envs/setup_conda_envs.sh#L20-L23), which are used to build the necessary containers. +Next, you need to load the WHO Resistance Catalog within `tb-profiler`; basically the [instructions](https://github.com/TORCH-Consortium/MAGMA/blob/master/conda_envs/setup_conda_envs.sh#L20-L23), which are used to build the necessary containers. 1. Download [magma_resistance_db_who_v1.zip](https://github.com/TORCH-Consortium/MAGMA/files/14559680/resistance_db_who_v1.zip) and unzip it @@ -250,7 +250,7 @@ We provide [two docker containers](https://github.com/orgs/TORCH-Consortium/pack > 🚧 **Container build script**: The script used to build these containers is provided [here](./containers/build.sh). 
-Although, you don't need to pull the containers manually, but should you need to, you could use the following commands to pull the pre-built and provided containers +Although, you don't need to pull the containers manually, but should you need to, you could use the following commands to pull the pre-built and provided containers ```console docker pull ghcr.io/torch-consortium/magma/magma-container-1:1.1.1 @@ -262,13 +262,13 @@ docker pull ghcr.io/torch-consortium/magma/magma-container-2:1.1.1 > :memo: **Have singularity or podman instead?**:
If you do have access to Singularity or Podman, then owing to their compatibility with Docker, you can still use the provided docker containers. -Here's the command which should be used +Here's the command which should be used ```console nextflow run 'https://github.com/torch-consortium/magma' \ - -params-file my_parameters_2.yml \ - -profile docker,pbs \ - -r v1.1.1 + -params-file my_parameters_2.yml \ + -profile docker,pbs \ + -r v1.1.1 ``` > :bulb: **Hint**:
@@ -307,7 +307,7 @@ errors. Including these is optional, if unknown or irrelevant, just fill in with a '1' as shown in example_MAGMA_samplesheet.csv) ``` -## (Optional) GVCF datasets +## (Optional) GVCF datasets We also provide some reference GVCF files which you could use for specific use-cases. @@ -319,7 +319,7 @@ containing GVCF reference dataset for ~600 samples is provided for augmenting sm ``` use_ref_gvcf = false -ref_gvcf = "/path/to/FILE.g.vcf.gz" +ref_gvcf = "/path/to/FILE.g.vcf.gz" ref_gvcf_tbi = "/path/to/FILE.g.vcf.gz.tbi" ``` @@ -335,7 +335,7 @@ Tim Huepink and Lennert Verboven created an in-depth tutorial of the features of We have also included a presentation (in PDF format) of the logic and workflow of the MAGMA pipeline as well as posters that have been presented at conferences. Please refer the [docs](./docs) folder. -# Interpretation +# Interpretation The results directory produced by MAGMA is as follows: @@ -347,7 +347,7 @@ The results directory produced by MAGMA is as follows: └── vcf_files ``` -## QC Statistics Directory +## QC Statistics Directory In this directory you will find files related to the quality control carried out by the MAGMA pipeline. The structure is as follows: @@ -412,7 +412,7 @@ MAGMA also notes the presence of all variants in in tier 1 and tier 2 drug resis - **Phylogeny** -Contains the outputs of the IQTree phylogenetic tree construction. +Contains the outputs of the IQTree phylogenetic tree construction. > :memo: By default we recommend that you use the **ExDRIncComplex** files as MAGMA was optimized to be able to accurately call positions on the edges of complex regions in the *Mtb* genome @@ -422,7 +422,7 @@ Contains the SNP distance tables. 
> :memo: By default we recommend that you use the **ExDRIncComplex** files as MAGMA was optimized to be able to accurately call positions on the edges of complex regions in the *Mtb* genome -## `vcf_files` Directory +## `vcf_files` Directory ```bash /path/to/results_dir/vcf_files @@ -463,7 +463,7 @@ Contains the SNP distance tables. > Unfiltered structural variants detected by the MAGMA pipeline -## Libraries Directory +## Libraries Directory > Contains files related to FASTQ validation and FASTQC analysis @@ -472,9 +472,9 @@ Contains the SNP distance tables. > Contains vcf files for major|minor|structural variants for each individual samples -# Citations +# Citations -The MAGMA paper has been published here: https://doi.org/10.1371/journal.pcbi.1011648 +The MAGMA paper has been published here: https://doi.org/10.1371/journal.pcbi.1011648 The XBS variant calling core was published here: https://doi.org/10.1099%2Fmgen.0.000689 diff --git a/conf/apptainer.config b/conf/apptainer.config new file mode 100644 index 00000000..12543724 --- /dev/null +++ b/conf/apptainer.config @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2021-2024 MAGMA pipeline authors, see https://doi.org/10.1371/journal.pcbi.1011648 + * + * This file is part of MAGMA pipeline, see https://github.com/TORCH-Consortium/MAGMA + * + * For quick overview of GPL-3 license, please refer + * https://www.tldrlegal.com/license/gnu-general-public-license-v3-gpl-3 + * + * - You MUST keep this license with original authors in your copy + * - You MUST acknowledge the original source of this software + * - You MUST state significant changes made to the original software + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+process {
+    // Container image per tool, chosen by a regular expression on the process name.
+    // NOTE(review): UTILS_CAT_SPOTYPING matches both '.*SPOTYPING.*' and 'UTILS.*' below - confirm which image it should get.
+    withName:
+    '.*SPOTYPING.*' {
+        container = "quay.io/biocontainers/spotyping:2.1--hdfd78af_4"
+    }
+
+    withName:
+    '.*RDANALYZER.*' {
+        container = "quay.io/biocontainers/rd-analyzer:1.01--hdfd78af_0"
+    }
+
+
+    withName:
+    '.*TBPROFILER.*' {
+        container = "ghcr.io/torch-consortium/magma/biocontainer-tbprofiler:6.3.0--1"
+    }
+
+    withName:
+    'NTMPROFILER.*' {
+        container = "ghcr.io/torch-consortium/magma/biocontainer-ntmprofiler:0.4.0"
+    }
+
+    withName:
+    'ISMAPPER.*|GATK.*|LOFREQ.*|DELLY.*|MULTIQC.*|FASTQC.*|UTILS.*|FASTQ.*|SAMPLESHEET.*' {
+        container = "ghcr.io/torch-consortium/magma/magma-container-1:2.0.0"
+    }
+
+    withName:
+    'BWA.*|IQTREE.*|SNPDISTS.*|SNPSITES.*|BCFTOOLS.*|BGZIP.*|SAMTOOLS.*|SNPEFF.*|CLUSTERPICKER.*' {
+        container = "ghcr.io/torch-consortium/magma/magma-container-2:1.1.1"
+    }
+
+}
+
+// Enable the Apptainer engine whenever this config is included (see the `apptainer` profile).
+apptainer {
+    enabled = true
+}
diff --git a/conf/docker.config b/conf/docker.config index df22845b..753be2b6 100644 --- a/conf/docker.config +++ b/conf/docker.config @@ -26,8 +26,18 @@ process { withName: - 'TBPROFILER.*' { - container = "ghcr.io/torch-consortium/magma/biocontainer-tbprofiler:6.3.0" + '.*SPOTYPING.*' { + container = "quay.io/biocontainers/spotyping:2.1--hdfd78af_4" + } + + withName: + '.*RDANALYZER.*' { + container = "quay.io/biocontainers/rd-analyzer:1.01--hdfd78af_0" + } + + withName: + '.*TBPROFILER.*' { + container = "ghcr.io/torch-consortium/magma/biocontainer-tbprofiler:6.3.0--1" } withName: diff --git a/conf/singularity.config b/conf/singularity.config index cfea0b8b..e386b14f 100644 --- a/conf/singularity.config +++ b/conf/singularity.config @@ -25,6 
+25,18 @@ */ process { + + withName: + '.*SPOTYPING.*' { + container = "quay.io/biocontainers/spotyping:2.1--hdfd78af_4" + } + + withName: + '.*RDANALYZER.*' { + container = "quay.io/biocontainers/rd-analyzer:1.01--hdfd78af_0" + } + + withName: 'TBPROFILER.*' { container = "ghcr.io/torch-consortium/magma/biocontainer-tbprofiler:6.3.0" diff --git a/default_params.config b/default_params.config index fb88d6ee..88491c2b 100644 --- a/default_params.config +++ b/default_params.config @@ -18,6 +18,7 @@ * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License @@ -101,6 +102,24 @@ skip_phylogeny_and_clustering = false //OR true skip_complex_regions = false //OR true + +// Enable execution of MAGMA's tbprofiler container (with who+ database) on +// FASTQ files + +skip_ntmprofiler = false // OR true + +skip_tbprofiler_fastq = true // OR false + +skip_spotyping = true + +// Flags for experimental features + +//NOTE: NOT working yet +skip_rdanalyzer = true +ref_fasta_rdanalyzer = "${projectDir}/resources/rdanalyzer/RDs30.fasta" + + + //NOTE: PICK ONE of the following parameters related to IQTREE. 
iqtree_standard_bootstrap= false iqtree_fast_ml_only= false @@ -186,7 +205,7 @@ fastq_validator_path = "fastq_validator.sh" //NOTE:Control the global publishing behavior, which is used as default in case there is no process specific config provided -save_mode = 'symlink' +save_mode = 'symlink' // 'copy' should_publish = true //NOTE: If enabled, the BAM results from HaplotypeCaller processes would be published @@ -371,7 +390,7 @@ DELLY_CALL { } NTMPROFILER_PROFILE { - results_dir = "${params.outdir}/non-tuberculous_mycobacteria/per_sample/" + results_dir = "${params.outdir}/analyses/non-tuberculous_mycobacteria/per_sample/" } @@ -443,7 +462,7 @@ UTILS_MERGE_COHORT_STATS { //----------------------- NTMPROFILER_COLLATE { - results_dir = "${params.outdir}/non-tuberculous_mycobacteria/cohort" + results_dir = "${params.outdir}/analyses/non-tuberculous_mycobacteria/cohort" prefix = "ntmprofiler.collate" } @@ -461,7 +480,7 @@ GATK_GENOTYPE_GVCFS { arguments = " -G StandardAnnotation -G AS_StandardAnnotation --sample-ploidy 1 " - should_publish = false + should_publish = true } @@ -470,7 +489,7 @@ SNPEFF { arguments = " -nostats -ud 100 Mycobacterium_tuberculosis_h37rv " - should_publish = false + should_publish = true } @@ -678,6 +697,36 @@ TBPROFILER_COLLATE__COHORT { prefix = "major_variants" } + +TBPROFILER_FASTQ_PROFILE { + results_dir = "${params.outdir}/analyses/others/per_sample/tbprofiler_fastq/" + arguments = "--csv" + should_publish = false +} + +TBPROFILER_FASTQ_COLLATE { + results_dir = "${params.outdir}/analyses/drug_resistance/tbprofiler_fastq/" + prefix = "fastq" +} + + +SPOTYPING { + results_dir = "${params.outdir}/analyses/spotyping/results_excel" + arguments = "" // Or "--noQuery" +} + +UTILS_CAT_SPOTYPING { + results_dir = "${params.outdir}/analyses/spotyping/" + arguments = "" +} + + +RDANALYZER { + results_dir = "${params.outdir}/analyses/others/per_sample/rdanalyzer/" + arguments = "" +} + + TBPROFILER_VCF_PROFILE__LOFREQ { results_dir = 
"${params.outdir}/analyses/drug_resistance/minor_variants_lofreq/" arguments = " --depth 0,0 --af 0,0 --strand 0 --sv_depth 0,0 --sv_af 0,0 --sv_len 100000,50000 " diff --git a/modules/rdanalyzer/main.nf b/modules/rdanalyzer/main.nf new file mode 100644 index 00000000..f35b06e5 --- /dev/null +++ b/modules/rdanalyzer/main.nf @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2021-2024 MAGMA pipeline authors, see https://doi.org/10.1371/journal.pcbi.1011648 + * + * This file is part of MAGMA pipeline, see https://github.com/TORCH-Consortium/MAGMA + * + * For quick overview of GPL-3 license, please refer + * https://www.tldrlegal.com/license/gnu-general-public-license-v3-gpl-3 + * + * - You MUST keep this license with original authors in your copy + * - You MUST acknowledge the original source of this software + * - You MUST state significant changes made to the original software + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program . If not, see . 
+ */
+process RDANALYZER {
+    tag "$genomeName"
+    publishDir params.results_dir, mode: params.save_mode, enabled: params.should_publish
+
+    input:
+    tuple val(genomeName), val(meta), path(genomeReads)
+    path(ref_fasta_rdanalyzer)
+
+    output:
+    tuple path("${genomeName}.result"), path("${genomeName}.depth")
+
+    // Runs RD-Analyzer on one sample's reads against the RD reference FASTA staged as the second input.
+    script:
+    // `-o ${genomeName}` is presumably the output prefix behind the declared .result/.depth files; extra CLI flags come from params.arguments.
+    """
+    RD-Analyzer-extended.py ${ref_fasta_rdanalyzer} -o ${genomeName} ${params.arguments ?: ''} ${genomeReads}
+    """
+}
diff --git a/modules/spotyping/main.nf b/modules/spotyping/main.nf
new file mode 100644
index 00000000..ba64b76b
--- /dev/null
+++ b/modules/spotyping/main.nf
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2021-2024 MAGMA pipeline authors, see https://doi.org/10.1371/journal.pcbi.1011648
+ *
+ * This file is part of MAGMA pipeline, see https://github.com/TORCH-Consortium/MAGMA
+ *
+ * For quick overview of GPL-3 license, please refer
+ * https://www.tldrlegal.com/license/gnu-general-public-license-v3-gpl-3
+ *
+ * - You MUST keep this license with original authors in your copy
+ * - You MUST acknowledge the original source of this software
+ * - You MUST state significant changes made to the original software
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+process SPOTYPING {
+    // Spoligotypes one sample with SpoTyping and prefixes every line of its text report with the sample name.
+    tag "${genomeName}"
+    publishDir params.results_dir, mode: params.save_mode, enabled: params.should_publish, pattern: "*.xls"
+
+    input:
+    tuple val(genomeName), val(meta), path(genomeReads)
+
+    output:
+    path("*.patched.txt"), emit: txt                  // sample-prefixed report written by the awk step
+    path("SITVIT*.xls"), emit: xls, optional: true    // only the spreadsheet is published (see `pattern`); it may be absent
+
+    script:
+    def spoligoTxt = "${genomeName}.txt"
+    """
+    SpoTyping.py ${genomeReads} --output ${spoligoTxt} ${params.arguments}
+
+    awk '{print "${genomeName}\t" \$0}' ${spoligoTxt} > ${genomeName}.patched.txt
+    """
+}
diff --git a/modules/tbprofiler/fastq_profile.nf b/modules/tbprofiler/fastq_profile.nf
new file mode 100644
index 00000000..7b7b8404
--- /dev/null
+++ b/modules/tbprofiler/fastq_profile.nf
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2021-2024 MAGMA pipeline authors, see https://doi.org/10.1371/journal.pcbi.1011648
+ *
+ * This file is part of MAGMA pipeline, see https://github.com/TORCH-Consortium/MAGMA
+ *
+ * For quick overview of GPL-3 license, please refer
+ * https://www.tldrlegal.com/license/gnu-general-public-license-v3-gpl-3
+ *
+ * - You MUST keep this license with original authors in your copy
+ * - You MUST acknowledge the original source of this software
+ * - You MUST state significant changes made to the original software
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+process TBPROFILER_FASTQ_PROFILE {
+    // Runs `tb-profiler profile` on one sample's FASTQ reads; emits the JSON result plus optional CSV/TXT reports.
+    tag "$sampleName"
+    label 'process_medium'
+    // Publish the CSV/TXT reports only. Glob patterns have no '!' negation and the outputs live under results/, so list them explicitly.
+    publishDir params.results_dir, mode: params.save_mode, enabled: params.should_publish, pattern: "results/*.{csv,txt}"
+
+    input:
+    tuple val(sampleName), val(meta), path(reads)
+
+    output:
+    tuple val(meta), path("results/*.json"), emit: json
+    tuple val(meta), path("results/*.csv") , emit: csv, optional: true
+    tuple val(meta), path("results/*.txt") , emit: txt, optional: true
+    //NOTE: Disable these outputs since we are not using them
+    // path "versions.yml" , emit: versions
+    // tuple val(meta), path("bam/*.bam") , emit: bam
+    // tuple val(meta), path("vcf/*.vcf.gz") , emit: vcf
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${sampleName}"
+    def input_reads = meta.single_end ? "--read1 $reads" : "--read1 ${reads[0]} --read2 ${reads[1]}" // a missing/false single_end means paired-end
+    """
+    tb-profiler \\
+        profile \\
+        $args \\
+        ${params.arguments} \\
+        --prefix ${prefix} \\
+        --threads $task.cpus \\
+        $input_reads
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        tbprofiler: \$( echo \$(tb-profiler --version 2>&1) | sed 's/TBProfiler version //')
+    END_VERSIONS
+    """
+
+    stub:
+    """
+    mkdir results
+    touch results/${sampleName}.results.json
+    """
+}
diff --git a/modules/utils/cat_spotyping.nf b/modules/utils/cat_spotyping.nf
new file mode 100644
index 00000000..aafa9aa4
--- /dev/null
+++ b/modules/utils/cat_spotyping.nf
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2021-2024 MAGMA pipeline authors, see https://doi.org/10.1371/journal.pcbi.1011648
+ *
+ * This file is part of MAGMA pipeline, see https://github.com/TORCH-Consortium/MAGMA
+ *
+ * For quick overview of GPL-3 license, please refer
+ * https://www.tldrlegal.com/license/gnu-general-public-license-v3-gpl-3
+ *
+ * - You MUST keep this license with original authors in your copy
+ * - You MUST acknowledge the original source of this software
+ * - You MUST state significant changes made 
to the original software
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+// Concatenates the text reports staged under results_text/ into a single spotyping.cat.txt.
+process UTILS_CAT_SPOTYPING {
+    publishDir params.results_dir, mode: params.save_mode, enabled: params.should_publish
+
+    input:
+    path("results_text/*")
+
+    output:
+    path("*.cat.txt")
+    path("results_text")
+
+    script:
+    // Truncate with '>' instead of appending with '>>', so re-running the script in the same work dir cannot duplicate rows.
+    """
+    cat results_text/*txt > spotyping.cat.txt
+    """
+
+}
diff --git a/nextflow.config b/nextflow.config index a1cb9198..8a7b159b 100644 --- a/nextflow.config +++ b/nextflow.config @@ -84,6 +84,7 @@ profiles { conda_local { includeConfig 'conf/conda_local.config' } docker { includeConfig 'conf/docker.config' } singularity { includeConfig 'conf/singularity.config' } + apptainer { includeConfig 'conf/apptainer.config' } podman { includeConfig 'conf/podman.config' } // Executor specific settings diff --git a/resources/rdanalyzer/RDs30.fasta b/resources/rdanalyzer/RDs30.fasta new file mode 100644 index 00000000..abb898d0 --- /dev/null +++ b/resources/rdanalyzer/RDs30.fasta @@ -0,0 +1,550 @@ +>RD9_1 +TCGCCCCGGCAGCCAGGCGTCGCGCGAGTTCGCCGCCGATCTGGCTGCGGCCGCCGAAAATTACTACCGG +AGCAGCGCCCGTGTCGTCCACGGCTGCGATTATTGCCTGCGCTAGCGTGAGTGGCGATGGTCAACACCAC +TACGCGGCTTAGTGACGACGCGCTGGCGTTTCTTTCCGAACGCCATCTGGCCATGCTGACCACGCTGCGG +GCGGACAACTCGCCGCACGTGGTGGCGGTAGGTTTCACCTTCGACCCCAAGACTCACATCGCGCGGGTCA +TCACCACCGGCGGCTCCCAAAAGGCCGTCAATGCCGACCGCAGTGGGCTTGCCGTGCTCAGCCAGGTCGA 
+CGGCGCGCGCTGGCTCTCACTGGAGGGTAGGGCGGCGGTGAACAGCGACATCGACGCCGTGCGCGACGCC +GAGCTGCGCTACGCGCAGCGCTATCGCACCCCGCGTCCCAATCCACGCCGAGTGGTCATCGAGGTCCAGA +TTGAGCGCGTGCTGGGATCCGCGGATCTGCTCGACCGGGCCTGACAACCGAGGTCATGGCGGCAGTAGGT +AATGCACCCAGGCGCCACCGGCGGGCCCGGCCACGGCGTGCAGACGGGCGTTCTGATTGCCCGTTCGGGG +CAGGGTAAAGTCCGCGCCGATGGCTGTGCAGGCTAGGGCAGCCCCGGCGAAGACCACGGGTGCCGGCGTC +ACGGTCCACCTGCCTGCCGCGTCCCGACAGGCCGCAGGGTGTGGGTCACCGCACGATGCGGCGACCCAGC +GGCCATCCGCGCCCTGCAGGGCGCATGCTCCGGCACCGGCACGCGGTTCGTCCGGTGCCCAGCTCCACAA +CGACGCCTGAATGCGGCCGTCTTCGGGGAGCAGCTGATCGAAGCCGAACAGATTGACCCCGCAATCGGTC +ATCGCCGGCACCTTCGGCGGGGTAAGCGCCTGCGGATTGGCCGGTGGACGGGTCGGGTTGGCCAACGCCG +TGGCCAGCGTGGAGTCCTCGTAATAGCGGACCAGTCGCCAAGCGTAGACACCGCGGCCATAGGTGGCATC +GCAGGCCGGGTATGGCCGGTAGCCGGAGTTCGAGCCGCTTTCCAGCTCAACGCCGCTCCAGTCGAAGACG +GCGGCCGACCAACCTGGCGCACAAGACCCGACGAGCACGGCTCGTGCGCCGGATGCGCGGATTTCCTCCC +GCGACACGTCGAGTGGAAGCGGGACACAGCCGTTGGTGGC +>RD711_2 +CCCGGGACGGTCGGTGCGGTCGATTGCCGCGGCGGCGCCCCTGGAACCCGCGAGACTGATCTGCTGGACC +CGGCCAACAGCGTGCGCTTCGTCGACGCCCTGTTGCTCGCCGGCGGCAGCGCCTACGGTCTGGCCGCCGC +CGATGGCGTCATGCGCTGGCTAGAGGAACACCGGCGCGGCGTCGCGATGGACAGCGGCGTGGTGCCCATC +GTGCCGGGCGCGGTGATTTTCGACCTTCCGGTCGGCGGCTGGAATTGTCGGCCGACGGCCGATTTCGGCT +ATTCGGCCTGTGCGGCAGCCGGAGTCGACGTCGCGGTCGGGACGGTGGGCGTGGGGGTTGGGGCGCGCGC +CGGAGCGCTCAAGGGCGGTGTCGGGACTGCATCGGCTACCCTGCAGTCCGGTGTGACCGTCGGTGTCCTT +GCTGTGGTAAATGCCGCTGGCAACGTCGTCGATCCAGCCACCGGCTTGCCGTGGATGGCCGACCTAGTCG +GCGAGTTCGCGTTGAGGGCCCCGCCGGCCGAGCAGATTGCTGCGCTGGCGCAGTTATCGTCCCCGCTGGG +AGCCTTCAACACCCCGTTCAATACGACGATCGGTGTGATTGCGTGTGACGCCGCGCTGAGCCCTGCGGCT +TGCCGGCGCATCGCGATTGCCGCCCACGACGGGTTGGCCCGCACCATCCGGCCGGCACACACCCCCTTGG +ATGGCGACACGGTTTTCGCGCTGGCCACCGGCGCGGTAGCGGTGCCGCCGGAGGCCGGCGTGCCGGCCGC +ATTGTCTCCGGAGACTCAGCTGGTCACCGCGGTCGGTGCGGCGGCGGCTGATTGCCTGGCTCGTGCGGTG +CTGGCCGGCGTGCTCAATGCTCAGCCGGTAGCCGGAATACCGACCTAC +>RD702_3 +CGCACCGCGAGTCGGACTTGTTGGCGTTCGAGATCGTCATCGAGCGGTCGCAGCCCGGCGCCGTGATGGC +GGCGTACAACAAGGTCAACGGAGATTACGCTGCCGGCAACGACCACTTGCTCAACGACGTGCTGAAAGGT 
+GCTTGGGGATACCGCGGTTGGGTGATGTCGGATTGGGGCGGAACACCCAGCTGGGAGTGCGCGCTGGCCG +GCCTGGACCAAGAGTGCGGTGCGCAGATCGATGCAGTGCTGTGGCAGTCGGAAGCATTCACCGACCGCCT +GCGTGCCGCCTACGCCGACGGCAATCTACCCAAGGGGCGCCTGTCGGACATGGTACGGCGGATCCTGCGG +TCGATGTTTGCCGTCGGAATCGACCGATGGAAACCAGCGCCGGCGCCGGACATGAATGCGCACAACGAGA +TTGCCGCACAGATGGCGCGGCAAGGAATCGTGCTGCTGCAAAACCGAGGGCTGCTGCCGCTCGCTCCCGA +ATCGGCCGGGCGTATTGCCGTCATCGGCGGCTATGCACACCTCGGTGTGCCAGCCGGTTACGGTTCGAGC +GCCGTCACCCCGCCGGGGGGCTATGCGGGCGTGATACCGATCGGTGGGTCTGGCTTGGCAGCCGGGTTGC +GTAATCTCTACCTGCTGCCGTCAAGCCCGCTGAGTGAGTTGCGAAAGCGGTTGCCCAACGCGCAGTTCGA +GTTCGATCCTGGCATCAACCCGGCGG +>RD4_4 +GGGTACGCGCCCGAATACGTCGAAGGCATGTGGCGGATGCTGCAGACCGACGAGCCCGACGACTTCGTTT +TGGCGACCGGGCGCGGTTTCACCGTGCGTGAGTTCGCGCGGGCCGCGTTCGAGCATGCCGGTTTGGACTG +GCAGCAGTACGTGAAATTCGACCAACGCTATCTGCGGCCCACCGAGGTGGATTCGCTGATCGGCGACGCG +ACCAAGGCTGCCGAATTGCTGGGCTGGAGGGCTTCGGTGCACACTGACGAGTTGGCTCGGATCATGGTCG +ACGCGGACATGGCGGCGCTGGAGTGCGAAGGCAAGCCGTGGATCGACAAGCCGATGATCGCCGGCCGGAC +ATGAACGCGCACACCTCGGTCGGCCCGCTTGACCGCGCGGCCCGGGTCTACATCGCCGGGCATCGCGGCC +TGGTCGGGTCCGCGCTGCTACGCACGTTTGCGGGCGCGGGGTTCACCAACCTGCTGGTGCGGTCACGCGC +CGAGCTTGATCTGACGGATCGGGCCGCGACGTTCGACTTCGTTCTCGAGTCGAGGCCGCAGGTCGTCATC +GACGCGGCGGCCCGGGTCGGCGGCATCCTGGCCAACGACACCTACCCGGCCGATTTCCTGTCGGAAAACC +TCCAGATCCAGGTCAACCTGCTGGATGCCGCCGTGGCGGCGCGGGTGCCGCGGCTGCTGTTCCTGGGCTC +GTCGTGCATCTACCCGAAACTCGCCCCGCAGCCGATCCCGGAGAGCGCGCTGCTCACCGGTCCGTTGGAG +CCGACCAACGACGCGTACGCGATCGCCAAAATCGCCGGCATCCTTGCGGTCCAGGCGGTGCGCCGCCAAC +ATGGCCTGCCGTGGATCTCGGCGATGCCCACCAACCTGTACGGGCCAGGCGACAACTTTTCGCCGTCCGG +CTCGCATCTGCTGCCGGCACTCATCCGCCGCTATGACGAGGCCAAAGCCAGTGGCGCGCCCAACGTGACC +AACTGGGGCACCGGCACGCCCCGACGGGAGTTGCTGCACGTCGACGACCTGGCGAGCGCATGCCTGTATC +TGCTGGAACATTTCGACGGGCCGACCCATGTCAACGTGGGAACCGGCATCGACCACACCATCGGCGAGAT +CGCCGAGATGGTCGCCTCGGCGGTAGGCTATAGCGGCGAAACCCGCTGGGATCCAAGCAAACCGGACGGA +ACACCACGCAAACTGCTGGATGTTTCGGTGCTACGGGAGGCGGGATGGCGGCCTTCGATCGCGCTGCGCG +ACGGCATCGAGGCGACGGTGGCGTGGTATCG +>RD1bcg_5 
+ACTGGCGGTCGTCGCCGCAAGCGTGCAGCGCCGGATCTCGACGCGACACAGAAATCCTTAAGGCCGGCGG +CCAAGGGGCCGAAGGTGAAGAAGGTGAAGCCCCAGAAACCGAAGGCCACGAAGCCGCCCAAAGTGGTGTC +GCAGCGCGGCTGGCGACATTGGGTGCATGCGTTGACGCGAATCAACCTGGGCCTGTCACCCGACGAGAAG +TACGAGCTGGACCTGCACGCTCGAGTCCGCCGCAATCCCCGCGGGTCGTATCAGATCGCCGTCGTCGGTC +TCAAAGGTGGGGCTGGCAAAACCACGCTGACAGCAGCGTTGGGGTCGACGTTGGCTCAGGTGCGGGCCGA +CCGGATCCTGGCTCTAGACGCGGATCCAGGCGCCGGAAACCTCGCCGATCGGGTAGGGCGACAATCGGGC +GCGACCATCGCTGATGTGCTTGCAGAAAAAGAGCTGTCGCACTACAACGACATCCGCGCACACACTAGCG +TCAATGCGGTCAATCTGGAAGTGCTGCCGGCACCGGAATACAGCTCGGCGCAGCGCGCGCTCAGCGACGC +CGACTGGCATTTCATCGCCGATCCTGCGTCGAGGTTTTACAACCTCGTCTTGGCTGATTGTGGGGCCGGC +TTCTTCGACCCGCTGACCCGCGGCGTGCTGTCCACGGTGTCCGGTGTCGTGGTCGTGGCAAGTGTCTCAA +TCGACGGCGCACAACAGGCGTCGGTCGCGTTGGACTGGTTGCGCAACAACGGTTACCAAGATTTGGCGAG +CCGCGCATGCGTGGTCATCAATCACATCATGCCGGGAGAACCCAATGTCGCAGTTAAAGACCTGGTGCGG +CATTTCGAACAGCAAGTTCAACCCGGCCGGGTCGTGGTCATGCCGTGGGACAGGCACATTGCGGCCGGAA +CCGAGATTTCACTCGACTTGCTCGACCCTATCTACAAGCGCAAGGTCCTCGAATTGGCCGCAGCGCTATC +CGACGATTTCGAGAGGGCTGGACGTCGTTGAGCGCACCTGCTGTTGCTGCTGGTCCTACCGCCGCGGGGG +CAACCGCTGCGCGGCCTGCCACCACCCGGGTGACGATCCTGACCGGCAGACGGATGACCGATTTGGTACT +GCCAGCGGCGGTGCCGATGGAAACTTATATTGACGACACCGTCGCGGTGCTTTCCGAGGTGTTGGAAGAC +ACGCCGGCTGATGTACTCGGCGGCTTCGACTTTACCGCGCAAGGCGTGTGGGCGTTCGCTCGTCCCGGAT +CGCCGCCGCTGAAGCTCGACCAGTCACTCGATGACGCCGGGGTGGTCGACGGGTCACTGCTGACTCTGGT +GTCAGTCAGTCGCACCGAGCGCTACCGACCGTTGGTCGAGGATGTCATCGACGCGATCGCCGTGCTTGAC +GAGTCACCTGAGTTCGACCGCACGGCATTGAATCGCTTTGTGGGGGCGGCGATCCCGCTTTTGACCGCGC +CCGTCATCGGGATGGCGATGCGGGCGTGGTGGGAAACTGGGCGTAGCTTGTGGTGGCCGTTGGCGATTGG +CATCCTGGGGATCGCTGTGCTGGTAGGCAGCTTCGTCGCGAACAGGTTCTACCAGAGCGGCCACCTGGCC +GAGTGCCTACTGGTCACGACGTATCTGCTGATCGCAACCGCCGCAGCGCTGGCCGTGCCGTTGCCGCGCG +GGGTCAACTCGTTGGGGGCGCCACAAGTTGCCGGCGCCGCTACGGCCGTGCTGTTTTTGACCTTGATGAC +GCGGGGCGGCCCTCGGAAGCGTCATGAGTTGGCGTCGTTTGCCGTGATCACCGCTATCGCGGTCATCGCG +GCCGCCGCTGCCTTCGGCTATGGATACCAGGACTGGGTCCCCGCGGGGGGGATCGCATTCGGGCTGTTCA 
+TTGTGACGAATGCGGCCAAGCTGACCGTCGCGGTCGCGCGGATCGCGCTGCCGCCGATTCCGGTACCCGG +CGAAACCGTGGACAACGAGGAGTTGCTCGATCCCGTCGCGACCCCGGAGGCTACCAGCGAAGAAACCCCG +ACCTGGCAGGCCATCATCGCGTCGGTGCCCGCGTCCGCGGTCCGGCTCACCGAGCGCAGCAAACTGGCCA +AGCAACTTCTGATCGGATACGTCACGTCGGGCACCCTGATTCTGGCTGCCGGTGCCATCGCGGTCGTGGT +GCGCGGGCACTTCTTTGTACACAGCCTGGTGGTCGCGGGTTTGATCACGACCGTCTGCGGATTTCGCTCG +CGGCTTTACGCCGAGCGCTGGTGTGCGTGGGCGTTGCTGGCGGCGACGGTCGCGATTCCGACGGGTCTGA +CGGCCAAACTCATCATCTGGTACCCGCACTATGCCTGGCTGTTGTTGAGCGTCTACCTCACGGTAGCCCT +GGTTGCGCTCGTGGTGGTCGGGTCGATGGCTCACGTCCGGCGCGTTTCACCGGTCGTAAAACGAACTCTG +GAATTGATCGACGGCGCCATGATCGCTGCCATCATTCCCATGCTGCTGTGGATCACCGGGGTGTACGACA +CGGTCCGCAATATCCGGTTCTGAGCCGGATCGGCTGATTGGCGGTTCCTGACAGAACATCGAGGACACGG +CGCAGGTTTGCATACCTTCGGCGCCCGACAAATTGCTGCGATTGAGCGTGTGGCGCGTCCGGTAAAATTT +GCTCGATGGGGAACACGTATAGGAGATCCGGCAATGGCTGAACCGTTGGCCGTCGATCCCACCGGCTTGA +GCGCAGCGGCCGCGAAATTGGCCGGCCTCGTTTTTCCGCAGCCTCCGGCGCCGATCGCGGTCAGCGGAAC +GGATTCGGTGGTAGCAGCAATCAACGAGACCATGCCAAGCATCGAATCGCTGGTCAGTGACGGGCTGCCC +GGCGTGAAAGCCGCCCTGACTCGAACAGCATCCAACATGAACGCGGCGGCGGACGTCTATGCGAAGACCG +ATCAGTCACTGGGAACCAGTTTGAGCCAGTATGCATTCGGCTCGTCGGGCGAAGGCCTGGCTGGCGTCGC +CTCGGTCGGTGGTCAGCCAAGTCAGGCTACCCAGCTGCTGAGCACACCCGTGTCACAGGTCACGACCCAG +CTCGGCGAGACGGCCGCTGAGCTGGCACCCCGTGTTGTTGCGACGGTGCCGCAACTCGTTCAGCTGGCTC +CGCACGCCGTTCAGATGTCGCAAAACGCATCCCCCATCGCTCAGACGATCAGTCAAACCGCCCAACAGGC +CGCCCAGAGCGCGCAGGGCGGCAGCGGCCCAATGCCCGCACAGCTTGCCAGCGCTGAAAAACCGGCCACC +GAGCAAGCGGAGCCGGTCCACGAAGTGACAAACGACGATCAGGGCGACCAGGGCGACGTGCAGCCGGCCG +AGGTCGTTGCCGCGGCACGTGACGAAGGCGCCGGCGCATCACCGGGCCAGCAGCCCGGCGGGGGCGTTCC +CGCGCAAGCCATGGATACCGGAGCCGGTGCCCGCCCAGCGGCGAGTCCGCTGGCGGCCCCCGTCGATCCG +TCGACTCCGGCACCCTCAACAACCACAACGTTGTAGACCGGGCCTGCCAGCGGCTCCGTCTCGCACGCAG +CGCCTGTTGCTGTCCTGGCCTCGTCAGCATGCGGCGGCCAGGGCCCGGTCGAGCAACCCGGTGACGTATT +GCCAGTACAGCCAGTCCGCGACGGCCACACGCTGGACGGCCGCGTCAGTCGCAGTGTGCGCTTGGTGCAG +GGCAATCTCCTGTGAGTGGGCAGCGTAGGCCCGGAACGCCCGCAGATGAGCGGCCTCGCGGCCGGTAGCG 
+GTGCTGGTCATGGGCTTCATCAGCTCGAACCACAGCATGTGCCGCTCATCGCCCGGTGGATTGACATCCA +CCGGCGCCGGCGGCAACAAGTCGAGCAAACGCTGATCGGTAGTGTCGGCCAGCTGAGCCGCCGCCGAGGG +GTCGACGACCTCCAGCCGCGACCGGCCCGTCATTTTGCCGCTCTCCGGAATGTCATCTGGCTCCAGCACA +ATCTTGGCCACACCGGGATCCGAACTGGCCAACTGCTCCGCGGTACCGATCACCGCCCGCAGCGTCATGT +CGTGGAAAGCCGCCCAGGCTTGCACGGCCAAAACCGGGTAGGTGGCACAGCGTGCAATTTCGTCAACCGG +GATTGCGTGATCCGCGCTGGCCAAGTACACCTTATTCGGCAATTCCATCCCGTCGGGTATGTAGGCCAGC +CCATAGCTGTTGGCCACGACGATGGAACCGTCGGTGGTCACCGCGGTGATCCAGAAGAACCCGTAGTCGC +CCGCGTTGTTGTCGGACGCGTTGAGCGCCGCCGCGATGCGTCGCGCCAACCGCAGCGCATCA +>RD1mic_6 +CCATGGGTCAGCTGCAACAGCTCGTGGCGGCCGGCGGTGGCCCCAGCCAACTGGCCAGCATGGGCAGCCA +ACAAGCGCAACTGATCTCGTCGCAGGCCCAGCAAGGAGGCCAGCAGCACGCCACCCTCGTGAGCGACAAG +AAGGAAGACGAGGAAGGCGTGGCCGAGGCGGAGCGTGCACCCATCGACGCTGGCACCGCGGCCAGCCAAC +GGGGGCAGGAGGGGACCGTCCTTTGATCGGACACCGAGTCGCCAGCAGGTCTGTGCCATAGCGAGTCGAA +GCCATAGCGAGTAGAAAGTTAAACGTAGAGGAGGGTTCAACCCATGACCGGATTTCTCGGTGTCGTGCCT +TCGTTCCTGAAGGTGCTGGCGGGCATGCACAACGAGATCGTGGGTGATATCAAAAGGGCGACCGATACGG +TCGCCGGGATTAGCGGACGAGTTCAGCTTACCCATGGTTCGTTCACGTCGAAATTCAATGACACGCTGCA +AGAGTTTGAGACCACCCGTAGCAGCACGGGCACGGGTTTGCAGGGAGTCACCAGCGGACTGGCCAATAAT +CTGCTCGCAGCCGCCGGCGCCTACCTCAAGGCCGACGATGGCCTAGCCGGTGTTATCGACAAGATTTTCG +GTTGATCATGACGGGTCCGTCCGCTGCAGGCCGCGCGGGCACCGCCGACAACGTGGTCGGCGTCGAGGTA +ACCATCGACGGCATGTTGGTGATCGCCGATCGGTTACACCTGGTTGATTTCCCTGTCACGCTTGGGATTC +GGCCGAATATCCCGCAAGAGGATCTGCGAGACATCGTCTGGGAACAGGTGCAGCGTGACCTCACAGCGCA +AGGGGTGCTCGACCTCCACGGGGAGCCCCAACCGACGGTCGCGGAGATGGTCGAAACCCTGGGCAGGCCA +GATCGGACCTTGGAGGGTCGCTGGTGGCGGCGCGACATTGGCGGCGTCATGGTGCGCTTCGTCGTGTGCC +GCAGGGGCGACCGCCATGTGATCGCGGCGCGCGACGGCGACATGCTGGTGCTGCAGTTGGTGGCGCCGCA +GGTCGGCTTGGCGGGCATGGTGACAGCGGTGCTGGGGCCCGCCGAACCCGCCAACGTCGAACCCCTGACG +GGTGTGGCAACCGAGCTAGCCGAATGCACAACCGCGTCCCAATTGACGCAATACGGTAT +>RD2seal_7 +CGCACTGGAGTACCTCGACCGCGACGATGTGCCCGATGAGGTCAAACAGAAGATCATCGGGGTGCTCGAC +CGGGTGGGCACCCTGACCAACCTGCACGAGAAGTACGCCCGGATAGCCCTGAAACTTGTTTCTGACATTC 
+CCAACCCGCGAATCCTGGAACTTGGTGCGGGCCATGGCAAGCTCTCAGCGAAAATCCTCGAGCTACACCC +GACAGCGACGGTGACGATCAGCGATCTAGATCCCACCTCGGTGGCCAACATCGCCGCGGGAGAGCTGGGA +ACACATCCGCGAGCACGCACCCAAGTGATCGACGCCACCGCAATCGACGGCCACGACCACAGCTATGACC +TGGCGGTCTTCGCGCTGGCATTTCACCACCTGCCGCCTACGGTCGCCTGCAAAGCGATCGCCGAGGCCAC +CCGGGTGGGGAAGCGCTTTCTGATCATCGACCTCAAACGGCAGAAACCGCTGTCGTTCACGCTCTCTTCG +GTGCTGCTACTGCCGCTCCACCTACTGCTGCTGCCATGGTCGTCGATGCGCTCGAGCATGCACGACGGCT +TTATCAGCGCACTACGTGCCTACAGTCCCTCGGCGTTGCAGACGCTTGCCCGCGCCGCCGATCCGGGAAT +GCAGGTTGAAATCTTGCCCGCACCGACCAGGCTATTCCCGCCATCGCTCGCCGTTGTGTTCTCCCGTTCG +AGCTCAGCGCCAACGGAATCTAGCGAGTGCTCGGCCGATCGCCAACCCGGCGAATGATTCGGTAGTAGTG +CAGATAAGCCATCGCCGGTACCACGATGAACGTGATCACGATCAAAGCAATCGAGAAGTAGTTCGGACCA +CCCCGCACTAGAAAGATGCAGCGGTAGTCGTAGGACACTGCCA +>RD2bcg_8 +ATGCGCGGCACGACCGCCGGCGGGTGCGCTCACCAGCGCCAAGGTCGTCGCAACCACGACACCAACGATG +CGAACAAGGCTGCGTGGAGTCATCTGCACATGCTGACATACTGCCGGCGACCGAGGTGGCGGTGGGCCGC +TGAGACATGACGTGCCTCACGTCGTCGGCGCCCACGCAGCCCCAGGTCAGAACGGTAGCCTTAGGCGATG +ACCGACTCTGTGGTCGTCCGCGTCAAGCCCGGCAGTCACAAAGGACCCCTGGTCGAGGTCGGTCCCAACG +GTGAGCTGATTATCTACGTCCGCGAGCCGGCGATTGATGGCAAGGCCAACGATGCGGTCACCCGGCTGCT +CGCAGCTCACCTTCAATTGCCAAAGAGCCGAGTCAAATTGGTGTCCGGAGCGACGTCGCGGTTCAAGCGT +TTCCGTCTGAGTCGTTAAGTTCAACCTGTTTGAGGAAGCGGGTCCAGCAAGGCCGGGACATCGAGACCAA +GCCGCGCTAACACAACAACATGCTGGCGTCGGTCAACCCGGTCGGCGGCGGCGTTGCTGGCCCCGGTACA +GACCGCTTGCCGCCGCCCTCACCGTGTCGGTAATTCGCGCGATGATCGGACTGTCCAGTTTCCAGCATTG +CCAATAGAGAGGGACGTCGAGGTGTATGTCGCAGACCCGTACGAACGATCCATCGGCAAGCGGAGATGCT +GCCAGCTTCTCGGGGAACATGCCCCATCCCAGCCCGGCGCGCGCTGCGGCGGTGAAGCCCTCTGTGGTCG +GGACAAAGTGCGTCGGTCTGGTGATGGCGCGACGAAAGGCCTTACGCACCAACATGTCCTGCAGCCCATC +GTCACGATTCCACGCCAGTGACGGAGCTTTAGCCGCCGCGGCGGCAGTGAACCCGTCGGATAGATGGCGC +TGGACGAATGGCCTGCTGGCCACTGGTAGGTAGCGCATTTCACCCAGCGGGTGCACCCGGCAGCCCGGCA +CCGGGTTCCGCTCGGTGGTCACCGCGCCCATCGCCACACCCTCCCGTAGCAGCCGCGCGGAATGGTCCTG +GTCCTCGATCCGAACGTCGAGCAGGACGTCGCCGAGACCGTCGAACACGGCCGAAAACCATGTCGCCATG +GAATCGGCGTTTACCGCAATGGTGATCCGCGTGCGTTTCAGCGACGCGTTGCCACCCATTTCAGCGAGCG 
+CCTCGGACTCGAGCAACGCTGTTTGCGCGGCCAACCGCAACAGCGGGATACCTGCGGTCGTCGCCCGACA +TGGCTTTTCCCTGACCACCAGCACCTGGCCGACCTGCTGCTCCAACGACTTGATGCGCTGACTGACAGCC +GACGGGGTGACATGTAGGCGCTCCGCGGCCGCATCGAAGCTGCCCAGTTCGACCACGGCAGCCAATGCGG +CCAGCTGTGGACCGTCAAGCTGCGGATCCACCATCTCAGGTGTAGACCATCTGCGGAGCGTCGCACTGCA +CATTAATAATGCTAATGTAAATGAAGAATTATTAGCTATACTGACCCATACAAACTGCCTAGTGTCGATT +GCGTGAACTCACCACTGGTCGT +>RD7_9 +CGAGAGCGACGCGCCATACCTGCCGCTGAACGACGGCTACAACTGGAAGGGCGACCCCAACGCCACGGTG +CCGGGTTTGGGGTCCGGCCAGGACATCCCGCAGACATGGCAAACGATGCTGCTGCCGCCGGGCAGCTGAC +GGTGATGGAGGGAGGACACGATGTCGGTAGCAGTGGATTCCGACGCCGAGGATGACGCCGTATCGGAGAT +CGCTGAGGCAGCCGGCGTGTCGCCGGCCCCAGCCAAACCATCCATGTCGGCGCCGCGGCGCATGCTGCTG +TTCGGCCTGGTCGTCGTCGTCGCTTTGGCGGTGCTGTTGTGTTGCTGGGGATTTCGCGTCCAGCGGGCAC +GCCATGCGCAGGACCAGCGTGGTCACTTCCTGCAAGCGGCCCGGCAGTGCGCGCTGAACCTAACGACCAT +CGACTGGCGCAACGCCGAGGCGGATGTGCGCCGCATTCTGGACGGCGCCACAGGCGAGTTTTACAACGAC +TTCGCCCAGCGGTCCCAGCCCTTCGTCGAAGTACTGAGGCACGCAAAGGCCAGCACGGTCGGCACGATCA +CCGAGGCCGGGCTGCAGACGCAGACCGCCGACACGGCCCAGGCGCTGGTGGCGGTGTCCGTGCAAACGTC +GAATGCCGGCGAAGCCGACCCGGTTCCACGAGCGTGGCGAATGCGCATCACCGTGCAGCGGGTCGGCGAC +CGGGTCAAGGTGTCCGACGTCGGGTTCGTGCCGTGAGCTGGTCGCGGGTGATCGCCTACGGGC +>RD8_10 +CGCTGCCGTCGGACGCCTACATGATGGGCATCGATCCGGTCGAGCAGCGACGAATGATGCAGGAGTCCCT +CGAGGCGATTCTCGCGCTGTTCCGTGCCGCACCTGACGAGCGAATCGACCGCCACTCCGACTGGTTCACC +CTGCGTGAAGCGCAATTGCACATCCGCCCCTACACCTGGCCGTACCCCGAAATCGCTACCGCAGCCATGA +TTTCGCCATCGGGTCCGCGACTGGCCGGTGCGCTGGGCACGTCGCTGTTATCACTGTCGATGTCAGTGCC +CGGCGGCTACGCTGCGCTGGAAACAGCGTGGGGCGTGGTGCGGGAGCAGGCCGCCAAAGCTGGGCGGGGC +GAGCCGGATCGCGCCGATTGGCGGGTGTTGAGCATCATGCACTTGTCGGACAGCCGCGACCAGGCGATCG +ACGACTGCACTTACGGGTTACCCGACTTCTCGAGGTACTTCGGCGCGGCAGGGTTTGTCCCGTTGGCGAA +CACCGTGGAAGGCACCCAGTCGTCTCGGGAATTCGTCGAGCAATACGCGGCCAAGGGAAATTGCTGCATC +GGCACGCCCGATGACGCGATCGCCCACATTGAAGACTTGCTGCACCGGTCGGGTGGCTTCGGAACGTTGC +TACTGCTCGGCCACGACTGGGCCCCGCCACCGGCAACCTTTCACTCCTATGAGCTGTTCGCCCGTGCTGT +GATTCCTTATTTCAAGGGACAACTCGCGGCGCCGCGGGCGTCGCACGAATGGGCTAGAGGCAAGCGCGAC 
+CAATTGATTGGCCGCGCCGGCGAAGCGGTCGTCAAAGCCATCACCGAGCACGTCGCCGAACAAGGGGAAG +CGGGCAGCTGACGCGGGCGCAGTGTTCCCAACGACGACATGCCCGTGTATCGGGCGCCAAAGTCGACGCT +GATCGGCCCGCCCTGCGCGGACCCAACTTAGGACCCGGGT +>RD10_11 +CACCGGCTCACCCCGGAGCGCAGGTTCGCCACCGCCACCCTGGCGCTGATTGACGTGAAGGCGACGGCCA +AGTTGCTGGGGGCGACGATCAACGACATGGTGCTGGCCATGTCGACCGGCGCTCTGCGTACCCTGCTATT +GCGCTATGACGGCAAGGCCGAACCGCTGCTGGCGTCGGTCCCGGTGAGTTACGACTTCTCACCGGAGCGG +ATCTCCGGTAACCGCTTCACCGGAATGCTGGTGGCGCTGCCTGCCGACTCCGACGACCCGTTGCAGCGGG +TGCGCGTCTGTCACGAAAACGCGGTCTCCGCCAAGGAGAGCCACCAGCTTTTGGGACCGGAGTTGATCAG +CCGCTGGGCGGCTTACTGGCCACCTGCCGGTGCGGAAGCCTTGTTCCGGTGGTTGTCTGAGCGCGACGGG +CAGAACAAGGTACTCAACTTGAATATCTCGAATGTTCCCGGTCCGCGCGAACGCGGCCGCGTGGGG +>RD12bovis_12 +CCACTTTTCGCCAGGCTTGCGAGGCTGTGGCGCCGGGCGGTGTAGTGGCGTGGGAGGCATGGCGGCGGCC +CATCGATGTCGCTCGGGATACCCGTCGAGCCGAATGGTGCTTGAAGCCAGGCCAGCCCGAGTCTGAACTT +CCCGCCGGCTTCACGGTGATTCGGGTGGTCGACACCGATGGTTCAGAGCCGTCGCGGCGCATCATCGCCC +AACGGTCACTGTGAACGGTCCCTGGTTGTATGCGCACGTCCTTTGTTGAGAACCCGTTTCGCACCGCTCC +GATACCGCCAGTCTGATGCACCGACCGCGCCGCCTCCCACCCGCGGAAGCTAACGAGGTGTGCATGAAAC +CGGGGCGGTTCAGCAGCCCGGTTAATTGACAATCTGTGAAGAGGTTCCCACGACAATGGGCACGTTGGGC +TCGCGATGTCGCGCGATTCGAGCGAGGTTGGGTGACGTTCCCGTTTGAGGATCTCGCCCCAGGGCGATGG +GTTGGCGGGATGTCGATGTACCCGGAAGAGCAAAACGTGGCATGCGATAACGATCCGAGAGGAGTGCGAT +GACAAGCACCTCGATTCCGACGTTCCCGTTCGACCGGCCGGTCCCGACGGAGCCGTCCCCAATGCTGTCG +GAACTGAGAAACAGCTGTCCGGTAGCCCCGATAGAGTTGCCCTCGGGGCACACAGCATGGCTCGTCACTC +GCTTTGACGATGTAAAGGGAGTGCTGTCCGACAAGCGTTTCAGCTGCAGGGCGGCAGCGCACCCGTCGTC +GCCCCCGTTCGTGCCGTTCGTGCAGCTTTGCCCCAGCTTGTTGAGCATCGATGGGCCCCAACACACCGCG +GCCCGCCGTCTGCTCGCGCAGGGCCTAAATCCCGGCTTCATCGCACGCATGCGGCCCGTTGTCCAACAGA +TCGTCGACAATGCGCTCGACG +>RD12can_13 +TACGGGCAGGTAAGGCCGGTGGGCGTGTCGTAGCCCAGTAGTGGGCGGTCATCGCGTGATCCTTCGAAAC +GACCAGCAAAAGTCAATCGAAGGAAATGACGCAATGACCTCTTCTCATCTTATCGACGCCGAGCAGCTTC +TGGCTGACCAACTCGCACAGGCGAGCCCGGATCTGCTGCGCGGGCTGCTCTCGACGTTCATCGCCGCCTT +GATGGGGGCTGAAGCCGACGCCCTGTGCGGGGCGGGCTACCGCGAACGCAGCGATGAGCGGTCCAATCAG 
+CGCAACGGCTACCGCCACCGTGATTTCGACACCCGTGCCGCAACCATCGACGTCGCGATCCCCAAGCTGC +GCCAGGGCAGCTATTTCCCGGACTGGCTGCTGCAGCGCCGCAAGCGAGCTGAACGCGCACTGACCAGCGT +GGTGGCGACCTGCTACCTGCTGGGAGTATCCACTCGCCGGATGGAGCGCCTGGTCGAAACACTTGGTGTG +ACAAAGCTTTCCAAGTCGCAAGTGTCGATCATGGCCAAAGAGCTCGACGAAGCCGTAGAGGCGTTTCGGA +CCCG +>RD105_14 +GCCGCGGGATCAGACGGGCTCACTGTGGCCCGCATCGAGTCCGAGATCGGGGCCTTGGAGTTCCTGAACG +AACTGCGCACTGAACTCAAGAGTGGACAGTTTCGACCTCAACCGGTGCGGGAACGCAAGATCCCCAAACC +GGGCGGGTTGGGCAAGGTACGGCGGCTGGGGATTCCCACAGTGGCCGACCGGGTCGTTCAGGCGGCGTTG +AAACTGGTGCTAGAACCCATCTTTGAGACCGACTTCGAGCCGGTCTCCTACGGGTTTCGGCCCGCGCGAC +GCGCGCACGACACGATCGCTGAGATTCACTTGTTCGGCACCCAGGAGTATCGCTGGGTGCTCGACGCTGA +TATCAAGGCGTGCTTTGACCGCATCGACCACGCGGACCTGATGGACCGGGTGCGTCACCGGATCAAAGAC +AAGCGGGTGTTGCGGCTGGTGAACTGGCAGCGCATTCGGCATCGCTGGAATTGGACCGACGTCCGCCGCT +GGCTCACCGACCCCACCGGGCGGTGGCACCCCATCAGCGCGGACGGGATCACCCTGTTTAACCCCGCCGC +GGTGCCCATTCGGCGATACCGCTATCGGGGCAACACGATCCCCACTCCCTGGACTCAGG +>RD239_15 +CATGCACGGCGTGCAGGTGTGGAGTGGCCCCACAGACGCCGAACCGCCCGACCGGCCGATCCCAGGCCCG +CTGAAGTGGGACCTGACCCGTGGTGTGGCCACCGACACCCCGGAGTCACTGACCAACAGCGGCAAGAATC +CCGAGGTCGAGATCACCTACGGCCGAGCCTTCGCCGAAGACCTGCCGGCGCGCGAGCTCAATCCGAACGA +AACCCAGGTGCTTGCCATGGCAGTTAAAGCCAAGCCCGGCAAAACACTATGCAGCATTTGGGATCTCACT +GATTGGCAAGGAACACCCATCCGGATCGGCTTCGTGGCGCGAAGCGCTCTGGAGCCGGGACCAAACGGCC +GCGATCACCTGGTCGCCCGGGCAATGAATTGGCGTGCTGAGACCAAGGCCCCTGCAGTGCCCGTCGACGA +CTTGGCTCAGCGGATCCTTATCGGACTGGCGCAGGCCGGAGTCCACCGGGCACTGGTCGATCTCAAAACC +TGGACCCTGCTGAAATGGCTCGACCAACCCTGCTCTTTCTACGACTGGCGGCGTAGCGCGGCCGATGGGC +CTCGTCTACATCCCGACGACCAGCACGTGATCGACGCCATGACAAGAGACCTCGCCAACGGATCGGCCAG +TCATGTGCTGCGCTTGCCTGGGCACGACGTCGATTGGGTGCCGGTCCATGTCACCGTCAACCGGATAGAG +CTCGAACCGGATACCTTCGCTGGACTGGTCGCTCTGCGACTGCCCACCGACGAAGAACTTGCCGACGCCG +GACTGCCGAAAGCCACCGACGTCACCACCTGACAACCAGTCCTTTCGACTCAGCAACGGCAGCTGCCGAT +CCG +>RD750_16 +TCTGCGCCAACGGCCCAGGTCGTCCGAGAAGGCCAGCCTTGACCTGTACAGCTGTGGCGACCCGAACGTT +GCACAGCTTGGC +>RD142_17 
+CCGCAGGACATCGCCGCATGGCACGACGGTGACGCCAGCGTGTCATCGGTTGACCCGGCTGACCGGGTGA +CTCTCGACGACGAGGTCCGGCTGGCTTTGCTGATCATGCTCGAGCGCCTCGGCCCCGCGGAGCGGGTGGT +GTTCGTGCTGCACGAGATCTTTGGGCTGCCCTACCAGCAAATCGCCACGACGATTGGCAGCCAGGCCTCC +ACATGCCGGCAGCTGGCTCATCGGGCCCGTCGCAAGATCAACGAATCGCGCATTGCGGCCAGCGTGGAGC +CAGCCCAGCATCGCGTCGTCACCAGAGCTTTCATCGAAGCCTGCTCCAACGGAGACCTGGACACCCTGCT +CGAGGTGCTGGATCCGGGTGTCGCCGGCGAGATCGACGCCCGCAAAGGCGTTGTCGTCGTGGGCGCGGAT +CGGGTTGGCCCGACCATCCTGCGCCACTGGAGTCACCCCGCCACCGTCCTGGTAGCCCAGCCGGTGTGCG +GTCAACCGGCGGTGCTGGCCTTTGTCAACCGAGCGCTTGCCGGCGTGTTGGCCCTGTCGATCGAGGCCGG +CAAGATCACAAAAATCCATGTCTTAGTGCAGCCT +>RD150_18 +GTATAGTTGCAGTATTAGACGAACGGGGTCGCCGGCGACGGGTGCAGCATGATTTCGGAGTCGTTGGCGC +ATACTGTCCGGCCAGCGTGCCGCAGTAGCAAGCTACAAGCCGCCGCGGCAGCAAGTACGGCGGCGACGGT +CAGCAACGCGAGATGGTAAGTGCCGGTGGCGTCTTTGAGGTGGCCGGTGGCGTAGGGACCGGCGAAGCTC +GCCAGACTGGCCACGGCATTGACCGTCGCGATGGCCACGGCGACCCGGGGACCGGCCAGCGCGGCGGTGC +AACGGCTCCAGAAAGCGGGCATCGCGGCAAGGATTCCGGCGACGGCGATGGTCAGCCAACTCAGCGTCAC +TATCGGTGACATCGGACTCAATGCCGCACCGAGCGCGGCGCTGCCCGCGGCCGTTGTTGGCAGTGTGATA +TGGCCCGCTTGGGCGCCCGAGCGGTCGATGCTGCGGTGGCTCCAGGCCAACATGGCCAGCGCGGCGACAC +CGTACGGCAGGGCCGCCAACGTGGCAGCGGTCAGCGTGGCGGTGCCGTGTGCCAGCGACGCAACTAGTTG +GGGCAGAAAGAACTGCAACGCATACAGCGCGAAATACAGGCCCCCGTAGACGACAGCGAAAAGGACAAGA +TCCCAACCGGCTCCACTCGACCGACCGGTCGGGGCAGGGGTGTCCTCGGTCAGCCGGGCCGACAGCTCTG +CACGTTCCTCGGGGGTGAGCCAGCTTGCCCGTTGCGGGTTATCCGGCAACAGGCGCCGAAGAAGCGGCGC +CAGCAGCAGTGCAGGCAATGCCTCGATCACAAACATTGCCCGCCAGCCGGGTAGCCCGGCCATGTGAACG +TGGCCGACGATCAGCCCAGACAGCGGCAGGCCGACCGTGTTGGCGACCGGAATGGCCAGCAGAAAGGTGG +CTACGGCGCGGGCTCGCTGCGCGCACGGAAACCACACCGTCAGATACGCGATGACGCCGGGGAAGAAGCC +GCCCTCGGCGACGCCGAGGGCGAAGCGCGCCAGATACAAGGTGTGCGCGCTGGTGACCAAGGCCGTGGCC +GCCGAGCACACACCCCAAGCCAGGACGACCGCCGTGAGCGTTCGACCGGCACCGAAGCGCGCCAACGCCG +CGTTGGCGGGAACCTGGAACAGGACGTAGCCGAGGAAGAAGACGCCGGCGGCGGTGCCGTATGCGGTGGC +GCTCAGGCGCAGGTCGGCGTTCATCGCCAGGGCTGCGACCGAGATGTTGGCCCGATCAACGAAGTTGATC +ACATACAACACGAACAGCAGGGGCAACAGCCGGCGCGCGGCCTTGCCCAGGGCATTGTGCGTGGGGCTTG 
+CCGCGATTGTCGCCACCTGCGGCTCCTTCCGTGGGCCTGTCGAACAATTGCATCATGAAATGACCCCAAC +CCGGTCTTTGTAGTCCGGCGTGTCACTAACACGATCGGTTATGTCATTGCAGTAAAACGGATTTGGCGTT +GCGCCGGATGTGTTTCGCCGTCAATCTCGGCGTAGGGGCCGGCGAAGAACAGGCTCCGGCCCGCCCGCTG +TGGTGGGGCGAGCAGGATGTCGCGGCCGATCGACCACGCGATGTGGTTGGCCTGCAGGTTCGCGAACAGG +CCGTGGGTGCCGTACTTCGTCGCACAGGAGGCGTCCGCCGGTAGCCAACCCAGCCCAGCGACGAAGAATT +CCGCCCAGCAGTGGTAGCCGCACACCTCGCAATCCTGCGCACCGGGCTGCGGTAGCTCCAAGGCCTGACC +GAGCACAAATCGTGCGGGGATGTCGACCGATCGGCACAGCGAGACGAACAATGCGTGGATGTCGTTGCAG +TTGCCCACCGAGCAGGTCAGGGCATGCTCGGTGCTGCCCAGGAAAGACTGCTTCGTCGCGTCGTAGTCCA +TGGCGCCGGTGACGTAGTCGTAGATGCGACGGGCCTGTTCGAGCGGGTTGGTCTCGGGGCCGACGACGTC +TTGGGCCAACGTACGGGTGCGCTCATCGACATCGACATGTGCTTCGGGGATCAAGGCGCGGCTGAACAAT +TGCGCCGTGGCCAACGGGCGGGCCCGTGCCGGATCCGGAGCATGCCCGATCGCCCGGCGTTCCACAACAT +AGCGGATAGACCAACTCGCCGCCGTCGCCAAGCGCAGCCGGCTGTACAACATCAGGTTCCCGAACTCCGG +CTCACGCGTGAGGTCATAGGGATCCTCGCTGGTCACCTCGACGTCCAGAACGCGTTGAAACGCGCCGTCA +CCGATGACCGGGCACCACATCTCGACGGTGTGGGCACCTTGGGTGGAATCGATCGTGATGTGATCGGTGA +TTTCGAACAGCCCGATCGTCGCATCCGCGTGTGCGGATACCGCGGGGTCGGTGATCGTCATCGGTTAGCT +CCTTCCGCTGAGACTGGTTTATGTTCGAACAACCGGCAGATCGGCTGCCAGCCATTCGGAGAACCCGCCG +TCGAGTCGGCGGGCAGAAAATCCGTTGGGGCGCAACAG +>RD181_19 +AGCCGAACCAGGCCAGCCACCACCAAGACGGCGCGGGAAAGGCCAGCGCGGGTAACCCGCCGAACACCAA +CGCTGCCGCACAACCAATGACCGGTTGTCGCCGGGCTCCCGCGCGCAACGCCATGCCGATCAGCATGCCG +GCCACATTCGCCTGCGTCGAGGAAAAGAGCAGACTAAGACCGGCAGTCCCCGCCAGAAAGGGAGTGATTT +GCATGGCCAAGGATCTGGTCGCCACGGTGCCCGATCTTTCCGGGAAGCTGGCAA +>RD207_20 +AAGTCGCCTCAAAGCAACGGTTGGGGGCTCCAGGCTGATTGTCGTCGTTTGCGTCCCACATTAGTTCGAG +GTCGTAAGCGTCTGGGGATTCACGATCGCCAGGCTTGCTCAATGCCCGGGCAGGCGTGCTTACTTGCACA +GCGGTGGTCCTGGGAATGCCAAACACATTTATGGCCACCAGCGCCGCCTGCATCGCAGGGGTGCCGGAAC +TGGTATTCAGCAGAATGGTTCGATCAGGGAACTCAGCCGACAGTTCAACCAGGTGGTTGCGGAAAACCGG +CACGAAAAGGTCGAACCTGTGCACCGACGGGTTGGTATAGGTGACTATGCGAACGTCGGTCTCAGGCGCG +AGCCGCGTGATTGCCGCGGAGTACCGCCGGTCCGCGTTCTCAAAGGCAGCTATCTCGGCGCTGAGGAATA +GCACGACAACTATTGGTCGATAGTGGCGGACGATGTGTAGCATCGGGCCGTCGCCGAGCGCGGTGATCGG 
+GTCCGCAGTTCCGATAGGCGAGAACAGGATCATTCGGCTCTCCTGATCGACAGCTCGCACTGACCCATCT +CGTAGCATATGTTGTCGATCTTGGTTCGCTTCAAGACAAGTGGTGAGACGCGTAGTTCGCGCGTCTTGTC +GACGTGCTTGACTACCTTCCCGAACTGGGCGTCGAGCACCTTCGCCATGTCGTCTTGGTCGGTGACAAAG +GTCTTGCTCCGATAGCCGGCTCCGCCGCCCAGATAGACAATTGGGCCAACTATCGCGTTCACGCCAGGGT +ACATGGCTCTGTACTCCGCGTAACGCGCCTGATTCACGGACGCGGCTGTCTCGGCCAGCGTTTCAAGGAA +CCGCTCGCCCTCACGCCAGCCGCCGCGAGCGGTGGGACTGGTGTCGACCACCACGCGGTGCGAGATTGAG +GTTCCCGGCGCCAAACATTCCCGGAAGAGCGGCAGGCCATCAGGCTTGCCGTGGACATTCATGTCCATCT +TCTGGCAGATCAGCAGATCGCTTGTTCTCAGTGCAGGTGAGTCGGTGACCCTGATCGCCTGAAACAGGTC +GTTGACCGCGTCTTGCGGACGGGTGTTGGGGCGCCCCGATTTGCGCAACTCCTTCCGCTCAAACCTTTCG +CCGTACTGCCGGTGCTCCCGCGTCTGGTGTCCCGGAACACGAACAGGTTGGGCCGTCCGCTTATGCACAA +GCGACTGCAGGTAGATGCTGCGAAGCATTCCCTTGACAGTCGAACCCGGCACGTAGGGCCTTCCAAGAGG +GTCTTTGATGAAAGCGTGAATCTCGTTGAGCGTAAGCTTCTTTCGAGTCATGCGCCCGCCTCGACCACGA +GATGCACGTCGCGGTTCGATCGACCCGATCTTCACCTCGTAACCTCGATGCTTAGCAGGATCCAGCTTGA +CCGCGTTTGGCTCTACCCACTCTTTGAGTGGCGCCGTCGCCTGTGCCCCATCGGTGTTCATGACGAACGC +TTCGAAAGACTTCCTCTTGTGAGCCGGAATGTCTGCGTAAAGAAGTTCCATGTCCGGGAAGTAGACCCGG +TCGCCCTCCACGTGGTACTCCTTCGAGGTCCGCTTCTCGCCGGATCCGATAAACACCGGCCCCAGGCACC +GCAGCGTGAGTTCGAACGGCTTCAGGTAGGTGTTCATGCGGCGGACTCCGGGAGTGCGAGAAATAGCGGT +CGCGCGTAGCTGTAGACCGGATGGTTTCCGCCCAGGCTGACGTCGAGGATGCCTCCTTGGAAGGGTCGCG +AGAAGACCGAGCCGGCGGCGAATTTGTAGATGTCGCGTTTGCGCAGGGGCATGTCAGCGTATGTGCTCGA +CGCGACGAATCCACTGCGCTTGACGAGGCGGTACGTCGCGCCGGCGAGTGCGGCTTCGAGCTCGTCGTCC +GTGGGTAGGGATGTCGTGAGCGTCATCAGACTGGCCGCGTCGACTGTCGGCGTGAGTGCGGCGGGTGCTT +CTGACTCGGTAAGGTTAAACGCTCCGAACCCGCTTGTCCGTTCGCCGCCCAGCGCGGAGATCCCTTTCAA +CAGCCTGGTGAGTAGGCCGAGCTCGGACTCGGATCCGGTCGCCAGCAACCACAGACCCGCGTCCAGCTCG +AACCGGAAGTAGCCGACACGGTACGGGTCGGCGTCTTTCTTTCCGTTGTGGATCGCTGCCTTCGCTGACA +CGGCGTGGACACCGATCTTGGTCTGCCGCG +>RD115_21 +CGGTCTCCTCGGGGGTTCGGGCACCGAGATCCAGCCCGATCGGGCTGGACAACCGGCTCAGCTCGGCGTC +GGTCAGGCCCGCCGCGCGTAGCCGATCCATCCGGTCGTCGTGCGTCTTGCGTGATCCCATCGCCCCCACG +TATCCGACACCCAGGCGCAGCGCCACCTCGAGCACCGGGACGTCGAACTTCGGATCGTGGGTGAGCACGC 
+AGATCACCGTGCGCTCGTCGATACCACCCGCCTCCGCCTGGGCAGCCAGATAGCGGTGGGGCCATGCGAC +GACGACGTCATCGGCCGTCGGAAAGCGCGCTGGCGTGGCGAATACCGCGCGGGCGTCGCAGACGGTGACC +CGGTAGCCGAGGAACGAACCCTGCCGCGCCAGCGCGGCGGCGAAGTCGATGGCACCGAACACCAGCATCC +GCGGGCGCGGCGCGTGGCTGGACACGAAGACCTCCATGCCCTCGCCACGCCGCTGCCCATCGGGCCCATA +TTCGAGGATCTCGCTGCGGCCCACCGCGAGCAGACCCCGCGCATCGTCGATAACCGCCGCATCGGCACGC +GCCGAACCCAGCGAACCCGTCACGGGGCTCTTTGTGTCGGGCCGGATCACCAGTCGGCGACCCACCCGCC +GCTCGTCCGGATGGGCGATGACGGTCGCGATGGCGACCGGGCGTTGCGCGCCGATGTCGTCGGCCAGCTC +GCCCAGCTCGGGAAACGTGGCCCGCGATACGGGCTCGACGAAGACGTCGATGATGCCGCCACAGGTCAGG +CCTACCGCGAATGCGGTATCGTCGCTGACTCCGTAGTGTTCCAGCCGCGGTATCCCGGTTTGGGCCACCT +CGGCGGCCAGCTCATATACCGCACCCTCCACGCAGCCGCCCGACACCGACCCACTTACCGAACCGTCCGG +GGCTACCACCATCGCGGCCCCTGGGGGCCGCGGCGCTGACCGCAAGGTTCGCACCACCGTCGCGACCCCC +GCGGTGTCACCGGCGGCCCAGATCGCCATCAGCTCGGCAAGCACTTCACGCACGCTTCCCAAAGTAGGCT +TCAGTGCATGACCCCGGCTCAACTTCGGGCCTATTCGGCGGTGGTTCGCCTGGGCTCGGTACGGGCGGCC +GCCGCGGAACTCGGTCTTTCCGACGCCGGAGTCTCCATGCACGTCGCGGCGCTGCGCAAGGAACTCGACG +ACCCGCTGTTTACCAGGACCGGTGCCGGGCTGGCGTTCACGCCCGGCGGGCTGCGGCTGGCCAGCCGCGC +GGTCGAAATCCTGGGCCTGCAACAACAAACCGCGATCGAGGTCACCGAGGCCGCCCACGGGCGTCGGTTG +CTGCGCATCGCCGCCTCCAGCGCCTTCGCCGAACACGCCGCGCCGGGCCTGATCGAGCTCTTCTCGTCTC +GGGCCGACGACCTTTCGGTCGAGTTGAGCGTGCATCCCACCAGCCGGTTCCGCGAACTGATCTGCTCGCG +CGCCGTCGACATCGCGATCGGCCCGGCCAGTGAGAGCTCGATCGGTTCCGACGGCTCGATCTTTCTACGG +CCCTTCCTGAAGTATCAGATCATCACCGTCGTCGCGCCGAATAGCCCACTGGCCGCAGGCATTCCGATGC +CCGCGCTGTTGCGTCACCAGCAATGGATGTTGGGTCCGTCCGCCGGCAGCGTAGATGGTGAGATCGCAAC +CATGTTGCGCGGCTTGGCGATTCCGGAGTCCCAGCAACGGATCTTCCAGAGCGATGCCGCCGCGCTGGAG +GAGGTCATGCGCGTCGGGGGCGCCACGCTGGCCATTGGCTTTGCGGTCGCCAAGGATCTTGCCGCCGGAC +GGTTGGTGCACGTGACCGGTCCTGGGCTGGATCGCGCCGGCGAGTGGTGTGTGGCGACATTGGCGCCTTC +GGCCCGCCAACCCGCCGTCTCCGAGCTTGTTGGCTTCATCAGCACCCCGAGGTGTATTCAGGCGATGATC +CCGGGTAGCGGGGTCGGGGTGACGCGGTTCCGCCCAAAGGTCCACGTCACCCTGTGGAGCTAGCTACTTC +GACTTGAAAGGCTCGGCGCGCCGGTCCGCCCGTTGACGGGGCCCGGCTGCGAGGATTAGCCAGTTCCCTT 
+GTCGCACAGGAGCGTTGAGGCTATCGCCGTACGCCTACTGCGTGCGATCAGCGCTTGCTCGTTCCATACC +ACAGGGTGCGGCCCAGGTGCAAGGTTCACTG +>RD122_22 +CGTTGCCCCTTGATCAATACGTTAGTGAGCGCTAACGTATTGGCGTGTGCCCGACATGCTGGAAGTCGCG +GCAGAGCCAACCCGGCGCCGGCTGCTACAGCTCCTGGCACCGGGTGAACGCACCGTTACCCAGCTTGCGT +CGCAGTTCACGGTCACCCGTTCGGCGATATCGCAGCACCTCGGCATGCTCGCCGAAGCGGGATTGGTTAC +CGCCCGCAAACAGGGCCGGGAACGGTACTACCGGCTCGATGAGCGCGGGGTGCTGCGGCTTCGTGCGCTC +ATGGAGTCCTTCTGGAGCGACGAGCTGGACCGTCTTGTCGCCGATGCCGCCCACTACCCGCCGTCACAAG +GAGACTGTGCCATGCCGTTCGAGAAAGCGGTCGTCGTGCCCTTGGATCCGACCAGCACCTTCGCGCTCAT +CACCCAGCCCGACAGGCTTCGGCGCTGGATGGCCGTCGCCGCGCGTATCGAGCTGCGCACCGGTGGCGCT +TATCGCTGGACGGTGACTCCGGGGCATAGCGCGGCCGGCACCGTCATCGACGTCGACCCCGGCAAGCGGG +TGGTCTTCACCTGGGGTTGGGAGGACCACGGCGACCCCCCGCCGGGCGGGTCGACGGTGACCATCACGCT +GACCCCGGTCGACGGCGGCACCGAGGTCCGGCTGGTCCACGACGGGCTGACCGCGCAGCAGGCCGCCCGG +CACGCCAAAGGGTGGAACCACTTCCTGGACCGGCTGGTCGTCGCCGGCCAACGCGGTGACGCCGGTCCCG +ACGAATGGGCCGCAGCGCCCGATCCGCTCGACGAATTATCTTGTGCCGAAGCAACATTGGCCGTTCTTCA +GCACGTACTGCGCGGGATAGGCGCCTCTGACCTGACCAGGCAGACACCGTGTACGGAATATGACGTTTCG +CAACTGGCGGATCATTTGCTGCGCTCGCTGGCGATCATCGGCGCTGCGGCGGGCGCGCAGCTGGCGCCCC +GCGATGTGGACGCGCCACTGGAAACCCAGGTGGCCGACGCGGCGCAGGCCGTGATGGAAGCCTGGCGGCG +GCGTGGCTTGGCGGGCACGGTGGAGCTGAACTCGAACCAGGTGCCTGCGACGGTGCCGGTCGGCATCCTG +TGCCTAGAATTTCTGGTCCACGCTTGGGATTTCGCGATTGCCACCGGTTCTC +>RD174_23 +AGCTCATGCGTAACCACGGTCAGTTCTCCTTTGCTTTGTCCTGTAACCACAAGTCGTGTCGTCTGCTGCT +CAGCTACCTGTCATCTCGACCGCCTCCCCGGACGCGGCGCGCTCGGCGACACAGGGTTGGTCGGTATCCA +CCGCGAGAACGACCTGGACCAACTCGCCCAAGGCTCGCGCCAGGTGACTGTCGGCCAGCGCATACCGAAC +CTGCCGGCCCTCATAGGTTGCGACTACCAGCCCGCAGCCCCGCAAACACGACAGATGGTTGGACACATTC +GATCGGGTCAACCCGAGGTGCGCAGCTAGCTGGCCGGGATAGCAAACGCCATCCAGCAACGCCACCAGAA +TCCGGCACCGCGTCGGATCAGCCAGAGCCCGGCCGAGTCGAGCCAGGGCCGATTCCCGCATCTCACACGT +CAGCATAGATCAAATAGTACACCATATACTGGTATAACAGCAAGAGCTGAATTGTACATCCATAGCAGAT +ATGATCGGCGCGCGTCACAAGCTTCCGGCCGCAGAGCCGCCAACTCACGATATCGTTAACCGATATCCCG +AGCCGATAGCTGGCGGGCTCGGGTGGTGGCCAGCGGCGCTGCGACGAAAGGTGTGACCGTCATGAAACAG 
+ACACCACCGGCGGCCGTCGGCCGTCGTCACCTGCTCGAGATCTCAGCATCCGCAGCCGGTGTGATCGCGC +TTTCGGCGTGTAGTGGGTCGCCGCCCGAGCCCGGCAAAGGCCGGCCCGACACAACCCCGGAACAGGAAGT +CCCGGTCACCGCGCCCGAGGACTTGATGCGCGAACACGGAGTGCTCAAACGCATCCTGCTGATCT +>RD182_24 +GTCAGTTGCTCACCGGGTCGGCCCGTCGCTGACCGGACGACCCGTTGTTCGGGTGCGTGGCACACGACAC +CAACCGGTATCGTCTGTTGCCGTGACTTCTCCGATTGCTCCGAATACCAAAAGCGACGGTTCTCGCTGAT +GACTACCCCACCCGACAAGGCGCGGCGCCGGTTTCTTCGCGACGCCTACAAGAACGCTGAGCGCGTCGCA +CGAACCGCTTTGCTCACAATCGACCAGGACCAGCTTGAGCAGCTGCTCGACTACGTCGACGAGAGACTCG +GCGAACAGCCTTGTGACCACACCGCCCGGCATGCGCAACGATGGGCCCAATCACACCGCATCGAATGGGA +GACGCTGGCCGAGGGCCTACAAGAGTTTGGTGGCTACTGCGATTGTGAGATCGTAATGAATGTCGAACCT +GAGGCGATCTTCGGCTAGTCCTCTGCCGGCGATGTTCTCATAACGACATGGCAAGCCACGCGCTTGACTA +AACTCAGCCGACGTCAAACCGCCTGTCCCCGATATGCCCTGCGAGGTTGCCTCGTGGCTGATGACTCAAA +CGACACCGCGACCGATGTCGAACCCGACTACCGGTTCACCCTTGCCAACGAGCGGACCTTCCTGGCCTGG +CAGCGCACCGCTCTAGGCCTGCTGGCCGCGGCGGTCGCCCTGGTGCAGCTCGTCCCGGAACTGACGATCC +CCGGCGCACGCCAGGTGCTCGGTGTGGTGCTCGCGATTTTGGCAATCCTCACCAGCGGAATGGGTCTGCT +GCGCTGGCAGCAGGCGGATCGCGCCATGCGCCGGCACCTGCCATTGCCCCGTCACCCCACACCGGGCTAC +CTCGCGGTGGGGCTCTGCGTGGTCGGGGTCGTCGCGCTCGCATTGGTGGTAGCCAAGGCGATCACCGGGT +GAACCGTCACTCGACGGCAGCGAGCGATCGCGGGCTGCAGGCCGAACGGACGACGCTGGCCTGGACCCGG +ACGGCCTTTGCGTTGCTGGTCAACGGCGTGTTGCTGACGCTCAAGGACACGCAAGGCGCCGACGGGCCGG +CTGGGCTGATCCCGGCCGGCCTAGCTGGTGCTGCGGCCTCGTGCTGCTATGTGATCGCTCTACAACGCCA +ACGAGCACTTTCGCACCGCCCGCTACCGGCACGAATCACTCCCCGCGGCCAGGTCCACATCCTCGCGACA +GCGGTGCTGGTGCTTATGGTCGTCACCGCCTTTGCTCAACTGCTCTAGCGC +>RD183_25 +CGATAGCGCCACCAACCGACACAGATCGTCACGAACCGAAACCGGGGCCGGCACCCTTCACACGCTACTG +CGCCTGGCTCACCGAGGACATGTGGAAGTCGGGAATCCGCAGCGGCGGCATCGCGGTACGGGTAACCCAA +TCTGACCATTCGCGCGGCAGTGTCGGCTCGCTGACACCTGCTTCGGTGGCCCGTCGCAGCAGGTCCAGTG +GGCTTTCGTTAAACCGGAAGTTGTTGACCGCCGCGCTGACCTCGCCGTCTTCGACCAGGTAGACACCGTC +GCGGGTCAGCCCGGTGAGCAGCAGCGTGGTCGGGTCGACCTCGCGGATGTACCACAGCGTGGTCAGCAAC +AGTCCGCGCTCGGTGCCCGCGATCATGTCGGCGAGATCGGCCGACCCGCCGGTCATGATCAAGTTGTCGG 
+CGGCGACCGCAACTGGGGCGTCGAATTTGGCGGCAGTGGCCCGTGGATACGCCAGCGCATTGATCACACC +GCTGCGGATCCAGTCCACCTGGCTGATTTCCATGCCGTTGTCGAACACCGATTGCGTCTCCGAGGAGTTG +CTCACCGCCACAAACGGCGTACACGCCAGACCCGGCGCAGCCGGATCGGTGAACAACGTCAGCGGCAGCT +CGGTCAACCGCTCTCCCACCCGGGTTCCACCGCCAGGAGCCGAGAAAGCGGTTCGGCCCTCCTGCGCGCC +GCGCCCGGCCATCGACCAACCCAGGTAGATCATCATGTCGGCCACCGTCGACGGAGGCATGATGGTCTGG +TAGCGCCCGGCCGGCAGCTCGACGGTGCGTTGCGCCCACCGCAGCCGCGTCGACAGCCGCTCGAGCATCA +GATCGATGGGCACCTCGACGAAATCGGGTGTGCCGATCCCCACCCAAGCGCTGGCGTCGCCGCGTTTGGC +GTTGATCTCGATCGCCCCGGTGGGCTGGGTGTAGCGGCGGCGCAGACCCGTCGACGATGCCAGAAACGTC +GTGGACACACTGCGGTGCGCGTAGCCGTACAAGCGGTCGGCCCCGCGGAAGCCCCTGCTCAGTGAGCCGG +CGATACCGGTGAAAACCCCTGCCCCGGTGCCCGGAACCGGGGCATCCCAGTCGTCGGGCTCTCCGGTATC +GGCAAGCAGCGGCGCGGCATCACCGGCCTCCGGCGCGGAGCGGGCCGCGTCCTGGGAGGACACCACCAGA +CCGGGCAGCACCGACGGGTCCACTTCGGCGGAGACCACGGAGCCGACGAAGGCGCTATCTCCCCGTCGGA +CGATCGAAATCACGGTGACGTTTCGGCTGTGGGAAACGCCGTTGGTGGTCATCGAATTGCCCGCCCAACG +CAGTGTCGCCTCGACCTTTTCGGTGACCAGCACCATGGTCTCGTCCGCCCGGCCAGACCTGGCCGCTTCC +TTTAAAACGATGTTGACGGCGTGCTGCGGCTCGATCATCGACCACCTTCAGTACGAGTATTGAGCACATT +GACGCCCCGGAACAACGCCGACGGACAGCCATGGCTGACCGCGGCAACCTGGCCGGGCTGGGCCTTGCCG +CAGTTGATGGCTCCGCCCATTCGCCAGGTCGACGGCCCGCCCACGGCTTCCATGGCATTCCAGAAATCGG +TGGTGCTCGATTGATAGGCGACATCACGCAGCTGCCCGTACAGCTGGCCACCTCGGATGCGGAAGAAACG +CTGGCCGGTGAACTGAAAGTTGTAGCGCTGCATGTCGATCGACCATGACTTGTCGCCGACAATATAGATC +CCGTCGTCGACCCGGCCGATCAGGTCCGCGGTGCTGAGGTCTTCGATGCCCGGCTGCAGCGATATGTTGG +CCATCCGCTGGATCGGCACGTGATGTGGCGAGTCGGCATACGAGCAGCCGTTGGAACGTGGCTCCCCCAA +CCGTGGGGCGAACGCCCGGTCGAGCTGGTAACCAACGAACACCCCGTCACGCACTAGATCCCAGCTCTGC +GCGGCCACTCCCTCGTCGTCGTAACCGACGGTGGCCAAGCCGAATTCGGCGGTACGGTCGGCGGTCACGT +TCATCACCGGCGAGCCGTAGCGCAGGGTGCCGAGTTTGTCTGGGGTGGCAAACGATGTCCCGGCATAGGC +AGCCTCGTAGCCGATGGCACGGTCGTATTCGGTTGCGTGGCCGATGGATTCGTGAATAGTCAGCCATAGG +TTAGTGGGGTCGATCACCAGGTCGGTGGGCCCCGGCATCACGCTAGGCGCTCGGACCTTCTCGGCCAACA +GCGATGGCAGCTGCGCGAGCTCGTCGGTCCAGTTCCAGATCTCGTCGCCGGCCACCACTTCCCAGCCCCG 
+GGCGGTCGGCGGAGCCAACGTCCGCATCGATTCGAAGTTGCCCGCCGCGGAATCAACAGCAACCGCATCC +AGGCACGGCAGCAGCCGCACCCGCTGTTGGGTAATCGATGACCCGAAGGTGTCGGCGTAGAAGGTCTGCT +CCTTGACGGCGTTCAAGCTGGCCGATACGTGGTCGATGCCGTCGGCGTCCAGTAACCGCCCGGAGTAGTC +GCGCAGCACGGCGATCTTCTCGGAGGCGGGAACGCCGAACGGATCGATCCGGTAGTTCGAGACCCACTCC +GCGTCGGTGTATACGGGCTCGGGCGCCAATCTGACCCGCTCGGTGTTCAGCGCCGCCAGCACGGTAGCCA +CGTGTACCGCATGGCGAGCGGTCGCGGCCGCGACGTCGGGTGCCAACTCAGCATGGGAGGCGAATCCCCA +CGTGCCCGCGACGATTACCCGGACGGCCAGGCCGAGCTCACGGCTGATCACCGCGGTCTCCAGCTCACCG +TCACGCAGTTGGATGATCTCGGTGCTAATGCGGTGAACCCGCAGGTCGGCGTGGCTGGCCCCGGCCGTGG +CGGCCGCCGACAATGCGGCGTCGGCCAACTGCTGGCGCGGCAGGTCCA +>RD193_26 +TTCGGCGCCGACCACTACCATGGCGCCGATGTTCTGCTCAGCGAGGCCAGCGAGCAGCTCCCCGACCGTG +GCGTCGGGGTTGATCGTCACCACCGCCGCCCCCTTGTTCCGCAAGACGTCCGCGATGCGCATCAAGGCCT +CCCGCCGGTGGTGAGCTGGTTCACACCAGGCTACGGCGAACTCGGGCGGCGGGAAAGCCGATACCGGAAT +ATGCGGCATCTAGCACCCGAACCCGCAGGTGCCCGGCGGTCGGTAGCTGCGTAGCCCGGGCAGGAATTCG +GCCGCCGACAACGCCCATGTCGGCCGCATCCTCGAGGCTAAAACTCGTTGGCCATCAGCCGAATCGGTCG +ATCGGGGCCGCTGGATCCATCGAGCTTGTCAGGATAGGGCCATGCTTGAGATCACGTTGCTCGGAACTGG +GAGCCCCATTCCCGACCCGGACCGTGCCGGACCATCCACTCTGGTGCGGGCCGGCGCGCAGGCGTTCCTG +GTGGACTGCGGT +>RD219_27 +GCCGGCGCCTACCTGTGCCGGCTGCTGCGGCACATGGCCGACAACGGCTACACGGTGGCAACGCCGCGCG +ATGCGCAGGACTGCGCGCTGGACGTTGGCATGTTCGACCAGCTGAACTCCGGCTATGTGAAGCGCGGCCA +GGACATCATGCCGCGCCAGGGCTCCAAGCATCCGTGGAGGGTGCTCATGCACTACGAGAAGGACGCCAAG +ATCCTGCTCGAAGACCCCATCGATGACGGCGTGCTGCACTTCGCCGCAGCGGCCCAAGACCACGCGGCGG +CCTGAGCATCATGAACCTGCGCAAAAACGTCATCCGGTCCGTATTACGTGGTGCCCGGCCACTGTTCGCT +TCCCGCCGGCTGGGTATTGCCGGCCGTCGAGTCCTGCTGGCGACGCTGACGGCCGGCGCGCGCGCCCCCA +AGGGCACCCGCTTTCAGCGCGTCAGCATCGCCGGTGTCCCGGTCCAGCGGGTGCAACCCCCCCATGCGGC +AACCAGCGGGACGCTGATCTACCTGCACGGCGGTGCCTACGCCCTGGGCAGCGCCCGGGGCTACCGCGGC +CTGGCCGCCCAGCTCGCGGCGGCGGCCGGAATGACGGCGCTGGTCCCCGACTACACCCGCGCACCGCACG +CCCACTATCCAGTGGCCCTCGAAGAGATGGCTGCGGTGTACACCCGCTTGCTCGACGACGGGCTCGACCC +GAAAACGACCGTCATCGCCGGTGATTCGGCTGGCGGAGGGTTGACCCTGGCGCTGGCCATGGCGCTGCGC 
+GATCGCGGCATCCAGGCCCCGGCCGCACTCGGCCTGATCTGCCCGTGGGCCGATCTCGCCGTCGACATCG +AAGCGACGCGACCGGCGCTGCGCGATCCGCTCATTCTTCCGTCGATGTGCACCGAATGGGCGCCGCGCTA +CGTAGGGTCCTCCGATCCGCGGCTGCCCGGTATCTCCCCGGTCTACGGCGACATGAGCGGCCTGCCGCCC +ATCGTCATGCAGACCGCGGGCGACGATCCGATCTGCGTTGACGCGGACAAGATCGAAACCGCCTGCGCCG +CTTCGAAAACAAGCATCGAGCATCGCCGGTTCGCGGGCATGTGGCACGACTTCCATCTGCAGGTCAGTCT +GCTCCCCGAAGCCCGCGACGCGATCGCCGACCTCGGGGCAAGGCTGCGCGGCCACCTCCACCAATCGCAG +GGACAACCACGGGGAGTAGTCAAATGAGCTCAT +>RD724_28 +ATTCTTGGCGACGCCGTTGCTGTACCGATAGCGACTGCCCCGTATCGATCCCAGGGAACGTGACCATGGT +CGTAGGGATGACTTGACAGTTTCAACGGGGTGCGACCACCGTTGCGCTCAGAAGGCATACGTTGGTGGAA +CACGTCGGAAAGCTGGGAGGTGAATCTGATGGCTGGCGACCAAGAGCTGGAACTGCGGTTCGACGTTCCT +CTTTACACGCTTGCCGAGGCATCGCGGTACCTGGTGGTTCCCCGCGCCACCCTGGCTACGTGGGCTGACG +GCTACGAGCGTCGGCCGGCCAACGCACCGGCGGTCCAGGGGCAACCGATCATCACGGCTCTTCCCCACCC +GACCGGCAGTCACGCTCGGCTCCCATTCGTCGGAATCGCCGAGGCGTATGTGTTGAACGCCTTCCGCCGA +GCGGGCGTCCCTATGCAGCGGATCCGGCCATCCCTCGACTGGCTAATCAAGAATGTCGGGCCACACGCGC +TTGCGTCCCAGGATTTGTGCACGGACGGTGCCGAGGTGCTCTGGCGGTTCGCTGAACGGTCCGGGGAGGG +CAGTCCTGATGATCTGGTGGTCAGGGGGCTGATTGTCCCGCGATCCGGGCAGTACGTCTTCAAGGAGATC +GTCGAGCACTACCTGCAACAAATCAGCTTTGCCGACGACAACCTGGCTTCGATGATTAGGTTGCCGCAGT +ACGGCGATGCCAACGTCGTCCTCGATCCACGCCGCGGCTATGGGCAACCGGTGTTCGACGGAAGCGGCGT +CCGGGTAGCTGACGTGCTCGGCCCATTGCGCGCCGGCGCGACGTTCCAGGCTGTCGCCGACGACTACGGT +GTGACCCCGGACCAGCTTCGAGACGCGCTCGACGCCATTGCAGCCTGATCGGAATCTCCTCGCCGACCTC +GATCACATCTTTGTCGACCGGAGTTTGGGCGCTGTGCAAGTCCCGCAACTCCTTCGGGATGCCGGATTCC +GGCTGACAACGATGCGGGAGCACTACGGCGAGACGCAGGCTCAGAGTGTCAGCGACCACAAGTGGATCGC +AATGACCGCCGAGTGCGGCTGGATTGGATTTCACAAGGATGCCAATATCCGGCGCAACGCCGTCGAGCGA +CGGACGGT +>RD726_29 +AACGGCCGACTTGGTGACCCCGTAGGCGCCGAACCAGCGATGGGTGTTGCTGGCCGCGATCGAGGAGATG +CCGACGAACGAACCGCCGCCGCCGCGTACCAATTCCCGCGCGGCGTGCTTGAGCACGTACATGGTGCCAT +TGACATTGAGGTCCACGGTGCGCCGCCAGGCCTGCGAGTCGATCTGGGTGATTGGCCCAATGGTCTGAGA +CCCGCCCGCGCAATGCACCACACCGTGCAGCCGGCCATGCCACGCGGTTGCCGCGTCCACCACACGCAGG 
+GTCTGCTCCTCGTCGGTGATGTCGGCCGGCTCATAGCCGATCGCTCCGGTCTTGAGCGCCTCGATGTCTT +TGACAGCCGCCGCCAGCTTGTCTGGATTTCGTCCCACGATCATGACGGCGGCTCCAGCCGCGACCAACCC +GGCGGCCACCCCCTTGCCGATTCCGCTGCCACCTCCGGTGACCAGGTAGGTCCGGTCTTG +>RD761_30 +CACTCGCACACTGCGACCGAAGCGTACCCGAGCCGTACGGACGTGAAGCTTGCCACCGAACCCGACGCGC +ACTACGTGCTGGTGTCCACCCGCGACCCGCACCGGCACGAGCTACGCAGCTACCGCATCGTCGATGGCGC +TGTCACCGAGGAACCTGTCAATGTCGTCGAGCAGTACTGAACCGTTCCGAGAAAGGCCAGCATGAACGTC +ACCGTATCCATTCCGACCATCCTGCGGCCCCACACCGGCGGCCAGAAGAGTGTCTCGGCCAGCGGCGATA +CCTTGGGTGCCGTCATCAGCGACCTGGAGGCCAACTATTCGGGCATTTCCGAGCGCCTGATGGACCCGTC +TTCCCCAGGTAAGTTGCACCGCTTCGTGAACATCTACGTCAACGACGAGGACGTGCGGTTCTCCGGCGGC +TTGGC +>7bp_pks15.1 +GGTGCTCAAACACCGAGCGACTGGCCAACGAGCACCCCACATCGATCGGATCCAGCCCTGGGTTGGCCTG +CACGTGGGCCATAAGTCGACCCGCCTGCGCCGTCAACGCCTCAGCCGATCTCGCCGAAATCACCCACGGC +ACCATCGACGGCCGCGGCCCGCGGCCCCCGGTGCTTTCGCTCGCCTCAACCGGCGCCTCTGCGGGGGCTG +GTACGGGGGCCTCTTCCAAGATCAGATGCGCGTTGGTGCCGCTGATCCCAAAGGAGGACACCGCCGCCCG +GCGCGGACGCCCGTCAACCGACCACTCCCTGGCCTCGGTCAACACCG diff --git a/workflows/quality_check_wf.nf b/workflows/quality_check_wf.nf index 1ff4629c..572302b1 100644 --- a/workflows/quality_check_wf.nf +++ b/workflows/quality_check_wf.nf @@ -27,6 +27,15 @@ include { FASTQC } from '../modules/fastqc/fastqc.nf' addParams (pa include { NTMPROFILER_PROFILE } from '../modules/ntmprofiler/profile.nf' addParams (params.NTMPROFILER_PROFILE) include { NTMPROFILER_COLLATE } from '../modules/ntmprofiler/collate.nf' addParams (params.NTMPROFILER_COLLATE) +include { TBPROFILER_FASTQ_PROFILE } from '../modules/tbprofiler/fastq_profile.nf' addParams (params.TBPROFILER_FASTQ_PROFILE) +include { TBPROFILER_COLLATE as TBPROFILER_FASTQ_COLLATE } from '../modules/tbprofiler/collate.nf' addParams (params.TBPROFILER_FASTQ_COLLATE) + +include { SPOTYPING } from '../modules/spotyping/main.nf' addParams (params.SPOTYPING) +include { UTILS_CAT_SPOTYPING } from '../modules/utils/cat_spotyping.nf' addParams (params.UTILS_CAT_SPOTYPING) + +//FIXME Enable this once 
it is working +//include { RDANALYZER } from '../modules/rdanalyzer/main.nf' addParams (params.RDANALYZER) + workflow QUALITY_CHECK_WF { take: @@ -36,12 +45,48 @@ workflow QUALITY_CHECK_WF { FASTQC(reads_ch) - NTMPROFILER_PROFILE( reads_ch ) - NTMPROFILER_COLLATE( params.vcf_name, - NTMPROFILER_PROFILE.out.profile_json.collect() ) + if (!params.skip_ntmprofiler) { + + NTMPROFILER_PROFILE( reads_ch ) + + NTMPROFILER_COLLATE( params.vcf_name, + NTMPROFILER_PROFILE.out.profile_json.collect() ) + + } + + + + if (!params.skip_tbprofiler_fastq) { + + TBPROFILER_FASTQ_PROFILE( reads_ch ) + + fastq_jsons_ch = TBPROFILER_FASTQ_PROFILE.out.json.map{ it[1] }.collect() + + TBPROFILER_FASTQ_COLLATE( params.vcf_name, + fastq_jsons_ch, + [] ) + } + + + + if(!params.skip_spotyping) { + SPOTYPING( reads_ch ) + + UTILS_CAT_SPOTYPING( SPOTYPING.out.txt.collect() ) + } + + /* + //FIXME: As of 20-DEC-2024, RD-Analyzer fails to complete even with bundled fasta file + if(!params.skip_rdanalyzer) { + RDANALYZER( reads_ch, params.ref_fasta_rdanalyzer ) + } + */ + + + //TODO: Publish more outputs from this subworkflow emit: reports_fastqc_ch = FASTQC.out.collect()