# Review notes. Text ahead of the first "diff --git" header is commentary
# (same position as the commit message in `git format-patch` output).
#
# Purpose of the change set: turn ADRSM into an installable package
# (setup.py + console script), switch the CLI from argparse to click, load
# the quality pickles through pkg_resources, rework the deamination model,
# move test data under test/, and publish to conda from a GitHub workflow.
#
# Fixes applied to the added ("+") side during review; hunk line counts are
# unchanged by all of them:
#   * adrsm/adrsm.py: --seed and --stats both declared the short flag -s.
#     Seed now uses -se, the flag the removed argparse CLI used.
#   * adrsm/adrsm.py: --readLength default is the int 76 (was the string
#     '76'); "generator generator" typo removed from the --seed help.
#   * adrsm/lib/adrsmlib.py: get_rev_qual() loaded fwd_qual.p in its
#     local-path branch; it now loads rev_qual.p (former context line turned
#     into a -/+ pair).
#   * adrsm/lib/adrsmlib.py: resource names passed to resource_filename()
#     are relative ('data/quality/...'), not absolute.
#   * adrsm/lib/sequencefunctions.py: the baseline G -> A branch tests and
#     edits insert[opp_pos], like the G -> A branch above it (was
#     insert[pos]); mislabelled "C -> T" comment in add_damage_rev fixed.
#   * conda/meta.yaml, conda/meta_local.yaml: "python >=3.6.*" -> ">=3.6".
#   * setup.py: description typo ("Metagenomic " -> "Metagenomics").
#   * test/Makefile: calls the installed `adrsm` entry point instead of
#     `python ../adrsm` (now a package directory without __main__.py) and
#     uses the data/ paths that exist under test/ after this change.
#
# NOTE(review): indentation, tabs and blank context lines were reconstructed
# from a whitespace-collapsed copy of the patch - verify against the tree
# before applying. The "index" blob ids are the original ones and do not
# describe files whose added lines were edited above.

diff --git a/.github/workflows/publish_conda.yml b/.github/workflows/publish_conda.yml
new file mode 100644
index 0000000..3e05b56
--- /dev/null
+++ b/.github/workflows/publish_conda.yml
@@ -0,0 +1,14 @@
+name: publish_conda
+
+on: [release]
+
+jobs:
+  publish:
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v1
+    - name: publish-to-conda
+      uses: maxibor/conda-package-publish-action@v1.1
+      with:
+        subDir: 'conda'
+        AnacondaToken: ${{ secrets.ANACONDA_TOKEN }}
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index a75f825..46db792 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,3 +5,7 @@ r1.fq
 r2.fq
 metagenome.*
 stats.csv
+adrsm.egg-info
+__pycache__
+.ipynb_checkpoints
+dist
\ No newline at end of file
diff --git a/.travis.yml b/.travis.yml
index 123c5bc..65a3c9d 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -6,5 +6,8 @@ install:
   - pip install numpy
   - pip install scipy
   - pip install requests
+  - pip install click
 # command to run tests
-script: python adrsm -p 0.5 -m 0.001 -M 0.3 -t 2 ./data/short_genome_list.csv
+script:
+  - python setup.py install
+  - adrsm -p 0.5 -m 0.001 -M 0.3 -t 2 ./test/data/short_genome_list.csv
diff --git a/adrsm b/adrsm
deleted file mode 100755
index a817ef1..0000000
--- a/adrsm
+++ /dev/null
@@ -1,159 +0,0 @@
-#!/usr/bin/env python
-
-from numpy import random as npr
-import lib.adrsmlib as ad
-import argparse
-
-
-def _get_args():
-    '''This function parses and return arguments passed in'''
-    parser = argparse.ArgumentParser(
-        prog='ADRSM v' + str(version),
-        formatter_class=argparse.RawDescriptionHelpFormatter,
-        description='''
-==================================================\n
-ADRSM: Ancient DNA Read Simulator for Metagenomics\n
-Author: Maxime Borry\n
-Contact: \n
-Homepage & Documentation: github.com/maxibor/adrsm
-==================================================
-    ''')
-    parser.add_argument('confFile', help="path to configuration file")
-    parser.add_argument(
-        '-r',
-        dest='readLength',
-        default=76,
-        help="Average read length. Default = 76")
-    parser.add_argument(
-        '-n',
-        dest="nbinom",
-        default=8,
-        help="n parameter for Negative Binomial insert length distribution. Default = 8")
-    parser.add_argument(
-        '-fwd',
-        dest="fwdAdapt",
-        default="AGATCGGAAGAGCACACGTCTGAACTCCAGTCACNNNNNNATCTCGTATGCCGTCTTCTGCTTG",
-        help="Forward adaptor. Default = AGATCGGAAGAGCACACGTCTGAACTCCAGTCACNNNNNNATCTCGTATGCCGTCTTCTGCTTG")
-    parser.add_argument(
-        '-rev',
-        dest="revAdapt",
-        default="AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATT",
-        help="Reverse adaptor. Default = AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATT")
-    parser.add_argument(
-        '-p',
-        dest="geom_p",
-        default=0.5,
-        help="Geometric distribution parameter for deamination. Default = 0.5")
-    parser.add_argument(
-        '-m',
-        dest="min",
-        default=0.001,
-        help="Deamination substitution base frequency. Default = 0.001")
-    parser.add_argument(
-        '-M',
-        dest="max",
-        default=0.3,
-        help="Deamination substitution max frequency. Default = 0.3")
-    parser.add_argument(
-        '-o',
-        dest="output",
-        default="metagenome",
-        help="Output file basename. Default = ./metagenome.*")
-    parser.add_argument(
-        '-s',
-        dest="stats",
-        default="stats.csv",
-        help="Statistic file. Default = stats.csv")
-    parser.add_argument(
-        '-se',
-        dest="seed",
-        default=7357,
-        help="Seed for random generator. Default = 7357")
-    parser.add_argument(
-        '-t',
-        dest="threads",
-        default=2,
-        help="Number of threads for parallel processing. Default = 2")
-
-    args = parser.parse_args()
-
-    infile = args.confFile
-    readlen = int(args.readLength)
-    nbinom = int(args.nbinom)
-    a1 = args.fwdAdapt
-    a2 = args.revAdapt
-    geom_p = float(args.geom_p)
-    themin = float(args.min)
-    themax = float(args.max)
-    outfile = args.output
-    stats = args.stats
-    seed = int(args.seed)
-    threads = int(args.threads)
-
-    return(infile, readlen, nbinom, a1, a2, geom_p, themin, themax, outfile, stats, seed, threads)
-
-
-def read_config(infile):
-    """
-    READS CONFIG FILE AND RETURNS CONFIG DICT
-    """
-    genomes = {}
-    with open(infile, "r") as f:
-        next(f)
-        for line in f:
-            line = line.rstrip()
-            splitline = line.split(",")
-            agenome = splitline[0].replace(" ", "")
-            ainsert = int(splitline[1].replace(" ", ""))
-            acov = float(splitline[2].replace(" ", ""))
-            deambool = str(splitline[3].replace(" ", ""))
-            deamination = ad.parse_yes_no(deambool)
-            if len(splitline) > 4 and float(splitline[4].replace(" ", "")) != 0.0:
-                mutate = True
-                mutrate = float(splitline[4].replace(" ", ""))
-                age = float(splitline[5].replace(" ", ""))
-            else:
-                mutate = False
-                mutrate = 0
-                age = 0
-
-            genomes[agenome] = {'size': ainsert,
-                                'cov': acov, 'deam': deamination, 'mutate': mutate, 'mutrate': mutrate, 'age': age}
-    return(genomes)
-
-
-if __name__ == "__main__":
-    version = "0.9.3"
-    INFILE, READLEN, NBINOM, A1, A2, GEOM_P, THEMIN, THEMAX, OUTFILE, STATS, SEED, PROCESS = _get_args()
-
-    MINLENGTH = 20
-    npr.seed(SEED)
-    fastq_list = []
-    stat_dict = {}
-    all_genomes = read_config(INFILE)
-    for agenome in all_genomes.keys():
-        stat_and_run = ad.run_read_simulation_multi(INFILE=agenome,
-                                                    COV=all_genomes[agenome]['cov'],
-                                                    READLEN=READLEN,
-                                                    INSERLEN=all_genomes[agenome]['size'],
-                                                    NBINOM=NBINOM,
-                                                    A1=A1,
-                                                    A2=A2,
-                                                    MINLENGTH=MINLENGTH,
-                                                    MUTATE=all_genomes[agenome]['mutate'],
-                                                    MUTRATE=all_genomes[agenome]['mutrate'],
-                                                    AGE=all_genomes[agenome]['age'],
-                                                    DAMAGE=all_genomes[agenome]['deam'],
-                                                    GEOM_P=GEOM_P,
-                                                    THEMIN=THEMIN,
-                                                    THEMAX=THEMAX,
-                                                    PROCESS=PROCESS,
-                                                    FASTQ_OUT=OUTFILE)
-        stat_dict[ad.get_basename(agenome)] = stat_and_run
-
-    ad.write_stat(stat_dict=stat_dict, stat_out=STATS)
-    print("\n-- ADRSM v" + str(version) +
-          " finished generating this mock metagenome --")
-    print("-- FASTQ files written to " + OUTFILE +
-          ".1.fastq and " + OUTFILE + ".2.fastq --")
-    print("-- Statistic file written to " + STATS + " --")
diff --git a/adrsm/__init__.py b/adrsm/__init__.py
new file mode 100644
index 0000000..acec196
--- /dev/null
+++ b/adrsm/__init__.py
@@ -0,0 +1 @@
+__version__ = "0.9.4"
\ No newline at end of file
diff --git a/adrsm/adrsm.py b/adrsm/adrsm.py
new file mode 100755
index 0000000..fd05734
--- /dev/null
+++ b/adrsm/adrsm.py
@@ -0,0 +1,156 @@
+#!/usr/bin/env python
+
+from numpy import random as npr
+from .lib import adrsmlib as ad
+from . import __version__
+import click
+
+@click.command()
+@click.version_option(__version__)
+@click.argument('confFile', type=click.Path(exists=True,
+                                            readable=True,
+                                            resolve_path=True))
+@click.option('-r',
+              '--readLength',
+              default=76,
+              type=int,
+              show_default=True,
+              help='Average read length')
+@click.option('-n',
+              '--nbinom',
+              default=8,
+              type=int,
+              show_default=True,
+              help='n parameter for Negative Binomial insert length distribution')
+@click.option('-fwd',
+              '--fwdAdapt',
+              default='AGATCGGAAGAGCACACGTCTGAACTCCAGTCACNNNNNNATCTCGTATGCCGTCTTCTGCTTG',
+              type=str,
+              show_default=True,
+              help='Forward adaptor sequence')
+@click.option('-rev',
+              '--revAdapt',
+              default='AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATT',
+              type=str,
+              show_default=True,
+              help='Reverse adaptor sequence')
+@click.option('-p',
+              '--geom_p',
+              default=0.5,
+              type=click.FloatRange(min=0.0, max=1.0),
+              show_default=True,
+              help='Geometric distribution parameter for deamination')
+@click.option('-m',
+              '--minD',
+              default=0.01,
+              type=click.FloatRange(min=0.0, max=1.0),
+              show_default=True,
+              help='Deamination substitution base frequency')
+@click.option('-M',
+              '--maxD',
+              default=0.3,
+              type=click.FloatRange(min=0.0, max=1.0),
+              show_default=True,
+              help='Deamination substitution max frequency')
+@click.option('-se',
+              '--seed',
+              default=42,
+              type=int,
+              show_default=True,
+              help='Seed for random generator')
+@click.option('-t',
+              '--threads',
+              default=2,
+              type=click.IntRange(min=1, max=1024),
+              show_default=True,
+              help='Number of threads for parallel processing')
+@click.option('-o',
+              '--output',
+              default='./metagenome',
+              type=click.Path(file_okay=True, writable=True, resolve_path=True),
+              show_default=True,
+              help='Fastq output file basename')
+@click.option('-s',
+              '--stats',
+              default='./stats.csv',
+              type=click.Path(file_okay=True, writable=True, resolve_path=True),
+              show_default=True,
+              help='Summary statistics file')
+
+
+def cli(no_args_is_help=True, **kwargs):
+    """\b
+    ==================================================
+    ADRSM: Ancient DNA Read Simulator for Metagenomics
+    Author: Maxime Borry
+    Contact:
+    Homepage & Documentation: github.com/maxibor/adrsm
+
+    CONFFILE: path to ADRSM configuration file
+    """
+    main(**kwargs)
+
+def read_config(infile):
+    """
+    READS CONFIG FILE AND RETURNS CONFIG DICT
+    """
+    genomes = {}
+    with open(infile, "r") as f:
+        next(f)
+        for line in f:
+            line = line.rstrip()
+            splitline = line.split(",")
+            agenome = splitline[0].replace(" ", "")
+            ainsert = int(splitline[1].replace(" ", ""))
+            acov = float(splitline[2].replace(" ", ""))
+            deambool = str(splitline[3].replace(" ", ""))
+            deamination = ad.parse_yes_no(deambool)
+            if len(splitline) > 4 and float(splitline[4].replace(" ", "")) != 0.0:
+                mutate = True
+                mutrate = float(splitline[4].replace(" ", ""))
+                age = float(splitline[5].replace(" ", ""))
+            else:
+                mutate = False
+                mutrate = 0
+                age = 0
+
+            genomes[agenome] = {'size': ainsert,
+                                'cov': acov, 'deam': deamination, 'mutate': mutate, 'mutrate': mutrate, 'age': age}
+    return(genomes)
+
+
+def main(conffile, readlength, nbinom, fwdadapt, revadapt, geom_p, mind, maxd, seed, threads, output, stats):
+    MINLENGTH = 20
+    npr.seed(seed)
+    fastq_list = []
+    stat_dict = {}
+    all_genomes = read_config(conffile)
+    for agenome in all_genomes.keys():
+        stat_and_run = ad.run_read_simulation_multi(INFILE=agenome,
+                                                    COV=all_genomes[agenome]['cov'],
+                                                    READLEN=readlength,
+                                                    INSERLEN=all_genomes[agenome]['size'],
+                                                    NBINOM=nbinom,
+                                                    A1=fwdadapt,
+                                                    A2=revadapt,
+                                                    MINLENGTH=MINLENGTH,
+                                                    MUTATE=all_genomes[agenome]['mutate'],
+                                                    MUTRATE=all_genomes[agenome]['mutrate'],
+                                                    AGE=all_genomes[agenome]['age'],
+                                                    DAMAGE=all_genomes[agenome]['deam'],
+                                                    GEOM_P=geom_p,
+                                                    THEMIN=mind,
+                                                    THEMAX=maxd,
+                                                    PROCESS=threads,
+                                                    FASTQ_OUT=output)
+        stat_dict[ad.get_basename(agenome)] = stat_and_run
+
+    ad.write_stat(stat_dict=stat_dict, stat_out=stats)
+    print("\n-- ADRSM v" + __version__ +
+          " finished generating this mock metagenome --")
+    print("-- FASTQ files written to " + output +
+          ".1.fastq and " + output + ".2.fastq --")
+    print("-- Statistic file written to " + stats + " --")
+
+if __name__ == "__main__":
+    cli()
\ No newline at end of file
diff --git a/adrsm/data/quality/fwd_qual.p b/adrsm/data/quality/fwd_qual.p
new file mode 100644
index 0000000..00783b3
Binary files /dev/null and b/adrsm/data/quality/fwd_qual.p differ
diff --git a/adrsm/data/quality/rev_qual.p b/adrsm/data/quality/rev_qual.p
new file mode 100644
index 0000000..e7b760b
Binary files /dev/null and b/adrsm/data/quality/rev_qual.p differ
diff --git a/lib/__init__.py b/adrsm/lib/__init__.py
similarity index 100%
rename from lib/__init__.py
rename to adrsm/lib/__init__.py
diff --git a/lib/adrsmlib.py b/adrsm/lib/adrsmlib.py
similarity index 94%
rename from lib/adrsmlib.py
rename to adrsm/lib/adrsmlib.py
index b2a40fe..ce948d2 100644
--- a/lib/adrsmlib.py
+++ b/adrsm/lib/adrsmlib.py
@@ -8,6 +8,7 @@
 import multiprocessing
 import pickle
 from functools import partial
+from pkg_resources import resource_filename
 from . import sequencefunctions as sf
 from . import markov as mk
 
@@ -95,10 +96,7 @@ def get_fwd_qual():
         ret = pickle.load(open("data/quality/fwd_qual.p", 'rb'))
         return(ret)
     except FileNotFoundError:
-        cmd = "which adrsm"
-        res = subprocess.check_output(cmd, shell=True)
-        res = res.decode('utf-8').rstrip()
-        path = "/".join(res.split("/")[:-2])+"/data/quality/fwd_qual.p"
+        path = resource_filename('adrsm', 'data/quality/fwd_qual.p')
         ret = pickle.load(open(path, 'rb'))
         return(ret)
 
@@ -108,10 +106,7 @@ def get_rev_qual():
-        ret = pickle.load(open("data/quality/fwd_qual.p", 'rb'))
+        ret = pickle.load(open("data/quality/rev_qual.p", 'rb'))
         return(ret)
     except FileNotFoundError:
-        cmd = "which adrsm"
-        res = subprocess.check_output(cmd, shell=True)
-        res = res.decode('utf-8').rstrip()
-        path = "/".join(res.split("/")[:-2])+"/data/quality/rev_qual.p"
+        path = resource_filename('adrsm', 'data/quality/rev_qual.p')
         ret = pickle.load(open(path, 'rb'))
         return(ret)
 
diff --git a/lib/markov.py b/adrsm/lib/markov.py
similarity index 100%
rename from lib/markov.py
rename to adrsm/lib/markov.py
diff --git a/lib/quality.py b/adrsm/lib/quality.py
similarity index 100%
rename from lib/quality.py
rename to adrsm/lib/quality.py
diff --git a/lib/sequencefunctions.py b/adrsm/lib/sequencefunctions.py
similarity index 76%
rename from lib/sequencefunctions.py
rename to adrsm/lib/sequencefunctions.py
index 3a27e7d..824b99c 100644
--- a/lib/sequencefunctions.py
+++ b/adrsm/lib/sequencefunctions.py
@@ -58,8 +58,10 @@ def mutate_fwd(self, mutrate, alpha=0.4, beta=0.2):
         a = int(10 * alpha)
         b = int(10 * beta)
         newseq = ""
-        dmut = {'A': b * ['C'] + b * ['T'] + a * ['G'], 'C': b * ['A'] + b * ['G'] + a * [
-            'T'], 'G': b * ['C'] + b * ['T'] + a * ['A'], 'T': b * ['A'] + b * ['G'] + a * ['C']}
+        dmut = {'A': b * ['C'] + b * ['T'] + a * ['G'],
+                'C': b * ['A'] + b * ['G'] + a * ['T'],
+                'G': b * ['C'] + b * ['T'] + a * ['A'],
+                'T': b * ['A'] + b * ['G'] + a * ['C']}
         for nuc in self.seq:
             if npr.random() <= mutrate:
                 new_nucl = random.choice(dmut[nuc])
@@ -77,8 +79,10 @@ def mutate_rev(self, mutrate, alpha=0.4, beta=0.2):
         a = int(10 * alpha)
         b = int(10 * beta)
         newseq = ""
-        dmut = {'A': b * ['C'] + b * ['T'] + a * ['G'], 'C': b * ['A'] + b * ['G'] + a * [
-            'T'], 'G': b * ['C'] + b * ['T'] + a * ['A'], 'T': b * ['A'] + b * ['G'] + a * ['C']}
+        dmut = {'A': b * ['C'] + b * ['T'] + a * ['G'],
+                'C': b * ['A'] + b * ['G'] + a * ['T'],
+                'G': b * ['C'] + b * ['T'] + a * ['A'],
+                'T': b * ['A'] + b * ['G'] + a * ['C']}
         for nuc in self.revcom:
             if npr.random() <= mutrate:
                 new_nucl = random.choice(dmut[nuc])
@@ -92,19 +96,36 @@ def add_damage_fwd(self, geom_p, scale_min, scale_max):
         insert = list(self.seq)
         insertlen = len(self.seq)
         x = np.arange(1, insertlen + 1)
-        geom_dist = scale(geom.pmf(x, geom_p), scale_min, scale_max)
+        geom_dist = geom.pmf(x, geom_p)
+        # geom_dist = scale(geom.pmf(x, geom_p), scale_min, scale_max)
 
         for j in range(0, insertlen):
             pos = j
             opp_pos = insertlen - 1 - j
+            rnd = npr.rand()
 
-            # C -> T deamination
-            if insert[pos] == "C" and geom_dist[j] >= npr.rand():
-                insert[pos] = "T"
+            # C -> T deamination - deamination
+            if rnd <= scale_max:
+                if insert[pos] == "C" and geom_dist[j] >= rnd:
+                    insert[pos] = "T"
+
+            # C -> T deamination - baseline
+            if rnd <= scale_min:
+                if insert[pos] == "C":
+                    insert[pos] = "T"
+
+            rnd = npr.rand()
 
             # G -> A deamination
-            if insert[opp_pos] == "G" and geom_dist[j] >= npr.rand():
-                insert[opp_pos] = "A"
+            if rnd <= scale_max:
+                if insert[opp_pos] == "G" and geom_dist[j] >= rnd:
+                    insert[opp_pos] = "A"
+
+            # G -> A deamination - baseline
+            if rnd <= scale_min:
+                if insert[opp_pos] == "G":
+                    insert[opp_pos] = "A"
+
         self.seq = "".join(insert)
         return(self.seq)
 
@@ -113,19 +134,33 @@ def add_damage_rev(self, geom_p, scale_min, scale_max):
         insert = list(self.revcom)
         insertlen = len(self.revcom)
         x = np.arange(1, insertlen + 1)
-        geom_dist = scale(geom.pmf(x, geom_p), scale_min, scale_max)
+        geom_dist = geom.pmf(x, geom_p)
+        # geom_dist = scale(geom.pmf(x, geom_p), scale_min, scale_max)
 
         for j in range(0, insertlen):
             pos = j
             opp_pos = insertlen - 1 - j
 
-            # C -> T deamination
-            if insert[pos] == "C" and geom_dist[j] >= npr.rand():
-                insert[pos] = "T"
+            # C -> T deamination - deamination
+            if npr.rand() <= scale_max:
+                if insert[pos] == "C" and geom_dist[j] >= npr.rand():
+                    insert[pos] = "T"
+
+            # C -> T deamination - baseline
+            if npr.rand() <= scale_min:
+                if insert[pos] == "C":
+                    insert[pos] = "T"
 
             # G -> A deamination
-            if insert[opp_pos] == "G" and geom_dist[j] >= npr.rand():
-                insert[opp_pos] = "A"
+            if npr.rand() <= scale_max:
+                if insert[opp_pos] == "G" and geom_dist[j] >= npr.rand():
+                    insert[opp_pos] = "A"
+
+            # G -> A deamination - baseline
+            if npr.rand() <= scale_min:
+                if insert[opp_pos] == "G":
+                    insert[opp_pos] = "A"
+
         self.revcom = "".join(insert)
         return(self.revcom)
 
diff --git a/conda/build.sh b/conda/build.sh
index 02a215d..1d4ad31 100644
--- a/conda/build.sh
+++ b/conda/build.sh
@@ -1,8 +1,5 @@
 #!/bin/bash
 
-cp adrsm $PREFIX/bin
-cp -r lib $PREFIX/bin
-mkdir -p $PREFIX/data/quality
-cp data/quality/*.p $PREFIX/data/quality
+$PYTHON setup.py install --single-version-externally-managed --record=record.txt
 
 
diff --git a/conda/meta.yaml b/conda/meta.yaml
index e69c072..e7017cd 100644
--- a/conda/meta.yaml
+++ b/conda/meta.yaml
@@ -1,21 +1,26 @@
+{% set version = "v0.9.4" %}
+
 package:
   name: adrsm
-  version: "0.9.3"
+  version: {{ version }}
 
 source:
-  path: ../
+  git_rev: {{ version }}
+  git_url: https://github.com/maxibor/adrsm.git
 
 requirements:
-  run:
+  build:
     - python 3.6.*
-    - numpy
-    - scipy
-    - requests
+    - setuptools
+  run:
+    - python >=3.6
+    - numpy >=1.18.1
+    - scipy >=1.3.1
+    - requests >=2.22
+    - click
 
 
 test:
-  source_files:
-    - data
   commands:
     - adrsm --help
 
diff --git a/conda/meta_local.yaml b/conda/meta_local.yaml
new file mode 100644
index 0000000..267ff8f
--- /dev/null
+++ b/conda/meta_local.yaml
@@ -0,0 +1,29 @@
+{% set version = "v0.9.4" %}
+
+package:
+  name: adrsm
+  version: {{ version }}
+
+source:
+  path: ../
+
+requirements:
+  build:
+    - python 3.6.*
+    - setuptools
+  run:
+    - python >=3.6
+    - numpy >=1.18.1
+    - scipy >=1.3.1
+    - requests >=2.22
+    - click
+
+
+test:
+  commands:
+    - adrsm --help
+
+about:
+  home: https://github.com/maxibor/adrsm
+  license: MIT
+  summary: 'Ancient DNA Read Simulator for Metagenomics'
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..4dc00f2
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,26 @@
+from adrsm import __version__
+from setuptools import setup, find_packages
+
+setup(
+    name='adrsm',
+    version=__version__,
+    description='Ancient DNA Read Simulator for Metagenomics',
+    long_description=open("README.md").read(),
+    url='https://github.com/maxibor/adrsm',
+    long_description_content_type="text/markdown",
+    license='MIT',
+    python_requires=">=3.6",
+    install_requires=[
+        'numpy >=1.18.1',
+        'scipy >= 1.3.1',
+        'requests >= 2.22',
+        'click'
+    ],
+    packages=find_packages(include=['adrsm', 'adrsm.lib']),
+    package_data= {'adrsm': ['data/quality/fwd_qual.p','data/quality/rev_qual.p'] },
+    entry_points={
+        'console_scripts': [
+            'adrsm= adrsm.adrsm:cli'
+        ]
+    }
+)
\ No newline at end of file
diff --git a/test/Makefile b/test/Makefile
new file mode 100644
index 0000000..7f941a9
--- /dev/null
+++ b/test/Makefile
@@ -0,0 +1,23 @@
+all: multiqc_report.html
+
+bt_b_anthracis*: data/genomes/Bacillus_anthracis.fa
+	bowtie2-build data/genomes/Bacillus_anthracis.fa bt_b_anthracis
+
+metagenome.1.fastq metagenome.2.fastq: data/b_anthracis.csv
+	adrsm -p 0.8 -M 0.04 -m 0.01 data/b_anthracis.csv
+
+metagenome_collapse.fq *_fastqc.html metagenome.settings: metagenome.1.fastq metagenome.2.fastq
+	fastqc metagenome.*.fastq
+	AdapterRemoval --basename metagenome --file1 metagenome.1.fastq --collapse --file2 metagenome.2.fastq --outputcollapsed metagenome_collapse.fq
+
+aligned.bam: metagenome_collapse.fq bt_b_anthracis*
+	bowtie2 -x bt_b_anthracis -U metagenome_collapse.fq | samtools view -bS -F 4 - | samtools sort - > aligned.bam
+
+tmp/aligned/DamagePlot.pdf: aligned.bam data/genomes/Bacillus_anthracis.fa
+	damageprofiler -i aligned.bam -r data/genomes/Bacillus_anthracis.fa -o tmp
+
+multiqc_report.html: metagenome.settings tmp/aligned/DamagePlot.pdf
+	multiqc .
+
+clean:
+	rm -rf metagenome* aligned.bam tmp stats.csv bt_b_anthracis* multiqc*
diff --git a/data/short_genome_list.csv b/test/data/b_anthracis.csv
similarity index 50%
rename from data/short_genome_list.csv
rename to test/data/b_anthracis.csv
index 175c152..f6bb026 100644
--- a/data/short_genome_list.csv
+++ b/test/data/b_anthracis.csv
@@ -1,3 +1,2 @@
 genome(mandatory), insert_size(mandatory), coverage(mandatory), deamination(mandatory), mutation_rate(optional), age(optional)
-./data/genomes/Agrobacterium_tumefaciens.fa, 47 , 0.1, yes, 10e-7, 10000
-./data/genomes/Bacillus_anthracis.fa, 48, 0.2, no
+./data/genomes/Bacillus_anthracis.fa, 60, 0.2, yes
diff --git a/data/genomes/Agrobacterium_tumefaciens.fa b/test/data/genomes/Agrobacterium_tumefaciens.fa
similarity index 100%
rename from data/genomes/Agrobacterium_tumefaciens.fa
rename to test/data/genomes/Agrobacterium_tumefaciens.fa
diff --git a/data/genomes/Bacillus_anthracis.fa b/test/data/genomes/Bacillus_anthracis.fa
similarity index 100%
rename from data/genomes/Bacillus_anthracis.fa
rename to test/data/genomes/Bacillus_anthracis.fa
diff --git a/test/data/genomes/Bacillus_anthracis.fa.fai b/test/data/genomes/Bacillus_anthracis.fa.fai
new file mode 100644
index 0000000..1e34692
--- /dev/null
+++ b/test/data/genomes/Bacillus_anthracis.fa.fai
@@ -0,0 +1,31 @@
+NZ_JHCB02000001.1	14845	102	80	81
+NZ_JHCB02000002.1	48399	15235	80	81
+NZ_JHCB02000003.1	58375	64341	80	81
+NZ_JHCB02000004.1	95357	123548	80	81
+NZ_JHCB02000005.1	15842	220199	80	81
+NZ_JHCB02000006.1	8614	236342	80	81
+NZ_JHCB02000007.1	5273	245166	80	81
+NZ_JHCB02000008.1	160246	250606	80	81
+NZ_JHCB02000009.1	77364	412958	80	81
+NZ_JHCB02000010.1	204227	491391	80	81
+NZ_JHCB02000011.1	436056	698272	80	81
+NZ_JHCB02000012.1	482263	1139880	80	81
+NZ_JHCB02000013.1	16470	1628274	80	81
+NZ_JHCB02000014.1	1114119	1645051	80	81
+NZ_JHCB02000015.1	46007	2773199	80	81
+NZ_JHCB02000016.1	25139	2819884	80	81
+NZ_JHCB02000017.1	35708	2845440	80	81
+NZ_JHCB02000018.1	898379	2881696	80	81
+NZ_JHCB02000019.1	82218	3791407	80	81
+NZ_JHCB02000020.1	345264	3874754	80	81
+NZ_JHCB02000021.1	440320	4224435	80	81
+NZ_JHCB02000022.1	263136	4670360	80	81
+NZ_JHCB02000023.1	49679	4936888	80	81
+NZ_JHCB02000024.1	239264	4987289	80	81
+NZ_JHCB02000025.1	94786	5229646	80	81
+NZ_JHCB02000026.1	66512	5325719	80	81
+NZ_JHCB02000027.1	61192	5393165	80	81
+NZ_JHCB02000028.1	45102	5455224	80	81
+NZ_JHCB02000029.1	5864	5500992	80	81
+NZ_JHCB02000030.1	1260	5507032	80	81
+NZ_JHCB02000031.1	3428	5508410	80	81
diff --git a/test/data/short_genome_list.csv b/test/data/short_genome_list.csv
new file mode 100644
index 0000000..85cc72b
--- /dev/null
+++ b/test/data/short_genome_list.csv
@@ -0,0 +1,3 @@
+genome(mandatory), insert_size(mandatory), coverage(mandatory), deamination(mandatory), mutation_rate(optional), age(optional)
+./test/data/genomes/Agrobacterium_tumefaciens.fa, 47 , 0.1, yes, 10e-7, 10000
+./test/data/genomes/Bacillus_anthracis.fa, 48, 0.2, no