Skip to content

Commit

Permalink
Merge pull request #1 from maxibor/dev
Browse files Browse the repository at this point in the history
Reorganizing to use setupTools
  • Loading branch information
maxibor authored Feb 25, 2020
2 parents 826bcd7 + 523ef1b commit d6cdca9
Show file tree
Hide file tree
Showing 23 changed files with 360 additions and 198 deletions.
14 changes: 14 additions & 0 deletions .github/workflows/publish_conda.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
name: publish_conda

on: [release]

jobs:
publish:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v1
- name: publish-to-conda
uses: maxibor/conda-package-publish-action@v1.1
with:
subDir: 'conda'
AnacondaToken: ${{ secrets.ANACONDA_TOKEN }}
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,7 @@ r1.fq
r2.fq
metagenome.*
stats.csv
adrsm.egg-info
__pycache__
.ipynb_checkpoints
dist
5 changes: 4 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,8 @@ install:
- pip install numpy
- pip install scipy
- pip install requests
- pip install click
# command to run tests
script: python adrsm -p 0.5 -m 0.001 -M 0.3 -t 2 ./data/short_genome_list.csv
script:
- python setup.py install
- adrsm -p 0.5 -m 0.001 -M 0.3 -t 2 ./test/data/short_genome_list.csv
159 changes: 0 additions & 159 deletions adrsm

This file was deleted.

1 change: 1 addition & 0 deletions adrsm/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
__version__ = "0.9.4"
156 changes: 156 additions & 0 deletions adrsm/adrsm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
#!/usr/bin/env python

from numpy import random as npr
from .lib import adrsmlib as ad
from . import __version__
import click

# Command-line entry point. Each decorator below declares one CLI parameter;
# click lower-cases the declared names, so the values reach `main` as
# conffile, readlength, nbinom, fwdadapt, revadapt, geom_p, mind, maxd, seed,
# threads, output and stats.
@click.command()
@click.version_option(__version__)
@click.argument('confFile', type=click.Path(exists=True,
                                            readable=True,
                                            resolve_path=True))
@click.option('-r',
              '--readLength',
              default=76,  # was the string '76'; keep the default in the declared int type
              type=int,
              show_default=True,
              help='Average read length')
@click.option('-n',
              '--nbinom',
              default=8,
              type=int,
              show_default=True,
              help='n parameter for Negative Binomial insert length distribution')
@click.option('-fwd',
              '--fwdAdapt',
              default='AGATCGGAAGAGCACACGTCTGAACTCCAGTCACNNNNNNATCTCGTATGCCGTCTTCTGCTTG',
              type=str,
              show_default=True,
              help='Forward adaptor sequence')
@click.option('-rev',
              '--revAdapt',
              default='AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATT',
              type=str,
              show_default=True,
              help='Reverse adaptor sequence')
@click.option('-p',
              '--geom_p',
              default=0.5,
              type=click.FloatRange(min=0.0, max=1.0),
              show_default=True,
              help='Geometric distribution parameter for deamination')
@click.option('-m',
              '--minD',
              default=0.01,
              type=click.FloatRange(min=0.0, max=1.0),
              show_default=True,
              help='Deamination substitution base frequency')
@click.option('-M',
              '--maxD',
              default=0.3,
              type=click.FloatRange(min=0.0, max=1.0),
              show_default=True,
              help='Deamination substitution max frequency')
@click.option('-s',
              '--seed',
              default=42,
              type=int,
              show_default=True,
              help='Seed for random generator')
@click.option('-t',
              '--threads',
              default=2,
              type=click.IntRange(min=1, max=1024),
              show_default=True,
              help='Number of threads for parallel processing')
@click.option('-o',
              '--output',
              default='./metagenome',
              type=click.Path(file_okay=True, writable=True, resolve_path=True),
              show_default=True,
              help='Fastq output file basename')
# '-s' was declared for both --seed and --stats, so one option silently
# shadowed the other on the command line. Short flags must be unique:
# --seed keeps '-s', --stats now uses '-S'. The long names are unchanged.
@click.option('-S',
              '--stats',
              default='./stats.csv',
              type=click.Path(file_okay=True, writable=True, resolve_path=True),
              show_default=True,
              help='Summary statistics file')
# NOTE(review): `no_args_is_help` below is an ordinary keyword default of the
# callback; it is not one of the declared click parameters and is never read
# in the body. It was presumably meant as
# `@click.command(no_args_is_help=True)` - confirm that the supported click
# version accepts that argument before moving it. Kept so the callable's
# signature stays unchanged.
def cli(no_args_is_help=True, **kwargs):
    """\b
    ==================================================
    ADRSM: Ancient DNA Read Simulator for Metagenomics
    Author: Maxime Borry
    Contact: <borry[at]shh.mpg.de>
    Homepage & Documentation: github.com/maxibor/adrsm
    CONFFILE: path to ADRSM configuration file
    """
    # Forward every parsed CLI parameter to the simulation driver unchanged.
    main(**kwargs)

def read_config(infile):
    """
    Read the ADRSM configuration file and return a per-genome settings dict.

    The first line is treated as a header and skipped. Every following
    non-blank line is split on commas and all spaces are removed from each
    field. Columns, in order: genome, insert size (int), coverage (float),
    deamination flag (interpreted by ``ad.parse_yes_no``) and, optionally,
    mutation rate (float) and age (float).

    Args:
        infile: path to the comma-separated configuration file.

    Returns:
        dict mapping the genome field of each row to a dict with the keys
        'size', 'cov', 'deam', 'mutate', 'mutrate' and 'age'. When the
        mutation rate is absent, empty or zero, 'mutate' is False and
        'mutrate' / 'age' are 0. A file with no data rows yields ``{}``.

    Raises:
        ValueError: if a row has fewer than four columns, if a non-zero
            mutation rate is given without an age, or if a numeric field
            cannot be converted.
    """
    genomes = {}
    with open(infile, "r") as f:
        # Skip the header; the default keeps an empty file from leaking
        # StopIteration to the caller.
        next(f, None)
        for lineno, line in enumerate(f, start=2):
            fields = [field.replace(" ", "")
                      for field in line.rstrip().split(",")]
            # Blank or whitespace-only rows (e.g. a trailing newline) used
            # to crash with IndexError; ignore them instead.
            if not any(fields):
                continue
            if len(fields) < 4:
                raise ValueError(
                    "{}: line {}: expected at least 4 comma-separated "
                    "columns, found {}".format(infile, lineno, len(fields)))

            agenome = fields[0]
            ainsert = int(fields[1])
            acov = float(fields[2])
            deamination = ad.parse_yes_no(fields[3])

            # Parse the optional mutation rate once; an empty field counts
            # as "no mutation".
            has_rate = len(fields) > 4 and fields[4] != ""
            mutrate = float(fields[4]) if has_rate else 0.0
            if mutrate != 0.0:
                if len(fields) < 6 or fields[5] == "":
                    raise ValueError(
                        "{}: line {}: a non-zero mutation rate requires an "
                        "age column".format(infile, lineno))
                mutate = True
                age = float(fields[5])
            else:
                mutate = False
                mutrate = 0
                age = 0

            genomes[agenome] = {'size': ainsert,
                                'cov': acov,
                                'deam': deamination,
                                'mutate': mutate,
                                'mutrate': mutrate,
                                'age': age}
    return genomes


def main(conffile, readlength, nbinom, fwdadapt, revadapt, geom_p, mind, maxd, seed, threads, output, stats):
    """
    Run the read simulation for every genome listed in the configuration.

    Seeds numpy's random module with ``seed``, loads the per-genome settings
    with ``read_config(conffile)``, calls ``ad.run_read_simulation_multi``
    once per genome, stores each call's return value under
    ``ad.get_basename(genome)``, hands the collected dict to
    ``ad.write_stat`` and finally prints a short summary.

    The remaining parameters are passed straight through to
    ``ad.run_read_simulation_multi`` (readlength, nbinom, fwdadapt,
    revadapt, geom_p, mind, maxd, threads, output) or to ``ad.write_stat``
    (stats).
    """
    min_length = 20
    npr.seed(seed)

    per_genome_stats = {}
    for genome, settings in read_config(conffile).items():
        result = ad.run_read_simulation_multi(
            INFILE=genome,
            COV=settings['cov'],
            READLEN=readlength,
            INSERLEN=settings['size'],
            NBINOM=nbinom,
            A1=fwdadapt,
            A2=revadapt,
            MINLENGTH=min_length,
            MUTATE=settings['mutate'],
            MUTRATE=settings['mutrate'],
            AGE=settings['age'],
            DAMAGE=settings['deam'],
            GEOM_P=geom_p,
            THEMIN=mind,
            THEMAX=maxd,
            PROCESS=threads,
            FASTQ_OUT=output,
        )
        per_genome_stats[ad.get_basename(genome)] = result

    ad.write_stat(stat_dict=per_genome_stats, stat_out=stats)

    # User-facing summary of what was produced and where.
    banner = "\n-- ADRSM v" + __version__ + " finished generating this mock metagenome --"
    fastq_note = "-- FASTQ files written to " + output + ".1.fastq and " + output + ".2.fastq --"
    stats_note = "-- Statistic file written to " + stats + " --"
    print(banner)
    print(fastq_note)
    print(stats_note)

# Script entry point: invoke the click command `cli` when this module is
# executed rather than imported.
# NOTE(review): the module uses package-relative imports (`from .lib import ...`),
# so it presumably has to be started as `python -m adrsm.adrsm` or through an
# installed console script rather than as a bare file - confirm against setup.py.
if __name__ == "__main__":
    cli()
Binary file added adrsm/data/quality/fwd_qual.p
Binary file not shown.
Binary file added adrsm/data/quality/rev_qual.p
Binary file not shown.
File renamed without changes.
11 changes: 3 additions & 8 deletions lib/adrsmlib.py → adrsm/lib/adrsmlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import multiprocessing
import pickle
from functools import partial
from pkg_resources import resource_filename
from . import sequencefunctions as sf
from . import markov as mk

Expand Down Expand Up @@ -95,10 +96,7 @@ def get_fwd_qual():
ret = pickle.load(open("data/quality/fwd_qual.p", 'rb'))
return(ret)
except FileNotFoundError:
cmd = "which adrsm"
res = subprocess.check_output(cmd, shell=True)
res = res.decode('utf-8').rstrip()
path = "/".join(res.split("/")[:-2])+"/data/quality/fwd_qual.p"
path = resource_filename('adrsm', '/data/quality/fwd_qual.p')
ret = pickle.load(open(path, 'rb'))
return(ret)

Expand All @@ -108,10 +106,7 @@ def get_rev_qual():
ret = pickle.load(open("data/quality/fwd_qual.p", 'rb'))
return(ret)
except FileNotFoundError:
cmd = "which adrsm"
res = subprocess.check_output(cmd, shell=True)
res = res.decode('utf-8').rstrip()
path = "/".join(res.split("/")[:-2])+"/data/quality/rev_qual.p"
path = resource_filename('adrsm', '/data/quality/rev_qual.p')
ret = pickle.load(open(path, 'rb'))
return(ret)

Expand Down
File renamed without changes.
File renamed without changes.
Loading

0 comments on commit d6cdca9

Please sign in to comment.