From 9f1c9259c14341bcdb81764677301372592681fe Mon Sep 17 00:00:00 2001 From: ksahlin Date: Thu, 12 Nov 2015 17:46:08 -0500 Subject: [PATCH] removed some prints to stdout and added documentation --- BESST/libmetrics.py | 8 ++++---- README.md | 8 ++++++++ docs/MANUAL.md | 8 ++++++++ 3 files changed, 20 insertions(+), 4 deletions(-) diff --git a/BESST/libmetrics.py b/BESST/libmetrics.py index 0c3b194..1051c4a 100644 --- a/BESST/libmetrics.py +++ b/BESST/libmetrics.py @@ -130,7 +130,7 @@ def sum_chunks(l, n): for i in xrange(0, len(l), n): yield sum(l[i:i+n]) -def getdistr(ins_size_reads, cont_lengths_list, param): +def getdistr(ins_size_reads, cont_lengths_list, param, Information): largest_contigs = map(lambda x: int(x),sorted(nlargest(1000, cont_lengths_list))) #print largest_contigs #sorted_lengths = sorted(cont_lengths_list) @@ -199,7 +199,7 @@ def getdistr(ins_size_reads, cont_lengths_list, param): adj_distr_chunked = list(sum_chunks(adjusted_distribution, chunk_size)) mode_adj = (argmax(adj_distr_chunked) + 0.5)*chunk_size mode_for_different_windows.append(int(mode_adj)) - print "mode for chunk size ", chunk_size, " : ", mode_adj + print >> Information, "mode for chunk size ", chunk_size, " : ", mode_adj mode_adj = sorted(mode_for_different_windows)[int(len(mode_for_different_windows)/2)] print "Choosing mode:", mode_adj @@ -211,7 +211,7 @@ def getdistr(ins_size_reads, cont_lengths_list, param): # m_3 = sum(map(lambda x: (x - mean_isize) ** 3, ins_size_reads))/n skew_adj = m_3 / sigma_adj**3 - print mu_adj, sigma_adj, skew_adj + print 'mu_adjusted:{0}, sigma_adjusted:{1}, skewness_adjusted:{2}'.format(mu_adj, sigma_adj, skew_adj) return adjusted_distribution, mu_adj, sigma_adj, skew_adj, median_adj, mode_adj #with pysam.Samfile(param.bamfile, 'rb') as bam_file: @@ -334,7 +334,7 @@ def get_metrics(bam_file, param, Information): print >> Information, 'Skewness of distribution: ', param.skewness # weight each observation with how likely it is to see it - adj_distr, 
mu_adj, sigma_adj, skew_adj, median_adj, mode_adj = getdistr(ins_size_reads, cont_lengths_list, param) + adj_distr, mu_adj, sigma_adj, skew_adj, median_adj, mode_adj = getdistr(ins_size_reads, cont_lengths_list, param, Information) param.skew_adj = skew_adj print >> Information, 'Mean of getdistr adjusted distribution: ', mu_adj print >> Information, 'Sigma of getdistr adjusted distribution: ', sigma_adj diff --git a/README.md b/README.md index a50f948..b8b5420 100644 --- a/README.md +++ b/README.md @@ -18,6 +18,14 @@ See docs/INSTALL.md. Q&A ------------------ +#### What parameters should I call BESST with? +BESST is designed to infer as much as possible from data. If this is your first time running BESST, it is highly recommended to only run BESST as + +```sh +runBESST -c /path/to/contigfile.fa -f /path/to/file1.bam /path/to/file2.bam ... -o /path/to/output --orientation {fr/rf} +``` +For more details, see section "INPUT" further down. BESST will then infer as much as possible from data and print everything to "/path/to/output/BESST_output/Statistics.txt". This file is useful for debugging. + #### What aligner should I use? BESST requires only a sorted and indexed BAM file -- your favourite aligner can be used. However, we have had the best experience with BWA-mem using default parameters on most data used in our evaluations. diff --git a/docs/MANUAL.md b/docs/MANUAL.md index 88ffa49..46c7e06 100644 --- a/docs/MANUAL.md +++ b/docs/MANUAL.md @@ -8,6 +8,14 @@ See docs/INSTALL.md. Q&A ------------------ +#### What parameters should I call BESST with? +BESST is designed to infer as much as possible from data. If this is your first time running BESST, it is highly recommended to only run BESST as + +```sh +runBESST -c /path/to/contigfile.fa -f /path/to/file1.bam /path/to/file2.bam ... -o /path/to/output --orientation {fr/rf} +``` +For more details, see section "INPUT" further down. 
BESST will then infer as much as possible from data and print everything to "/path/to/output/BESST_output/Statistics.txt". This file is useful for debugging. + #### What aligner should I use? BESST requires only a sorted and indexed bamfile -- your favourite aligner can be used. However, we have had the best experience with BWA-mem using default parameters on most data used in our evaluations.