diff --git a/HISTORY.rst b/HISTORY.rst index f2def88..85c51c4 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -2,6 +2,12 @@ History ======= +0.2.4 (2015-11-25) +------------------ +* Bugfix: ribocount now returns correct read counts if an offset is provided. +* Bugfix: Don't include read counts in the longest ORF start or stop positions + i.e., only include reads upstream or downstream of the start or stop positions. + 0.2.3 (2015-11-24) ------------------ * Bugfix: Recalculate read frame positions after applying offset. diff --git a/riboplot/__init__.py b/riboplot/__init__.py index f8a0e9f..9650fd6 100644 --- a/riboplot/__init__.py +++ b/riboplot/__init__.py @@ -2,4 +2,4 @@ __author__ = 'Vimalkumar Velayudhan' __email__ = 'vimalkumarvelayudhan@gmail.com' -__version__ = '0.2.3' +__version__ = '0.2.4' diff --git a/riboplot/ribocore.py b/riboplot/ribocore.py index 3ef9ef0..ed01c97 100644 --- a/riboplot/ribocore.py +++ b/riboplot/ribocore.py @@ -244,12 +244,12 @@ def filter_ribo_counts(counts, orf_start=None, orf_stop=None): filtered_counts.pop(position) elif orf_start: # check if current position is upstream of ORF start. if not, remove - if position > orf_start: + if position >= orf_start: filtered_counts.pop(position) elif orf_stop: # check if current position is downstream of ORF stop. If not, # remove - if position < orf_stop: + if position <= orf_stop: filtered_counts.pop(position) # calculate total reads for this transcript diff --git a/riboplot/ribocount.py b/riboplot/ribocount.py index 0f18ffb..be2dfbf 100644 --- a/riboplot/ribocount.py +++ b/riboplot/ribocount.py @@ -97,10 +97,12 @@ def main(args): log.info('Get RiboSeq read counts for all transcripts in FASTA') for transcript in f.references: - ribo_counts, ribo_reads = ribocore.get_ribo_counts(b, transcript, read_length) + ribo_counts, ribo_reads = ribocore.get_ribo_counts(ribo_fileobj=b, transcript_name=transcript, + read_length=read_length, read_offset=read_offset) if not ribo_reads: # no reads for this transcript. skip. continue + transcript_sequence = f[transcript] # By default, all counts will be written (ribo_counts) # If 5' or 3' counts requested, filter and use # those counts for printing instead @@ -113,7 +115,7 @@ def main(args): if count_five or count_three: # use default start and stop codons and find ORFs in all 3 # frames (+) - orfs = ribocore.get_three_frame_orfs(sequence=f[transcript]) + orfs = ribocore.get_three_frame_orfs(sequence=transcript_sequence) if not len(orfs): log.debug('No ORFs for transcript {0}'.format(transcript)) continue @@ -136,13 +138,14 @@ def main(args): count += 1 csv_file = 'RiboCounts{}.csv'.format(count) with open(os.path.join(csv_dir, csv_file), 'w') as cw: - cw.write('"Position","Frame 1","Frame 2","Frame 3"\n') - for pos in range(1, len(f[transcript]) + 1): + cw.write('"Position","Nucleotide","Frame 1","Frame 2","Frame 3"\n') + for pos in range(1, len(transcript_sequence) + 1): + nucleotide = transcript_sequence[pos - 1] if pos in write_counts: - cw.write('{0},{1},{2},{3}\n'.format( - pos, write_counts[pos][1], write_counts[pos][2], write_counts[pos][3])) + cw.write('{0},{1},{2},{3},{4}\n'.format( + pos, nucleotide, write_counts[pos][1], write_counts[pos][2], write_counts[pos][3])) else: - cw.write('{0},{1},{2},{3}\n'.format(pos, 0, 0, 0)) + cw.write('{0},{1},{2},{3},{4}\n'.format(pos, nucleotide, 0, 0, 0)) # HTML table table_body += '{0}{1}'.format(transcript, ribo_reads) if count_five: diff --git a/riboplot/riboplot.py b/riboplot/riboplot.py index 5e7599b..a92fa9b 100644 --- a/riboplot/riboplot.py +++ b/riboplot/riboplot.py @@ -174,7 +174,6 @@ def plot_profile(ribo_counts, transcript_name, transcript_length, edgecolor=colors['rna'], label='RNA') ax_rna.set_zorder(1) -# import pudb;pudb.set_trace(); frame_counts = {1: {}, 2: {}, 3: {}} for k, v in ribo_counts.iteritems(): for fr in (1, 2, 3): @@ -191,9 +190,6 @@ def plot_profile(ribo_counts, transcript_name, transcript_length, for frame in (1, 2, 3): color = colors['frames'][frame - 1] -# if read_offset: -# x_vals = [pos + read_offset for pos in frame_counts[frame].keys()] -# else: x_vals = frame_counts[frame].keys() ax2.bar(x_vals, frame_counts[frame].values(), color=color, facecolor=color, edgecolor=color) @@ -383,7 +379,7 @@ def main(args): log.info('Writing RiboSeq read counts for {}'.format(transcript_name)) with open(os.path.join(output_path, 'RiboCounts.csv'), 'w') as f: - f.write('"Position","Sequence","Frame 1","Frame 2","Frame 3"\n') + f.write('"Position","Nucleotide","Frame 1","Frame 2","Frame 3"\n') for pos in range(1, transcript_length + 1): if pos in ribo_counts: diff --git a/setup.py b/setup.py index 467af14..4316cf6 100644 --- a/setup.py +++ b/setup.py @@ -22,7 +22,7 @@ setup( name='riboplot', - version='0.2.3', + version='0.2.4', description="Plot read counts of RiboSeq data from BAM format alignment files", long_description=readme + '\n\n' + history, author="Vimalkumar Velayudhan",