Skip to content

Commit

Permalink
significantly spead up count total reads function
Browse files Browse the repository at this point in the history
  • Loading branch information
gpratt committed Oct 3, 2014
1 parent 9dac56e commit 0f50978
Showing 1 changed file with 12 additions and 16 deletions.
28 changes: 12 additions & 16 deletions clipper/src/CLIP_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -772,20 +772,19 @@ def make_dir(dir_name):
os.mkdir(dir_name)


def count_total_reads(bedtool, counts):

"""
Counts total reads in genes
bam: bedtool (reading in a bam file) all the reads in the original clip-seq experiment
counts: HTSeq Coverage file to count reads in, defines locations of all genes to analize
"""
def count_total_reads(genomic_regions, counts):
gas = HTSeq.GenomicArrayOfSets( "auto", stranded=True)

result = get_densities(bedtool, counts)
return sum([sum(item) for item in result])
for region_name, cur_region in genomic_regions.items():
for interval in bed_to_genomic_interval(cur_region):
gas[interval] += region_name

total_counts = defaultdict(int)
for overlapping_interval, count in counts.steps():
for step in gas[overlapping_interval].steps():
for item in step[1]:
total_counts[item] += count
return total_counts

def count_reads_per_cluster(bedtool, counts):

Expand Down Expand Up @@ -1137,10 +1136,7 @@ def main(bedtool, bam, species, runPhast=False, motifs=[], k=[6], nrand=3,
reads_in_clusters, reads_per_cluster = count_reads_per_cluster(cluster_regions['all']['real'], counts)

print "counting total reads in regions"
region_read_counts = {}
for region_name, cur_region in genomic_regions.items():
region_read_counts[region_name] = count_total_reads(cur_region, counts)

region_read_counts = count_total_reads(genomic_regions, counts)
total_reads = region_read_counts['genes']

print "clustering peaks"
Expand Down

0 comments on commit 0f50978

Please sign in to comment.