Skip to content

Commit

Permalink
added quick hack to find the exon (or region number) of bound RBP
Browse files Browse the repository at this point in the history
  • Loading branch information
gpratt committed Oct 2, 2014
1 parent 8cec45e commit 4e2d117
Showing 1 changed file with 9 additions and 6 deletions.
15 changes: 9 additions & 6 deletions clipper/src/CLIP_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -429,22 +429,26 @@ def get_distributions(bedtool, region_dict):
total_distributions = []
num_errors = []
num_missed = []
bound_in_regions = []
genes = []
for interval in bedtool:
try:
#will need to redefine this to use intervals
exon, total = RNA_position_interval(interval, region_dict)
exon, total, bound_in_region, gene = RNA_position_interval(interval, region_dict)

if total is not None:
total_distributions.append(total)
exon_distributions.append(exon)
bound_in_regions.append(bound_in_region)
genes.append(gene)
else:
num_missed.append(interval)
except Exception as e:
print e
num_errors.append(interval)

return {'individual': exon_distributions, 'total': total_distributions,
'errors': num_errors, 'missed': num_missed}
return {'individual': exon_distributions, 'total': total_distributions, "gene_ids": genes,
"region_numbers": bound_in_regions, 'errors': num_errors, 'missed': num_missed}


def RNA_position_interval(interval, location_dict):
Expand Down Expand Up @@ -484,7 +488,7 @@ def RNA_position_interval(interval, location_dict):
total_length = float(sum(region.length for region in location_dict[gene]))

running_length = 0
for region in location_dict[gene]:
for region_number, region in enumerate(location_dict[gene]):
length = float(region.length)

if int(region.start) <= peak_center <= int(region.stop):
Expand All @@ -498,7 +502,6 @@ def RNA_position_interval(interval, location_dict):
total_location = running_length + (region.stop - peak_center)
total_fraction = total_location / total_length
individual_fraction = (region.stop - peak_center) / length

else:
raise ValueError("Strand not correct strand is %s" % interval.strand)

Expand All @@ -508,7 +511,7 @@ def RNA_position_interval(interval, location_dict):
gene,
total_length,
total_location))
return individual_fraction, total_fraction
return individual_fraction, total_fraction, region_number + 1, gene

running_length += length

Expand Down

0 comments on commit 4e2d117

Please sign in to comment.