Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/YeoLab/clipper
Browse files Browse the repository at this point in the history
  • Loading branch information
Gabriel Pratt committed Oct 2, 2014
2 parents 1eba0ed + 4e2d117 commit 9dac56e
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 21 deletions.
15 changes: 9 additions & 6 deletions clipper/src/CLIP_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -429,22 +429,26 @@ def get_distributions(bedtool, region_dict):
total_distributions = []
num_errors = []
num_missed = []
bound_in_regions = []
genes = []
for interval in bedtool:
try:
#will need to redefine this to use intervals
exon, total = RNA_position_interval(interval, region_dict)
exon, total, bound_in_region, gene = RNA_position_interval(interval, region_dict)

if total is not None:
total_distributions.append(total)
exon_distributions.append(exon)
bound_in_regions.append(bound_in_region)
genes.append(gene)
else:
num_missed.append(interval)
except Exception as e:
print e
num_errors.append(interval)

return {'individual': exon_distributions, 'total': total_distributions,
'errors': num_errors, 'missed': num_missed}
return {'individual': exon_distributions, 'total': total_distributions, "gene_ids": genes,
"region_numbers": bound_in_regions, 'errors': num_errors, 'missed': num_missed}


def RNA_position_interval(interval, location_dict):
Expand Down Expand Up @@ -484,7 +488,7 @@ def RNA_position_interval(interval, location_dict):
total_length = float(sum(region.length for region in location_dict[gene]))

running_length = 0
for region in location_dict[gene]:
for region_number, region in enumerate(location_dict[gene]):
length = float(region.length)

if int(region.start) <= peak_center <= int(region.stop):
Expand All @@ -498,7 +502,6 @@ def RNA_position_interval(interval, location_dict):
total_location = running_length + (region.stop - peak_center)
total_fraction = total_location / total_length
individual_fraction = (region.stop - peak_center) / length

else:
raise ValueError("Strand not correct strand is %s" % interval.strand)

Expand All @@ -508,7 +511,7 @@ def RNA_position_interval(interval, location_dict):
gene,
total_length,
total_location))
return individual_fraction, total_fraction
return individual_fraction, total_fraction, region_number + 1, gene

running_length += length

Expand Down
24 changes: 9 additions & 15 deletions clipper/src/get_genomic_regions.py
Original file line number Diff line number Diff line change
Expand Up @@ -356,33 +356,27 @@ def get_feature_locations(self, limit_genes=False, flush_cashe=False):

except ValueError:
pass

five_prime_ends = []
three_prime_ends = []
poly_a_sites = []
stop_codons = []
start_codons = []
transcription_start_sites = []

for i, gene_id in enumerate(self._db.features_of_type('gene')):
if i % 2000 == 0:
print "processed %d genes" % (i)
if i == 2000 and limit_genes:
break

gene = { "five_prime_ends" : [],
"three_prime_ends" : [],
"poly_a_sites" : [],
"stop_codons" : [],
"start_codons" : [],
"transcription_start_sites" : []}
gene = { "five_prime_ends": [],
"three_prime_ends": [],
"poly_a_sites": [],
"stop_codons": [],
"start_codons": [],
"transcription_start_sites": []}
try:
for exon in self._db.children(gene_id, featuretype='exon'):
exon_start = copy.deepcopy(exon)
exon_start.start = exon.start + 1

exon_stop = copy.deepcopy(exon)
exon_stop.start = exon_stop.stop
exon_stop.stop = exon_stop.stop + 1
exon_stop.stop += 1

if exon.strand == "-":
exon_start, exon_stop = exon_stop, exon_start
Expand All @@ -397,7 +391,7 @@ def get_feature_locations(self, limit_genes=False, flush_cashe=False):

transcript_stop = copy.deepcopy(transcript)
transcript_stop.start = transcript_stop.stop
transcript_stop.stop = transcript_stop.stop + 1
transcript_stop.stop += 1

if transcript.strand == "-":
transcript_start, transcript_stop = transcript_stop, transcript_start
Expand Down

0 comments on commit 9dac56e

Please sign in to comment.