-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
12 changed files
with
1,480 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
|
||
def CombineRegionInfo(listToCompress, regions):
    """Merge the rows of consecutive regions that share the same base name.

    The base name of a region is the text before its first '.', e.g.
    'GeneA' for 'GeneA.g.1.e'. Rows are paired with regions by position;
    every run of adjacent regions with an identical base name is collapsed
    into one row by concatenating the paired rows in order.

    Args:
        listToCompress: sequence of rows (lists), one per entry in regions.
            Not modified.
        regions: sequence of region name strings. Not modified.

    Returns:
        A list with one concatenated row per run of equal base names, in
        input order. Empty when regions is empty.

    Raises:
        IndexError: if listToCompress has fewer rows than regions.
    """
    from itertools import groupby

    base_names = [region.split('.')[0] for region in regions]

    newList = []
    # Group row indices (not the names themselves) so each run can pull its
    # rows straight from listToCompress. No sentinel element is needed, so a
    # trailing region that happens to be called 'end' is no longer lost.
    for _, run in groupby(range(len(base_names)), key=lambda i: base_names[i]):
        merged_row = []
        for i in run:
            merged_row.extend(listToCompress[i])
        newList.append(merged_row)

    return newList
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,109 @@ | ||
|
||
def CombineRowsList(listToCompress, regions):
    """Merge coverage rows of consecutive regions that share a base name.

    The base name of a region is the text before its first '.', e.g.
    'GeneA' for 'GeneA.g.1.e'. Rows are paired with regions by position.

    Example:
        listToCompress = [[1, 2, 2, 3], [1, 2, 2, 23, 4], [2, 4, 6, 7, 8], [1, 2, 3, 4]]
        regions = ['GeneA.g.1.e', 'GeneA.g.2.e', 'GeneA.g.1.e', 'GeneB.g.1.e']
        returns (['GeneA', 'GeneB'],
                 [[1, 2, 2, 3, 1, 2, 2, 23, 4, 2, 4, 6, 7, 8], [1, 2, 3, 4]],
                 [[4, 5, 5], [4]])

    Args:
        listToCompress: sequence of rows (lists), one per entry in regions.
            Not modified.
        regions: sequence of region name strings. Not modified.

    Returns:
        A tuple (region_name, newList, splice):
            region_name: base names with duplicates removed, in order of
                first appearance.
            newList: one concatenated row per run of adjacent regions with
                the same base name.
            splice: for each merged row, the lengths of the original rows
                that were concatenated into it (used by the region plot to
                place separating lines).

    Raises:
        IndexError: if listToCompress has fewer rows than regions.
    """
    from collections import OrderedDict
    from itertools import groupby

    base_names = [region.split('.')[0] for region in regions]

    newList = []
    splice = []
    # Group row indices by base name of adjacent regions. Working on indices
    # avoids the sentinel element the earlier implementation appended to both
    # inputs, which silently dropped a trailing region named 'end'.
    for _, run in groupby(range(len(base_names)), key=lambda i: base_names[i]):
        merged_row = []
        row_lengths = []
        for i in run:
            row = listToCompress[i]
            merged_row.extend(row)
            row_lengths.append(len(row))
        newList.append(merged_row)
        splice.append(row_lengths)

    # NOTE(review): names are de-duplicated over the whole input while rows
    # are merged only across adjacent regions, so region_name can be shorter
    # than newList when equal base names are not contiguous. Kept as is for
    # backward compatibility; confirm callers always pass grouped regions.
    region_name = list(OrderedDict.fromkeys(base_names))

    return region_name, newList, splice
|
||
######################### Generates a new list of merged information rows for the validation table. The original region information list contains
# the values chr, start, stop and length. If the information comes from the same region name, the values are concatenated horizontally.
# listToCompress = [['chr 1', 'start', 'stop'], ['chr 1', 'start', 'stop'], ['chr 1', 'start', 'stop'], ['chr 1', 'start', 'stop']]
# regions = ['GeneA.g.1.e', 'GeneA.g.2.e', 'GeneB.g.1.e', 'GeneC.g.1.e']
|
||
def CombineRegionInfo(listToCompress, regions):
    """Merge region-information rows of adjacent regions with the same name.

    A region's base name is the part of its name before the first '.'
    ('GeneA' for 'GeneA.g.1.e'). Each row in listToCompress belongs to the
    region at the same position; rows of neighbouring regions whose base
    names match are concatenated into a single row.

    Args:
        listToCompress: sequence of rows (lists), one per entry in regions.
            Left untouched.
        regions: sequence of region name strings. Left untouched.

    Returns:
        A list holding one concatenated row for every run of equal base
        names, in input order. Empty when regions is empty.

    Raises:
        IndexError: if listToCompress has fewer rows than regions.
    """
    from itertools import groupby

    base_names = [name.split('.')[0] for name in regions]

    newList = []
    # groupby yields one run per stretch of identical adjacent base names.
    # Iterating indices lets each run collect its own rows without adding a
    # sentinel to the inputs (the sentinel used to swallow a final region
    # named 'end' and broke on immutable inputs).
    for _, indices in groupby(range(len(base_names)), key=lambda i: base_names[i]):
        combined = []
        for i in indices:
            combined.extend(listToCompress[i])
        newList.append(combined)

    return newList
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
def detail(Regions, Reads):
    """Return per-base coverage values grouped by consecutive region name.

    Calls ``Regions.coverage(Reads, d=True)`` and reads column 3 (region
    name) and column 5 (coverage) of every resulting line. According to the
    original author's note the columns are: chromosome, region start, region
    stop, name, index of the base within the region, coverage.

    Args:
        Regions: object exposing ``coverage(Reads, d=True)`` that returns an
            iterable of indexable lines (presumably a pybedtools BedTool;
            confirm against the caller).
        Reads: passed through unchanged to ``Regions.coverage``.

    Returns:
        A list of lists: one inner list per run of adjacent lines with the
        same name, holding the coverage values of that run in order.
        Empty when the coverage result has no lines.
    """
    from itertools import groupby

    Region_coverage_perbase = Regions.coverage(Reads, d=True)

    # Walk the result exactly once and keep only the two columns needed.
    # The earlier code indexed it, took len() of it and then iterated it,
    # i.e. three separate traversals, and failed on an empty result.
    rows = [(str(line[3]), line[5]) for line in Region_coverage_perbase]
    if not rows:
        return []

    # Diagnostic output kept from the original implementation: name of the
    # first region and total number of per-base lines.
    print(rows[0][0])
    print(len(rows))

    return [
        [coverage for _, coverage in run]
        for _, run in groupby(rows, key=lambda row: row[0])
    ]
|
||
|
||
|
||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
def Meanfunction(mylist, Xcov, start_pos, region_info):
    """Split per-base coverage values into sub-regions at a threshold.

    Adjacent values that lie on the same side of the threshold (below it,
    or at/above it) form one sub-region. For every sub-region a row
    ``region_info + [start, end, mean, length]`` is produced, where
    ``end = start + length`` and the next sub-region starts at ``end``.

    Args:
        mylist: sequence of per-base coverage values; each value is
            converted with int(). Expected to be non-negative. Not modified.
        Xcov: coverage threshold, converted with int().
        start_pos: position of the first value, converted with int().
        region_info: list that is prepended to every output row.

    Returns:
        A list of rows, one per sub-region, in order. The mean is
        ``round(sum / length, 0)``. Empty when mylist is empty.

        When mylist has exactly one value, a single row of length 1 is
        returned whose mean is ``int(value) / 1`` (not rounded), and the row
        is also printed; both are kept from the original implementation.
    """
    from itertools import groupby

    if len(mylist) == 0:
        return []

    if len(mylist) == 1:
        first = int(start_pos)
        print(region_info + [first, first + 1, int(mylist[0]) / 1, 1])
        return [region_info + [first, first + 1, int(mylist[0]) / 1, 1]]

    threshold = int(Xcov)
    region_start = int(start_pos)

    # Convert once up front; the input is only read, never extended with a
    # sentinel, so tuples work and a bad value cannot leave the caller's
    # list modified.
    depths = [int(value) for value in mylist]

    MeanList = []
    for _, run in groupby(depths, key=lambda depth: depth < threshold):
        run_depths = list(run)
        length = len(run_depths)
        region_end = region_start + length
        mean = round(float(sum(run_depths)) / float(length), 0)
        MeanList.append(region_info + [region_start, region_end, mean, length])
        region_start = region_end

    return MeanList
|
||
|
||
|
||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
|
||
# Generate pie chart | ||
|
||
def Generate_Pie(Detailed_list, Threshold, index, fig, title, textSize):
    """Add a pie chart of positions below vs. at/above a coverage threshold.

    Counts how many values in Detailed_list are below ``int(Threshold)`` and
    how many are at or above it, then draws the two counts as a pie in
    subplot position ``index`` of a 2x2 grid on ``fig``.

    Args:
        Detailed_list: iterable of coverage values; each is converted with
            int().
        Threshold: coverage threshold, converted with int().
        index: subplot position passed to ``fig.add_subplot(2, 2, index)``.
        fig: figure object providing ``add_subplot`` (presumably a
            matplotlib Figure; confirm against the caller).
        title: subplot title; converted with str().
        textSize: font size used for the title.

    Returns:
        The same ``fig`` object, with the new subplot added.
    """
    import matplotlib
    matplotlib.use('Agg')
    # Imported after selecting the backend, as in the original code.
    # The former `import Tkinter` was dropped: it was never used, exists
    # only on Python 2, and needs a GUI toolkit although the Agg backend
    # is selected.
    import matplotlib.pyplot as plt

    threshold = int(Threshold)

    # Diagnostic output kept from the original implementation.
    print(Detailed_list)

    below_flags = [int(element) < threshold for element in Detailed_list]
    count_under = sum(below_flags)
    count_over = len(below_flags) - count_under

    print(count_over)
    print(count_under)

    ax = fig.add_subplot(2, 2, index)
    ax.pie([count_over, count_under], colors=['blue', 'orange'])
    ax.axis('equal')
    ax.set_title(str(title), fontsize=textSize)
    # NOTE(review): the first label says '>' although count_over also
    # includes values equal to the threshold, and the labels mix 'bp.' and
    # 'pos.'. Text left unchanged because it appears in generated reports.
    ax.legend(
        [
            str(count_over) + ' bp. > ' + str(threshold) + 'X',
            str(count_under) + ' pos. < ' + str(threshold) + 'X',
        ],
        loc='lower center',
        bbox_to_anchor=(0.5, 0),
        fontsize='xx-small',
    )

    return fig
|
Oops, something went wrong.