Skip to content

Commit

Permalink
Add files via upload
Browse files Browse the repository at this point in the history
  • Loading branch information
anod6351 authored Nov 19, 2018
1 parent 9a21530 commit 2285113
Show file tree
Hide file tree
Showing 12 changed files with 1,480 additions and 0 deletions.
38 changes: 38 additions & 0 deletions CombRegionInfo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@

def CombineRegionInfo(listToCompress, regions):
    """Merge the rows of consecutive regions that share a name prefix.

    The prefix of a region is the text before its first '.'. Rows of
    ``listToCompress`` whose regions form an unbroken run with the same
    prefix are concatenated, in order, into one new list. Neither input
    is modified.

    Args:
        listToCompress: Sequence of rows (lists), one per entry in
            ``regions``.
        regions: Sequence of region-name strings such as 'GeneA.g.1.e'.

    Returns:
        A list holding one concatenated row per run of equal prefixes.

    Raises:
        IndexError: If ``listToCompress`` has fewer rows than ``regions``.
    """
    from itertools import groupby

    # Group row positions by prefix instead of padding both inputs with an
    # 'end.extra' sentinel: the sentinel made the final group disappear
    # whenever the last real region was itself named 'end', and it left
    # both inputs modified if an exception interrupted the loop.
    merged_rows = []
    positions = range(len(regions))
    for _, run in groupby(positions, key=lambda i: regions[i].split('.')[0]):
        merged = []
        for i in run:
            merged.extend(listToCompress[i])
        merged_rows.append(merged)

    return merged_rows



109 changes: 109 additions & 0 deletions CombineRows_generator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@

def CombineRowsList(listToCompress, regions):
    """Merge rows of consecutive same-named regions and record the splice lengths.

    The name of a region is the text before its first '.'. Rows of
    ``listToCompress`` whose regions form an unbroken run with the same
    name are concatenated into one row. Neither input is modified.

    Example:
        listToCompress = [[1,2,2,3], [1,2,2,23,4], [2,4,6,7,8], [1,2,3,4]]
        regions = ['GeneA.g.1.e', 'GeneA.g.2.e', 'GeneA.g.1.e', 'GeneB.g.1.e']
        returns (['GeneA', 'GeneB'],
                 [[1,2,2,3,1,2,2,23,4,2,4,6,7,8], [1,2,3,4]],
                 [[4, 5, 5], [4]])

    Args:
        listToCompress: Sequence of rows (lists), one per entry in
            ``regions``.
        regions: Sequence of region-name strings.

    Returns:
        A tuple ``(region_name, newList, splice)``:
        region_name -- the region names with duplicates removed, in order
            of first appearance;
        newList -- one concatenated row per run of equal names;
        splice -- for every merged row, the lengths of the original rows
            that were joined to build it (per the original comments these
            are used to draw separator lines in the region plot).

    Raises:
        IndexError: If ``listToCompress`` has fewer rows than ``regions``.
    """
    from collections import OrderedDict
    from itertools import groupby

    prefixes = [name.split('.')[0] for name in regions]

    # Group row positions by name instead of padding both inputs with an
    # 'end.extra' sentinel: the sentinel made the final group disappear
    # whenever the last real region was itself named 'end', and it left
    # both inputs modified if an exception interrupted the loop.
    merged_rows = []
    splice = []
    for _, run in groupby(range(len(prefixes)), key=prefixes.__getitem__):
        merged = []
        lengths = []
        for i in run:
            row = listToCompress[i]
            merged.extend(row)
            lengths.append(len(row))
        merged_rows.append(merged)
        splice.append(lengths)

    # NOTE(review): names are de-duplicated over the whole input, while rows
    # are merged per consecutive run. If the same name occurs in two
    # separate runs, region_name is shorter than the other two outputs --
    # confirm callers always pass regions sorted by name.
    region_name = list(OrderedDict.fromkeys(prefixes))

    return region_name, merged_rows, splice

######################### Generates a new list of merged information rows for the validation table. The original region information list contains
# the values chr, start, stop and length. If the information comes from the same region name, the values are appended horizontally.
# listToCompress = [['chr 1', 'start', 'stop'], ['chr 1', 'start', 'stop'], ['chr 1', 'start', 'stop'], ['chr 1', 'start', 'stop']]
# regions = ['GeneA.g.1.e', 'GeneA.g.2.e', 'GeneB.g.1.e', 'GeneC.g.1.e']

def CombineRegionInfo(listToCompress, regions):
    """Merge the rows of consecutive regions that share a name prefix.

    The prefix of a region is the text before its first '.'. Rows of
    ``listToCompress`` whose regions form an unbroken run with the same
    prefix are concatenated, in order, into one new list. Neither input
    is modified.

    Args:
        listToCompress: Sequence of rows (lists), one per entry in
            ``regions``.
        regions: Sequence of region-name strings such as 'GeneA.g.1.e'.

    Returns:
        A list holding one concatenated row per run of equal prefixes.

    Raises:
        IndexError: If ``listToCompress`` has fewer rows than ``regions``.
    """
    from itertools import groupby

    # Group row positions by prefix instead of padding both inputs with an
    # 'end.extra' sentinel: the sentinel made the final group disappear
    # whenever the last real region was itself named 'end', and it left
    # both inputs modified if an exception interrupted the loop.
    merged_rows = []
    positions = range(len(regions))
    for _, run in groupby(positions, key=lambda i: regions[i].split('.')[0]):
        merged = []
        for i in run:
            merged.extend(listToCompress[i])
        merged_rows.append(merged)

    return merged_rows



41 changes: 41 additions & 0 deletions Detailed_generator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
def detail(Regions, Reads):
    """Collect per-base coverage values, one list per run of equal region names.

    Calls ``Regions.coverage(Reads, d=True)`` and walks the result once.
    According to the original comments each resulting line holds:
    chromosome, region start, region stop, name, index of the base within
    the region, coverage. Only field 3 (name) and field 5 (coverage) are
    read here.

    Args:
        Regions: Object exposing ``coverage(reads, d=True)``; presumably a
            pybedtools BedTool built from a 4-column BED file -- confirm
            against the caller.
        Reads: Passed through unchanged to ``Regions.coverage``.

    Returns:
        A list of lists. Each inner list contains the coverage field of
        consecutive lines that share the same name (compared as strings).
        Empty if the coverage result has no lines.
    """
    from itertools import groupby

    Region_coverage_perbase = Regions.coverage(Reads, d=True)

    size_list = len(Region_coverage_perbase)
    # An empty coverage result used to raise IndexError on the [0] lookup.
    if size_list == 0:
        return []

    # Diagnostic output kept from the original implementation.
    print(Region_coverage_perbase[0][3])
    print(size_list)

    # groupby closes the last group by itself, so no line counter is needed
    # to detect the end of the data.
    return [
        [line[5] for line in lines]
        for _, lines in groupby(Region_coverage_perbase, key=lambda line: str(line[3]))
    ]






85 changes: 85 additions & 0 deletions Meanlist_generator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
def Meanfunction(mylist, Xcov, start_pos, region_info):
    """Split per-base coverage values into sub-regions around a threshold.

    Consecutive values lying on the same side of the threshold (strictly
    below ``int(Xcov)``, or at/above it) form one sub-region. For every
    sub-region one row is produced:

        region_info + [start, end, mean, length]

    where ``start`` of the first sub-region is ``int(start_pos)``,
    ``end = start + length`` (so ``end`` is the start of the next
    sub-region) and ``mean`` is the average coverage rounded to zero
    decimals.

    Args:
        mylist: Sequence of coverage values; each must be accepted by
            ``int()``. It is not modified.
        Xcov: Coverage threshold, converted with ``int()``.
        start_pos: Position of the first value, converted with ``int()``.
        region_info: List that is prepended to every output row.

    Returns:
        A list of rows as described above; empty when ``mylist`` is empty.

    Raises:
        ValueError: If a value, the threshold or the start position cannot
            be converted to int.
    """
    from itertools import groupby

    # A single value is reported as a one-base region. The mean expression
    # and the diagnostic print are kept exactly as in the original.
    if len(mylist) == 1:
        only_row = region_info + [int(start_pos), int(start_pos)+1, int(mylist[0])/1, 1]
        print(only_row)
        return [only_row]

    MeanList = []
    if len(mylist) == 0:
        return MeanList

    # Convert up front and never touch the caller's list: the previous
    # version appended a 0 sentinel to mylist and left it there whenever a
    # conversion failed half-way through.
    threshold = int(Xcov)
    depths = [int(value) for value in mylist]

    run_start = int(start_pos)
    for _, run in groupby(depths, key=lambda depth: depth < threshold):
        run = list(run)
        length = len(run)
        run_end = run_start + length
        mean = round(float(sum(run))/float(length), 0)
        MeanList.append(region_info + [run_start, run_end, mean, length])
        # The next sub-region starts where this one ended.
        run_start = run_end

    return MeanList






29 changes: 29 additions & 0 deletions PieChart.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@

# Generate pie chart

def Generate_Pie(Detailed_list, Threshold, index, fig, title, textSize):
    """Add a pie chart of positions at/above versus below a threshold to fig.

    Every element of ``Detailed_list`` is converted with ``int()`` and
    counted as "under" when it is strictly below ``int(Threshold)``,
    otherwise as "over". The two counts are drawn as a pie chart in slot
    ``index`` of a 2x2 subplot grid on ``fig``.

    Args:
        Detailed_list: Iterable of coverage values accepted by ``int()``.
        Threshold: Coverage threshold, converted with ``int()``.
        index: Subplot position passed to ``fig.add_subplot(2, 2, index)``.
        fig: Object providing ``add_subplot``; presumably a matplotlib
            Figure -- confirm against the caller.
        title: Subplot title; converted with ``str()``.
        textSize: Font size of the title.

    Returns:
        The same ``fig`` object that was passed in.
    """
    import matplotlib
    matplotlib.use('Agg')
    # pyplot is not referenced below; the import is retained so the module
    # is still loaded after the Agg backend has been selected, as before.
    import matplotlib.pyplot as plt
    # The former `import Tkinter` was removed: the name was never used and
    # the module only exists under that name on Python 2, so the import
    # raised ImportError on Python 3.

    # Diagnostic output kept from the original implementation.
    print(Detailed_list)

    threshold = int(Threshold)
    count_under = 0
    count_over = 0
    for element in Detailed_list:
        if int(element) < threshold:
            count_under += 1
        else:
            count_over += 1

    print(count_over)
    print(count_under)

    ax = fig.add_subplot(2, 2, index)
    ax.pie([count_over, count_under], colors=['blue', 'orange'])
    ax.axis('equal')
    ax.set_title(str(title), fontsize=textSize)
    # NOTE(review): the first label reads '>' although count_over also
    # includes values equal to the threshold; label text left unchanged.
    over_label = str(count_over) +' bp. > ' + str(threshold) +'X'
    under_label = str(count_under) + ' pos. < '+ str(threshold) + 'X'
    ax.legend([over_label, under_label], loc='lower center', bbox_to_anchor=(0.5, 0), fontsize='xx-small')

    return fig

Loading

0 comments on commit 2285113

Please sign in to comment.