Add files via upload

anod6351 · Nov 19, 2018 · 2285113 · 2285113
1 parent 9a21530
commit 2285113
Show file tree

Hide file tree

Showing 12 changed files with 1,480 additions and 0 deletions.
diff --git a/CombRegionInfo.py b/CombRegionInfo.py
@@ -0,0 +1,38 @@
+
def CombineRegionInfo(listToCompress, regions):
	"""Concatenate consecutive rows that belong to the same region.

	The region key of row ``i`` is the part of ``regions[i]`` before the
	first '.'. Adjacent rows with the same key are concatenated into one
	flat list; a key that shows up again later, after a different key,
	starts a new merged row.

	Neither input list is modified. (The previous implementation appended
	an 'end.extra' sentinel to both inputs, which lost the final group
	whenever the last real region key was 'end' and left the inputs
	corrupted if an exception was raised midway.)

	Args:
		listToCompress: list of rows (each an iterable), one per entry
			in ``regions``.
		regions: list of region name strings such as 'GeneA.g.1.e'.

	Returns:
		A new list with one concatenated row per run of equal region keys.

	Raises:
		IndexError: if ``regions`` has more entries than ``listToCompress``.
	"""
	merged = []
	previous_key = None

	for position, region in enumerate(regions):
		key = region.split('.')[0]
		row = listToCompress[position]

		if merged and key == previous_key:
			# Same region as the row before: extend the open merged row
			merged[-1].extend(row)
		else:
			# New region: start a fresh list so the input row is never aliased
			merged.append(list(row))

		previous_key = key

	return merged
+
+
+
diff --git a/CombineRows_generator.py b/CombineRows_generator.py
@@ -0,0 +1,109 @@
+
def CombineRowsList(listToCompress, regions):
	"""Merge consecutive coverage rows that belong to the same region.

	The region key of row ``i`` is the part of ``regions[i]`` before the
	first '.'. Adjacent rows with the same key are concatenated into one
	flat list, and the length of every original row is recorded so the
	caller can tell where the merged pieces meet.

	Example:
		listToCompress = [[1,2,2,3], [1,2,2,23,4], [2,4,6,7,8], [1,2,3,4]]
		regions = ['GeneA.g.1.e', 'GeneA.g.2.e', 'GeneA.g.1.e', 'GeneB.g.1.e']
		returns (['GeneA', 'GeneB'],
		         [[1,2,2,3,1,2,2,23,4,2,4,6,7,8], [1,2,3,4]],
		         [[4, 5, 5], [4]])

	Neither input list is modified. (The previous implementation appended
	an 'end.extra' sentinel to both inputs, which lost the final group
	whenever the last real region key was 'end' and left the inputs
	corrupted if an exception was raised midway.)

	Args:
		listToCompress: list of rows (each a sized iterable), one per
			entry in ``regions``.
		regions: list of region name strings such as 'GeneA.g.1.e'.

	Returns:
		A tuple ``(region_name, newList, splice)``:
		region_name -- the region keys without duplicates, in order of
			first appearance.
		newList -- one concatenated row per run of equal, adjacent keys.
		splice -- for each entry in newList, the lengths of the original
			rows that were merged into it.
		NOTE: region_name is de-duplicated over the whole input while
		newList only merges adjacent rows, so the two differ in length
		when a key reappears after a different key.

	Raises:
		IndexError: if ``regions`` has more entries than ``listToCompress``.
	"""
	from collections import OrderedDict

	# Region key (text before the first '.') for every row
	keys = [region.split('.')[0] for region in regions]

	newList = []
	splice = []
	previous_key = None

	for position, key in enumerate(keys):
		row = listToCompress[position]

		if newList and key == previous_key:
			# Same region as the row before: extend the open merged row
			newList[-1].extend(row)
			splice[-1].append(len(row))
		else:
			# New region: start a fresh merged row and a fresh splice list
			newList.append(list(row))
			splice.append([len(row)])

		previous_key = key

	# Drop duplicate keys while keeping the original order
	region_name = list(OrderedDict.fromkeys(keys))

	return region_name, newList, splice
+
+	######################### Generates a new list of merged information rows for the validation table. The original region information list contains
+	# the values chr, start, stop and length. If the information comes from the same region name, the values are concatenated horizontally.
+	# listToCompress = [['chr 1', 'start', 'stop'], ['chr 1', 'start', 'stop'], ['chr 1', 'start', 'stop'], ['chr 1', 'start', 'stop']]
+	# regions = ['GeneA.g.1.e', 'GeneA.g.2.e', 'GeneB.g.1.e', 'GeneC.g.1.e']
+
def CombineRegionInfo(listToCompress, regions):
	"""Concatenate consecutive region-information rows of the same region.

	The region key of row ``i`` is the part of ``regions[i]`` before the
	first '.'. Adjacent rows with the same key are concatenated into one
	flat list; a key that shows up again later, after a different key,
	starts a new merged row.

	Neither input list is modified. (The previous implementation appended
	an 'end.extra' sentinel to both inputs, which lost the final group
	whenever the last real region key was 'end' and left the inputs
	corrupted if an exception was raised midway.)

	Args:
		listToCompress: list of rows (each an iterable), one per entry
			in ``regions``.
		regions: list of region name strings such as 'GeneA.g.1.e'.

	Returns:
		A new list with one concatenated row per run of equal region keys.

	Raises:
		IndexError: if ``regions`` has more entries than ``listToCompress``.
	"""
	merged = []
	previous_key = None

	for position, region in enumerate(regions):
		key = region.split('.')[0]
		row = listToCompress[position]

		if merged and key == previous_key:
			# Same region as the row before: extend the open merged row
			merged[-1].extend(row)
		else:
			# New region: start a fresh list so the input row is never aliased
			merged.append(list(row))

		previous_key = key

	return merged
+
+
+
diff --git a/Detailed_generator.py b/Detailed_generator.py
@@ -0,0 +1,41 @@
def detail(Regions, Reads):
	"""Return the per-base coverage values grouped per region.

	Calls ``Regions.coverage(Reads, d=True)`` and reads column 3 of every
	returned row as the region name and column 5 as the coverage value.
	Consecutive rows with the same name (compared as strings) are
	collected into one list.

	Args:
		Regions: object exposing ``coverage(Reads, d=True)``; presumably a
			pybedtools BedTool built from the region bed file -- confirm
			against the caller.
		Reads: passed through unchanged to ``Regions.coverage``.

	Returns:
		A list of lists: one inner list of coverage values (as found in
		column 5) per run of rows sharing a region name, in input order.
		An empty coverage result gives an empty list.
	"""
	from itertools import groupby

	# Per-base coverage rows; columns per the original author's note:
	# chromosome, region start, region stop, name, base index, coverage
	Region_coverage_perbase = Regions.coverage(Reads, d=True)

	# groupby only merges adjacent rows with an equal key, which is exactly
	# the "compare with the previous row" grouping needed here
	detailed = []
	for _, rows in groupby(Region_coverage_perbase, key=lambda row: str(row[3])):
		detailed.append([row[5] for row in rows])

	return detailed
+
+
+
+
+
+
diff --git a/Meanlist_generator.py b/Meanlist_generator.py
@@ -0,0 +1,85 @@
def Meanfunction(mylist, Xcov, start_pos, region_info):
	"""Split per-base coverage into runs above/below a threshold.

	The coverage values are walked in order and cut into maximal runs in
	which every value is on the same side of the threshold (``< Xcov`` or
	``>= Xcov``). One output row is produced per run:

		region_info + [start, end, mean, length]

	where ``start`` is ``start_pos`` plus the offset of the run,
	``end = start + length`` and ``mean`` is the run's mean coverage
	rounded to zero decimals.

	``mylist`` is not modified. (The previous implementation temporarily
	appended a 0 sentinel to the caller's list.)

	Args:
		mylist: sequence of per-base coverage values; each is converted
			with ``int()``.
		Xcov: coverage threshold, converted with ``int()``.
		start_pos: position of the first value, converted with ``int()``.
		region_info: list that is prepended to every output row.

	Returns:
		List of rows as described above; empty when ``mylist`` is empty.
		For a one-element ``mylist`` the single row carries the coverage
		value itself as an ``int`` instead of a rounded float.
	"""
	from itertools import groupby

	run_start = int(start_pos)

	# One-base region: the mean is the coverage value itself
	if len(mylist) == 1:
		return [region_info + [run_start, run_start + 1, int(mylist[0]), 1]]

	threshold = int(Xcov)
	depths = [int(value) for value in mylist]

	MeanList = []
	# groupby yields one group per maximal run on the same side of the threshold
	for _, run in groupby(depths, key=lambda depth: depth < threshold):
		run_depths = list(run)
		length = len(run_depths)
		run_end = run_start + length
		mean = round(float(sum(run_depths)) / float(length), 0)
		MeanList.append(region_info + [run_start, run_end, mean, length])
		# The next run starts where this one ended
		run_start = run_end

	return MeanList
+
+
+
+
+
+
diff --git a/PieChart.py b/PieChart.py
@@ -0,0 +1,29 @@
+
+# Generate pie chart
+
def Generate_Pie(Detailed_list, Threshold, index, fig, title, textSize):
	"""Add a pie chart of positions over/under a coverage threshold to ``fig``.

	Every element of ``Detailed_list`` is converted with ``int()`` and
	counted as "under" when it is below ``int(Threshold)`` and as "over"
	otherwise (values equal to the threshold count as over). The two counts
	are drawn as a blue (over) / orange (under) pie in cell ``index`` of a
	2x2 subplot grid on ``fig``.

	Args:
		Detailed_list: iterable of per-base coverage values.
		Threshold: coverage threshold.
		index: subplot position passed to ``fig.add_subplot(2, 2, index)``.
		fig: figure object to draw on; presumably a matplotlib Figure --
			confirm against the caller.
		title: subplot title, converted with ``str()``.
		textSize: font size of the title.

	Returns:
		The same ``fig`` object, with the new subplot added.
	"""
	import matplotlib
	# Select the non-interactive Agg backend, as the original code did
	matplotlib.use('Agg')

	limit = int(Threshold)
	depths = [int(element) for element in Detailed_list]
	count_under = sum(1 for depth in depths if depth < limit)
	count_over = len(depths) - count_under

	ax = fig.add_subplot(2, 2, index)
	ax.pie([count_over, count_under], colors=['blue', 'orange'])
	ax.axis('equal')
	ax.set_title(str(title), fontsize=textSize)
	ax.legend(
		[
			str(count_over) + ' bp. > ' + str(limit) + 'X',
			str(count_under) + ' pos. < ' + str(limit) + 'X',
		],
		loc='lower center',
		bbox_to_anchor=(0.5, 0),
		fontsize='xx-small',
	)

	return fig
+