-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathreadDAVID.py
72 lines (63 loc) · 2.67 KB
/
readDAVID.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import numpy as np
import gzip
class enrichmentClass(object):
"""Given one line from a functional clustering output file stores all data
"""
def __init__(self, str):
"""Given a line from a function classification file stores values
"""
vals=str.strip().split('\t')
self.category =vals[0]
self.term =vals[1]
self.count =vals[2]
self.percent =vals[3]
self.pvalue =float(vals[4])
self.genes =vals[5]
self.list_total=int(vals[6])
self.pop_hits =int(vals[7])
self.pop_total =int(vals[8])
self.fold_enrichment=float(vals[9])
self.bonferroni =float(vals[10])
self.benjamini =float(vals[11])
self.fdr =float(vals[12])
def readDavidFile(filename, pvalFilter=1, isGzip=False):
enrichments={}
if isGzip:
fp=gzip.open(filename)
else:
fp=open(filename)
for l in fp:
try:
if l.startswith('Category Term Count %'):
continue
davidLine=enrichmentClass(l)
if davidLine.pvalue<=pvalFilter:
enrichments[davidLine.term]=davidLine
except IndexError:
if l.startswith('Functional Group') or l=='\n':
pass
else:
print 'WARNING! There are unkown structured line in the file'
return enrichments
if __name__ == '__main__':
hosvdDown=readDavidFile('david_enrichment_hosvd_down.csv', 1e-3)
hosvdUp=readDavidFile('david_enrichment_hosvd_up.csv.gz', 1e-3, True)
anovaDown=readDavidFile('david_enrichment_anova_down.csv.gz', 1e-3, True)
anovaUp=readDavidFile('david_enrichment_anova_up.csv.gz', 1e-3, True)
defaultEnrichment=enrichmentClass('none\tnone\t0\t0\t1\t\t0\t0\t0\t0\t1\t1\t1\t1')
downKeys=list(set(hosvdDown.keys()).union(anovaDown.keys()))
upKeys=list(set(hosvdUp.keys()).union(anovaUp.keys()))
def printClasses(keys, hosvd, anova):
for k in keys:
hosvdP=hosvd.get(k,defaultEnrichment).pvalue
anovaP=anova.get(k, defaultEnrichment).pvalue
hosvdCount=hosvd.get(k,defaultEnrichment).count
anovaCount=anova.get(k,defaultEnrichment).count
category=hosvd.get(k, defaultEnrichment).category
print '%s\t%40s\t%0.2e\t%s\t%0.2e\t%s' %(category,k,
hosvdP, hosvdCount,
anovaP, anovaCount)
print 'Category\tName\tHOSVD pvalue\tHOSVD count\tANOVA pvalue\tANOVA count'
#printClasses(downKeys, hosvdDown, anovaDown)
printClasses(upKeys, hosvdUp, anovaUp)
#print enrichments.keys()