-
Notifications
You must be signed in to change notification settings - Fork 0
/
parse_clu.py
64 lines (53 loc) · 1.65 KB
/
parse_clu.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import json
import os.path
import re
import sys
from collections import Counter
# Parse the cluster file named by the first command-line argument.
# Every non-comment line holds a node id followed by a community id;
# the same node may appear under several communities.
pattern = re.compile("^#")
communities = {}  # community id -> list of member node ids, in file order
nodes = {}        # node id -> list of community ids, in file order
# The with-block closes the file even when a malformed line raises.
with open(sys.argv[1]) as infile:
    for line in infile:
        if pattern.match(line):
            continue  # comment line
        # split() without an argument accepts tabs and repeated spaces and
        # yields an empty list for blank lines, which are skipped instead
        # of crashing on int('').
        s = line.split()
        if not s:
            continue
        n = int(s[0])
        c = int(s[1])
        communities.setdefault(c, []).append(n)
        nodes.setdefault(n, []).append(c)
# merge json
def calculate_avg_num_coms_per_node(node_coms=None):
    """Return the mean number of communities a node belongs to.

    node_coms -- optional mapping of node id to the list of community ids
        that contain the node.  When omitted, the module-level ``nodes``
        mapping is used, so the existing zero-argument call keeps working.

    Returns 0.0 for an empty mapping rather than raising
    ZeroDivisionError.
    """
    if node_coms is None:
        node_coms = nodes
    if not node_coms:
        return 0.0
    # Only the membership lists matter here; the node ids are not needed.
    total = sum(len(coms) for coms in node_coms.values())
    return total / len(node_coms)
def calculate_avg_com_size(com_members=None):
    """Return the mean number of nodes per community.

    com_members -- optional mapping of community id to the list of node
        ids it contains.  When omitted, the module-level ``communities``
        mapping is used, so the existing zero-argument call keeps working.

    Returns 0.0 for an empty mapping rather than raising
    ZeroDivisionError.
    """
    if com_members is None:
        com_members = communities
    if not com_members:
        return 0.0
    # Only the member lists matter here; the community ids are not needed.
    total = sum(len(members) for members in com_members.values())
    return total / len(com_members)
# Gather both averages under their report keys, then print them as a
# single JSON object on standard output.
d = {}
d["AvgNumComsPerNode"] = calculate_avg_num_coms_per_node()
d["AvgComSize"] = calculate_avg_com_size()
print(json.dumps(d))
# write the communities-per-node and community-size distributions to .dat files
def num_communities_distribution(node_coms=None, out_path='num_communities.dat'):
    """Write the histogram of communities-per-node to a text file.

    Each output line is "<k> <count>": how many nodes belong to exactly
    k communities, in ascending order of k.

    node_coms -- optional mapping of node id to the list of community ids
        that contain the node; defaults to the module-level ``nodes``.
    out_path -- file to (over)write; defaults to 'num_communities.dat'
        in the current working directory.
    """
    if node_coms is None:
        node_coms = nodes
    counts = Counter(len(coms) for coms in node_coms.values())
    # The with-block closes the file even if a write fails part-way.
    with open(out_path, 'w') as f:
        for k in sorted(counts):
            f.write("%d %d\n" % (k, counts[k]))
# Runs as part of the script: writes the communities-per-node histogram file.
num_communities_distribution()
def communit_size_distribution(com_members=None, out_path='community_size.dat'):
    """Write the histogram of community sizes to a text file.

    Each output line is "<size> <count>": how many communities contain
    exactly that many nodes, in ascending order of size.

    The misspelled function name is kept because the script calls it
    under this name.

    com_members -- optional mapping of community id to the list of node
        ids it contains; defaults to the module-level ``communities``.
    out_path -- file to (over)write; defaults to 'community_size.dat'
        in the current working directory.
    """
    if com_members is None:
        com_members = communities
    # The loop variable is named "members" so that it does not shadow the
    # module-level "nodes" mapping.
    counts = Counter(len(members) for members in com_members.values())
    # The with-block closes the file even if a write fails part-way.
    with open(out_path, 'w') as f:
        for size in sorted(counts):
            f.write("%d %d\n" % (size, counts[size]))
# Runs as part of the script: writes the community-size histogram file.
communit_size_distribution()