forked from jerrymagic/Python-Tianyancha
-
Notifications
You must be signed in to change notification settings - Fork 0
/
graph.py
161 lines (145 loc) · 4.78 KB
/
graph.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
#!/usr/bin/python
# -*- coding: utf-8 -*-
# @Time : 2018/2/4 13:38
# @Author :
# @Site :
# @File : graph.py
import xlrd
import networkx as nx
import matplotlib.pyplot as plt
# import codecs
import logging
# Configure the root logger to emit INFO-level messages and above.
logging.basicConfig(level=logging.INFO)
# Use the SimHei font for sans-serif text; presumably so that the Chinese
# node labels drawn by matplotlib render correctly (requires the font to be
# installed on the machine -- TODO confirm).
plt.rcParams['font.sans-serif'] = ['SimHei']
# Draw minus signs with the ASCII hyphen instead of the Unicode minus glyph.
plt.rcParams['axes.unicode_minus'] = False
# Read a single column from the first sheet of an Excel workbook
def exceldata(filename, n):
    """
    Read one column from the first worksheet of an Excel workbook.

    The first row is treated as a header and is left out of the result.

    :param filename: path of the Excel workbook to open
    :param n: zero-based index of the column to read
    :return: list with the cell values of column ``n`` for every row after
        the first one, or None when anything fails (the error is printed)
    """
    try:
        first_sheet = xlrd.open_workbook(filename).sheets()[0]
        # Every row is read, header included, so that an invalid column
        # index is reported even for a sheet that only has a header row.
        column = [
            first_sheet.cell(row_idx, n).value
            for row_idx in range(first_sheet.nrows)
        ]
        # Drop the header value.
        return column[1:]
    except Exception as err:
        print(err)
        return None
def relation(filename):
    """
    Build the company -> shareholders mapping from an Excel workbook.

    Reads the first worksheet: column 0 is used as the company name and
    column 16 as a comma-separated shareholder string. Empty names and the
    placeholder "暂无" are dropped from the shareholder list. The first row
    is treated as a header, the first occurrence of a company wins, and
    companies left without any shareholder are removed.

    :param filename: path of the Excel workbook to open
    :return: dict mapping company name to its list of shareholder names,
        or None when anything fails (the error is printed)
    """
    try:
        first_sheet = xlrd.open_workbook(filename).sheets()[0]
        relations = {}
        for row_idx in range(first_sheet.nrows):
            company = first_sheet.cell(row_idx, 0).value
            raw_shares = first_sheet.cell(row_idx, 16).value
            holders = [
                name
                for name in raw_shares.split(",")
                if name not in ("", "暂无")
            ]
            # The header row is parsed like any other row but never stored.
            if row_idx == 0:
                continue
            # setdefault keeps the entry of the first row seen for a company.
            relations.setdefault(company, holders)
        print("处理前:", len(relations))
        # Collect first, delete afterwards: the dict must not change size
        # while it is being iterated.
        without_holders = [
            company for company, holders in relations.items()
            if len(holders) == 0
        ]
        for company in without_holders:
            del relations[company]
        print("处理后:", len(relations))
        return relations
    except Exception as err:
        print(err)
        return None
# Convert the relations dict into a list of (company, shareholder) tuples
def coverelation(relations):
    """
    Flatten the relations dict into a list of edge tuples.

    :param relations: dict mapping a company name to an iterable of its
        shareholder names
    :return: list of ``(company, shareholder)`` tuples, in dict order and,
        within one company, in the order of its shareholder list
    """
    return [
        (company, holder)
        for company, holders in relations.items()
        for holder in holders
    ]
# De-duplicate the raw column data and write it to a dictionary text file
def cleandata(datalist, dictname, splitstat):
    """
    De-duplicate raw column data and write it to a text file, one per line.

    :param datalist: list of strings (raw cell values of one column)
    :param dictname: path of the text file to create or overwrite
    :param splitstat: when true, every entry is treated as a comma-separated
        shareholder string and split into individual non-empty names before
        de-duplication; when false, the entries are written as they are
    :return: None; the result is the file written to ``dictname``
    """
    if splitstat:
        print("处理股东信息")
        entries = [
            name
            for datarow in datalist
            for name in datarow.split(",")
            if name
        ]
    else:
        print("处理公司信息")
        entries = datalist

    # dict.fromkeys keeps the first occurrence of every entry in its original
    # position and runs in linear time; sorting a set by list.index gives the
    # same order but is quadratic.
    unique_entries = list(dict.fromkeys(entries))
    print("去重前:", len(entries))
    print("去重后:", len(unique_entries))

    # The file is opened only after the data was processed, so a failure
    # above does not leave a truncated file behind. The platform default
    # encoding is kept on purpose: the file is read back with the same
    # default by the graph-building step.
    with open(dictname, "w") as out:
        out.writelines(entry + "\n" for entry in unique_entries)
    print("写入完毕,关闭文件")
# Build the company/shareholder network graph and save it as an image
def creategraph(picname, relation, dict1, dict2, max_items=300):
    """
    Draw the company/shareholder graph and save it as an image file.

    :param picname: path of the image file to write
    :param relation: list of ``(company, shareholder)`` edge tuples
    :param dict1: text file with one company name per line
    :param dict2: text file with one shareholder name per line
    :param max_items: upper bound applied separately to the number of
        company nodes, shareholder nodes and edges that are added
        (default 300); pass None to use everything
    :return: None; the result is the image written to ``picname``
    """
    palette = ['red', 'green', 'blue', 'yellow']

    # The with-blocks close the files; no explicit close() is needed.
    with open(dict1, "r") as f1:
        cmpnode = [line.rstrip("\n") for line in f1]
    with open(dict2, "r") as f2:
        sharenode = [line.rstrip("\n") for line in f2]

    DG = nx.DiGraph()
    print("节点较多,需要一定时间运行")
    DG.add_nodes_from(cmpnode[:max_items])
    DG.add_nodes_from(sharenode[:max_items])
    # Edges may introduce nodes that are not among the ones added above, so
    # the final node count is only known after this call.
    DG.add_edges_from(relation[:max_items])

    # node_color has to be a single colour or exactly one colour per node;
    # a fixed 4-element list is rejected for any other node count. Cycle the
    # palette over the nodes in insertion order instead.
    node_colors = [
        palette[idx % len(palette)]
        for idx in range(DG.number_of_nodes())
    ]
    nx.draw(DG, with_labels=True, node_size=300, font_size=8,
            node_color=node_colors)

    fig = plt.gcf()
    fig.set_size_inches(35, 35)
    print("操作完成,生成文件中,请稍等……")
    fig.savefig(picname, dpi=600)
    print("生成完毕,文件名:" + picname)
if __name__ == "__main__":
    workbook = "test2.xlsx"
    company_dict, holder_dict = "cmpdict.txt", "shareholder.txt"

    # Column 0 is read as the company column and column 16 as the
    # shareholder column, matching what relation() uses.
    raw_companies = exceldata(workbook, 0)
    raw_holders = exceldata(workbook, 16)

    # Write the de-duplicated node dictionaries that creategraph() reads.
    cleandata(raw_companies, company_dict, False)
    cleandata(raw_holders, holder_dict, True)

    # Build the edge list and render the graph.
    edges = coverelation(relation(workbook))
    creategraph("pic.png", edges, company_dict, holder_dict)