-
Notifications
You must be signed in to change notification settings - Fork 62
/
Copy pathutils.py
50 lines (45 loc) · 2.39 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import numpy as np
# Maps each relation class name to its integer label id.
# 'Other' is 0; every remaining relation occupies two consecutive ids,
# the (e1,e2) direction first (odd id) and the (e2,e1) direction second (even id).
class2label = {
    name: index
    for index, name in enumerate([
        'Other',
        'Message-Topic(e1,e2)',
        'Message-Topic(e2,e1)',
        'Product-Producer(e1,e2)',
        'Product-Producer(e2,e1)',
        'Instrument-Agency(e1,e2)',
        'Instrument-Agency(e2,e1)',
        'Entity-Destination(e1,e2)',
        'Entity-Destination(e2,e1)',
        'Cause-Effect(e1,e2)',
        'Cause-Effect(e2,e1)',
        'Component-Whole(e1,e2)',
        'Component-Whole(e2,e1)',
        'Entity-Origin(e1,e2)',
        'Entity-Origin(e2,e1)',
        'Member-Collection(e1,e2)',
        'Member-Collection(e2,e1)',
        'Content-Container(e1,e2)',
        'Content-Container(e2,e1)',
    ])
}
# Maps each integer label id back to its relation class name.
# The position of a name in the list below is its label id: 'Other' is 0,
# then each relation contributes its (e1,e2) direction followed by (e2,e1).
label2class = dict(enumerate([
    'Other',
    'Message-Topic(e1,e2)',
    'Message-Topic(e2,e1)',
    'Product-Producer(e1,e2)',
    'Product-Producer(e2,e1)',
    'Instrument-Agency(e1,e2)',
    'Instrument-Agency(e2,e1)',
    'Entity-Destination(e1,e2)',
    'Entity-Destination(e2,e1)',
    'Cause-Effect(e1,e2)',
    'Cause-Effect(e2,e1)',
    'Component-Whole(e1,e2)',
    'Component-Whole(e2,e1)',
    'Entity-Origin(e1,e2)',
    'Entity-Origin(e2,e1)',
    'Member-Collection(e1,e2)',
    'Member-Collection(e2,e1)',
    'Content-Container(e1,e2)',
    'Content-Container(e2,e1)',
]))
def _read_word(f):
    """Read one space-terminated word from the binary stream *f*.

    Newline bytes are skipped (they separate consecutive entries), and the
    collected bytes are decoded as latin-1 so every byte maps to a character.

    Raises:
        EOFError: if the stream ends before the terminating space is found.
    """
    raw = bytearray()
    while True:
        ch = f.read(1)
        if not ch:
            # Without this guard a truncated file would loop forever,
            # because an empty read is neither a space nor a newline.
            raise EOFError("Unexpected end of word2vec file while reading a word")
        if ch == b' ':
            return raw.decode('latin-1')
        if ch != b'\n':
            raw += ch


def load_word2vec(embedding_path, embedding_dim, vocab):
    """Build an embedding matrix for *vocab*, seeded from a binary word2vec file.

    The file is read as: one header line holding two integers (number of
    entries and vector size), then for each entry a word terminated by a
    single space followed by ``vector size`` raw float32 values.

    Args:
        embedding_path: path of the binary word2vec file.
        embedding_dim: number of columns of the returned matrix.
        vocab: object whose ``vocabulary_`` attribute supports ``len()`` and
            ``get(word)`` returning the row index of the word.

    Returns:
        A float32 array of shape ``(len(vocab.vocabulary_), embedding_dim)``.
        Rows of words found in the file hold the file's vectors; every other
        row keeps its random initial value.

    Raises:
        EOFError: if the file ends before all announced entries were read.
        ValueError: if the header is malformed or a vector does not fit a row.
    """
    vocab_len = len(vocab.vocabulary_)
    # Initial matrix: standard-normal samples scaled by 1/sqrt(vocabulary size).
    # The cast is applied after the division so the result is float32 no
    # matter how the NumPy version promotes float32-array / float64-scalar.
    initW = (np.random.randn(vocab_len, embedding_dim) / np.sqrt(vocab_len)).astype(np.float32)
    # Overwrite the rows of every vocabulary word present in the word2vec file.
    print("Load word2vec file {0}".format(embedding_path))
    with open(embedding_path, "rb") as f:
        header = f.readline()
        vocab_size, layer_size = map(int, header.split())
        binary_len = np.dtype('float32').itemsize * layer_size
        for _ in range(vocab_size):
            word = _read_word(f)
            # Always consume the vector bytes so the stream stays aligned,
            # whether or not the word is part of the vocabulary.
            vector_bytes = f.read(binary_len)
            if len(vector_bytes) != binary_len:
                raise EOFError(
                    "Unexpected end of word2vec file while reading the vector of {0!r}".format(word))
            idx = vocab.vocabulary_.get(word)
            # Index 0 is skipped (presumably the reserved unknown-word id --
            # confirm against the vocabulary implementation). None is skipped
            # too: indexing with None would broadcast over the whole matrix.
            if idx is not None and idx != 0:
                initW[idx] = np.frombuffer(vector_bytes, dtype='float32')
    return initW