-
Notifications
You must be signed in to change notification settings - Fork 15
/
Copy pathRelation.py
executable file
·99 lines (85 loc) · 3.58 KB
/
Relation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
__author__ = 'Tian Kang'
#============ Parser step 2: clinical relation identification =====#
# #
# argument 1: xml output dir #
# argument 2: file name #
# #
# email: [email protected] (Tian) #
# June, 2016 #
# #
#==================================================================#
import os,codecs,re
import sys
from libsvm import svmutil
from features_dir import relation_features
from xml.etree import ElementTree as ET
import sys
def _split_pair(pair_key):
    """Return the two entity indexes encoded in a "<left>_<right>" pair key."""
    parts = pair_key.split("_")
    return parts[0], parts[1]


def _partner_of(pair_key, node_index):
    """Return the entity paired with node_index in pair_key, or None.

    The comparison is exact.  The regular expressions used previously were
    only anchored on one side, so an index such as "T1" also matched pairs
    containing "T10" or "T11" and picked up relations it did not belong to.
    """
    left, right = _split_pair(pair_key)
    if left == node_index:
        return right
    if right == node_index:
        return left
    return None


def _remove_quietly(path):
    """Delete path if it exists; report (but do not raise) on failure."""
    if not os.path.exists(path):
        return
    try:
        os.remove(path)
    except OSError as err:
        # The earlier shell `rm` never aborted the run either.
        sys.stderr.write("Could not remove %s: %s\n" % (path, err))


def main():
    """Classify entity pairs of an NER result file and write the parsed XML.

    Command line:
        argv[1]  directory holding the XML files
        argv[2]  file name; a ".txt" suffix is stripped if present

    Reads "<argv[1]>/<name>_NER.xml", runs the SVM model from
    'trained_models/svm.model' on the pairs produced by
    relation_features.generate_pairs for every child of the XML root, sets a
    'relation' attribute on every 'entity' element ('None', or
    "<other index>:<relation type>" items joined by "|"), and writes the
    tree to "<argv[1]>/<name>_Parsed.xml".
    """
    if len(sys.argv) < 3:
        sys.exit("usage: Relation.py <xml output dir> <file name>")

    m = svmutil.svm_load_model('trained_models/svm.model')
    relation_tag = {0: 'None', 1: 'has_value', 2: 'has_temp', 3: 'modified_by'}

    filename = sys.argv[2]
    match = re.search(r'^(.*)\.txt', filename)
    if match:
        filename = match.group(1)
    input_dir = sys.argv[1] + '/' + filename + '_NER.xml'
    output_dir = sys.argv[1] + '/' + filename + '_Parsed.xml'

    # Single-argument print(...) emits the same text under the Python 2 print
    # statement and the Python 3 print function.
    print("%s %s" % ("Reading NER results from ", input_dir))
    tree = ET.ElementTree(file=input_dir)
    root = tree.getroot()

    feature_path = 'Tempfile/relation_scale'
    # NOTE(review): both containers accumulate over all children of the root,
    # which presumes entity indexes are unique within the file - TODO confirm.
    relations = {}   # pair key "<left>_<right>" -> predicted label
    paired = set()   # every entity index that occurs in some pair

    for child in root:
        # Close the feature file before libsvm reads it back so that all
        # buffered rows are on disk (closing an already closed file is a no-op).
        with codecs.open(feature_path, 'w') as syn_features:
            temp_pairs = relation_features.generate_pairs(child, syn_features)

        if temp_pairs:
            try:
                y, x = svmutil.svm_read_problem(feature_path)
                p_label, _p_acc, _p_val = svmutil.svm_predict(y, x, m)
            except ValueError:
                # Show the text of the child that could not be classified and
                # skip it; its entities receive no 'relation' attribute.
                for text_node in child.findall('text'):
                    print(text_node.text)
                continue
        else:
            p_label = []
            temp_pairs = []

        # Indexing (rather than zip) keeps a length mismatch between the
        # predictions and the pairs visible as an IndexError.
        for j, label in enumerate(p_label):
            pair_key = temp_pairs[j]
            relations[pair_key] = label
            paired.update(_split_pair(pair_key))

        for entity in child.findall('entity'):
            node_index = entity.attrib['index']
            found = []
            if node_index in paired:
                for pair_key in relations:
                    other_index = _partner_of(pair_key, node_index)
                    if other_index is None:
                        continue
                    relation_type = relation_tag[relations[pair_key]]
                    if relation_type == 'None':
                        continue
                    found.append(other_index + ":" + relation_type)
            entity.attrib['relation'] = "|".join(found) if found else 'None'

    # Remove the temporary files without spawning a shell.
    _remove_quietly("in.parse")
    _remove_quietly(feature_path)

    print("%s %s" % ("Writing Relation xml to ", output_dir))
    with codecs.open(output_dir, 'w') as new_tree:
        tree.write(new_tree)
    print("Finished!")
# Run the relation identification step only when executed as a script.
if __name__ == '__main__':
    main()