-
Notifications
You must be signed in to change notification settings - Fork 37
/
EnExtractor.py
36 lines (31 loc) · 1.4 KB
/
EnExtractor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
#-*- encoding: utf-8 -*-
from TextRank import EnKeywordExtraction, EnSentenceExtraction
class EnExtractor(object):
    """English keyphrase and summary extractor.

    Thin facade over two TextRank helpers that are both built from the
    same stop-word file: an ``EnKeywordExtraction`` instance (stored as
    ``keyphrase_extraction``) and an ``EnSentenceExtraction`` instance
    (stored as ``summary_extraction``).
    """

    def __init__(self, stop_words_file='./TextRank/trainer/stopword_en.data'):
        """Build the keyword and sentence extractors.

        Args:
            stop_words_file: Path of the stop-word list handed to both
                helpers. The default is a relative path, so it is
                resolved against the current working directory.
        """
        super(EnExtractor, self).__init__()
        self.keyphrase_extraction = EnKeywordExtraction(
            stop_words_file=stop_words_file)
        self.summary_extraction = EnSentenceExtraction(
            stop_words_file=stop_words_file)

    def keyphrase_train(self, text, article_type='Abstract'):
        """Train the keyword extractor on ``text`` and return its keyphrases.

        Args:
            text: Document to analyse; forwarded to ``train`` together
                with ``lower=True``.
            article_type: Forwarded unchanged to
                ``get_keyphrases_maximal``.

        Returns:
            Whatever ``get_keyphrases_maximal`` returns for the trained
            text.
        """
        extractor = self.keyphrase_extraction
        extractor.train(text=text, lower=True)
        return extractor.get_keyphrases_maximal(article_type=article_type)

    def summary_train(self, text, sentences_percent='default',
                      sim_func='wordnet', num=100):
        """Train the sentence extractor on ``text`` and return key sentences.

        Args:
            text: Document to summarise.
            sentences_percent: The string ``'default'`` selects
                ``get_key_sentences_100w``; any other value is forwarded
                to ``get_key_sentences``.
            sim_func: Similarity function name forwarded to ``train``.
            num: Accepted for interface compatibility; this method does
                not use it.

        Returns:
            The result of whichever key-sentence getter was selected.
        """
        ranker = self.summary_extraction
        ranker.train(text=text, sim_func=sim_func)
        if sentences_percent == 'default':
            return ranker.get_key_sentences_100w()
        return ranker.get_key_sentences(sentences_percent=sentences_percent)

    def get_tag(self, text):
        """Return the keyword extractor's ``get_tag`` result for ``text``."""
        return self.keyphrase_extraction.get_tag(text)
if __name__ == '__main__':
    # Demo run: extract keyphrases and a summary from a sample document.
    # The file is only read, so open it read-only and let the context
    # manager close the handle (the previous 'r+' handle was never closed
    # and required write permission on the file).
    with open('../001.txt', 'r') as source_file:
        text = source_file.read()
    extractor = EnExtractor(stop_words_file='./TextRank/trainer/stopword_en.data')
    keyphrase = extractor.keyphrase_train(text=text)
    summary = extractor.summary_train(text)
    # Single-argument parenthesised prints behave the same under the
    # Python 2 print statement and the Python 3 print function.
    print(keyphrase)
    print("--------------------")
    print(summary)