-
Notifications
You must be signed in to change notification settings - Fork 0
/
GetLiteral.py
80 lines (67 loc) · 2.98 KB
/
GetLiteral.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import requests
from random import *
import random
import wikipedia
from openpyxl import Workbook
'''
Generates a list of sentences to use as test data. The result is written to an Excel file called "translations.xlsx".
Example usage:
TestData=Get_Literal()
TestData.setTestData(samplesize=20)
Some exception warnings may be printed while it runs; they do not affect the final result.
'''
class Get_Literal(object):
    """Build an Excel workbook of one-sentence Wikipedia summaries.

    Words are drawn from an online word list and looked up on Wikipedia;
    each successful lookup contributes one sentence to column A of the
    "English" sheet of "translations.xlsx".
    """

    def __init__(self):
        # URL of the word list whose entries are used as Wikipedia search keys.
        self.wordlistUrl = "http://www-personal.umich.edu/~jlawler/wordlist"
        # Holds the list of words once Get_Wordlist() has run.
        self.wordlist = ''
        self.wordlistSize = 0
        # Number of sentences requested by the last setTestData() call.
        self.numberOfTestData = 0

    def Get_Wordlist(self):
        """Download the word list and store it on the instance.

        Sets ``self.wordlist`` to the whitespace-separated tokens of the
        response body and ``self.wordlistSize`` to their count.

        Raises:
            requests.exceptions.RequestException: if the download fails,
                times out, or returns an HTTP error status.
        """
        # A timeout keeps a stalled server from hanging the caller forever,
        # and the status check stops an error page being parsed as words.
        response = requests.get(url=self.wordlistUrl, timeout=30)
        response.raise_for_status()
        self.wordlist = response.text.split()
        self.wordlistSize = len(self.wordlist)

    def setTestData(self, samplesize):
        """Collect up to ``samplesize`` sentences and save them to Excel.

        The word list is walked in consecutive slices; one random word is
        taken from each slice and looked up on Wikipedia. Slices are half
        the width needed for ``samplesize`` picks, which leaves spare slices
        to make up for words whose lookup fails. If the word list runs out
        first, the workbook is saved with the sentences found so far.

        Args:
            samplesize: number of sentences wanted; must be greater than 0.

        Raises:
            ValueError: if ``samplesize`` is not positive.
            requests.exceptions.RequestException: if the word list cannot
                be downloaded.
        """
        # Validate before any network traffic so bad input fails fast.
        if samplesize <= 0:
            raise ValueError("samplesize must be greater than 0")

        self.Get_Wordlist()
        self.numberOfTestData = samplesize

        # Never let the slice width reach 0, otherwise randint() would be
        # asked for an empty range on every iteration.
        interval = max(1, int((self.wordlistSize / self.numberOfTestData) / 2))
        startIndex = 0
        sentencesCount = 0

        workbook = Workbook()
        workSheetOne = workbook.active
        workSheetOne.title = "English"

        # Stop when enough sentences were found OR the word list is used up;
        # without the second condition repeated failures would loop forever.
        while (sentencesCount < self.numberOfTestData
               and startIndex < self.wordlistSize):
            # Clamp the slice so the picked index always exists.
            lastIndex = min(startIndex + interval, self.wordlistSize) - 1
            randomNum = random.randint(startIndex, lastIndex)
            startIndex += interval
            try:
                sentences = wikipedia.summary(self.wordlist[randomNum],
                                              sentences=1)
            except Exception:
                # Best effort: many words have no page or are ambiguous, and
                # lookups can fail on the network; skip to the next slice.
                # KeyboardInterrupt/SystemExit are deliberately not caught.
                continue
            if sentences:
                sentencesCount += 1
                workSheetOne['A' + str(sentencesCount)] = sentences

        if sentencesCount < self.numberOfTestData:
            print("Only {0} of {1} requested sentences could be collected"
                  .format(sentencesCount, self.numberOfTestData))

        # NOTE: the original carried disabled code that copied this sheet
        # into "Swedish" and "Japanese" sheets; it was never executed.
        workbook.save("translations.xlsx")
        print ("Finished set test data")