forked from vamsikrishna1902/IntentPredictionEval
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathCreateSQLLogs.py
171 lines (166 loc) · 10 KB
/
CreateSQLLogs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
from __future__ import division
import sys, operator
import os
import time
import QueryRecommender as QR
from bitmap import BitMap
import math
import heapq
import TupleIntent as ti
import ParseConfigFile as parseConfig
import ParseResultsToExcel
import ConcurrentSessions
import numpy as np
import argparse
from ParseConfigFile import getConfig
import threading
import copy
import multiprocessing
from multiprocessing.pool import ThreadPool
from multiprocessing import Array
import ReverseEnggQueries
import CreateSQLFromIntentVec
import ReverseEnggQueries_selOpConst
import CreateSQLFromIntentVec_selOpConst
def readFromConcurrentFile(concSessFile):
    """Load the concurrent-session query log into a dictionary.

    Each line is parsed as ``Session <s>, Query <q>;<query text>[;...]``:
    the part before the first ';' carries the session and query numbers,
    and the text between the first and second ';' is stored as the query.

    Query IDs start from 1 in the file but from 0 in the output-intent
    file, so every query ID is decremented by 1 when the key is built.

    Args:
        concSessFile: path of the concurrent sessions file.

    Returns:
        dict mapping ``"Session:<s>;Query:<q-1>"`` to the query text.

    Raises:
        SystemExit: with exit status 1 when the file cannot be opened, a
            line does not follow the expected layout, or the same
            session/query pair occurs twice.
    """
    curQueryDict = {}
    lineNum = 0  # stays 0 when the file cannot even be opened
    try:
        with open(concSessFile) as f:
            for lineNum, line in enumerate(f, 1):
                tokens = line.strip().split(";")
                idFields = tokens[0].split(", ")
                sessID = int(idFields[0].split(" ")[1])
                queryID = int(idFields[1].split(" ")[1]) - 1
                sessQueryID = "Session:" + str(sessID) + ";" + "Query:" + str(queryID)
                # Explicit check instead of assert so it survives `python -O`.
                if sessQueryID in curQueryDict:
                    raise ValueError("duplicate entry " + sessQueryID)
                curQueryDict[sessQueryID] = tokens[1]
    except (IOError, OSError, ValueError, IndexError) as err:
        # Only I/O and parse failures are handled here; anything else
        # (e.g. KeyboardInterrupt) propagates unchanged.
        print("cannot read line !! (" + str(concSessFile) + ", line " + str(lineNum) + "): " + str(err))
        # A non-zero status lets calling scripts detect the failure.
        sys.exit(1)
    return curQueryDict
def readFromOutputEvalFile(outputEvalQualityFileName):
    """Index the evaluation-quality file by its first two ';'-separated fields.

    Args:
        outputEvalQualityFileName: path of the eval-quality file.

    Returns:
        dict whose keys are ``"<field0>;<field1>"`` and whose values are the
        remaining fields of the same line re-joined with ';'.
    """
    metricsByKey = {}
    with open(outputEvalQualityFileName) as evalFile:
        for rawLine in evalFile:
            fields = rawLine.strip().split(";")
            sessField, queryField = fields[0], fields[1]
            metricsByKey[sessField + ";" + queryField] = ";".join(fields[2:])
    return metricsByKey
def procPredictedIntents(configDict, schemaDicts, curQueryDict, outputEvalDict, outputIntentFileName, outputSQLLog):
    """Write a readable SQL log entry for every line of the output-intent file.

    For each ';'-separated line the entry contains, in order: a separator,
    the eval metrics followed by the session/query key, the current query,
    the next query of the same session, the SQL ops regenerated from the
    actual intent (field 3), and the SQL ops regenerated from every
    predicted intent (fields 4 onwards, numbered from 0).

    Args:
        configDict: parsed configuration; 'INCLUDE_SEL_OP_CONST' selects the
            SQL-generation module and 'RNN_PREDICT_QUERY_OR_TABLE' selects
            query-level or table-level regeneration.
        schemaDicts: schema dictionaries handed to the regeneration calls.
        curQueryDict: "Session:<s>;Query:<q>" -> query text.
        outputEvalDict: "Session:<s>;Query:<q>" -> eval metrics string.
        outputIntentFileName: path of the output-intent file to read.
        outputSQLLog: path of the log file; removed first via
            QR.deleteIfExists, then appended to once per input line.
    """
    QR.deleteIfExists(outputSQLLog)

    includeSelOpConst = configDict['INCLUDE_SEL_OP_CONST']
    assert includeSelOpConst in ('True', 'False')
    createSqlLib = CreateSQLFromIntentVec if includeSelOpConst == 'False' else CreateSQLFromIntentVec_selOpConst

    predictMode = configDict['RNN_PREDICT_QUERY_OR_TABLE']
    assert predictMode in ('QUERY', 'TABLE')

    def renderOps(header, intentField):
        # intentField looks like "<label>:<bit string>"; only the bit string is decoded.
        intentBits = BitMap.fromstring(intentField.split(":")[1])
        if predictMode == 'QUERY':
            intentObj = createSqlLib.regenerateSQL(None, intentBits, schemaDicts)
            return header + createSqlLib.createSQLString(intentObj)
        if predictMode == 'TABLE':
            intentObj = createSqlLib.regenerateSQLTable(None, intentBits, None, schemaDicts, configDict)
            return header + createSqlLib.createSQLStringForTable(intentObj)
        # Only reachable when the assertion above is disabled: contribute nothing.
        return ""

    with open(outputIntentFileName) as intentFile:
        for record in intentFile:
            fields = record.strip().split(";")
            sessField, queryField = fields[0], fields[1]
            sessQueryID = sessField + ";" + queryField

            logParts = ["-----------------------------------------\n"]
            # Metrics come first, followed by the session/query key.
            logParts.append(outputEvalDict[sessQueryID] + ";" + sessQueryID + "\n")
            logParts.append("Current Query: " + curQueryDict[sessQueryID] + "\n")
            followingQueryID = "Query:" + str(int(queryField.split(":")[1]) + 1)
            logParts.append("Next Query: " + curQueryDict[sessField + ";" + followingQueryID] + "\n")
            logParts.append(renderOps("Actual SQL Ops:\n", fields[3]))

            # Everything after the actual intent is a predicted intent.
            for rank, predictedField in enumerate(fields[4:]):
                logParts.append(renderOps("Predicted SQL Ops " + str(rank) + ":\n", predictedField))

            ti.appendToFile(outputSQLLog, "".join(logParts))
    return
def _shortTermIntentStem(configDict):
    """Return the algorithm-specific name part shared by the intent and eval file names.

    The result is ``<ALGORITHM>[_<variant>]_<INTENT_REP>_<BIT_OR_WEIGHTED>``
    followed by ``_TOP_K_<TOP_K>_EPISODE_IN_QUERIES_<EPISODE_IN_QUERIES>``,
    where the variant is read from an algorithm-specific config key (SVD
    has none).

    Raises:
        ValueError: if configDict['ALGORITHM'] is not RNN, CF, SVD or QLEARNING.
    """
    variantKeyByAlgorithm = {
        'RNN': 'RNN_BACKPROP_LSTM_GRU',
        'CF': 'CF_COSINESIM_MF',
        'SVD': None,
        'QLEARNING': 'QL_BOOLEAN_NUMERIC_REWARD',
    }
    algorithm = configDict['ALGORITHM']
    if algorithm not in variantKeyByAlgorithm:
        raise ValueError("Unsupported ALGORITHM '" + str(algorithm) +
                         "': expected one of CF, QLEARNING, RNN, SVD " +
                         "(or pass the file names explicitly via -intent / -eval)")
    nameParts = [algorithm]
    variantKey = variantKeyByAlgorithm[algorithm]
    if variantKey is not None:
        nameParts.append(configDict[variantKey])
    nameParts.append(configDict['INTENT_REP'])
    nameParts.append(configDict['BIT_OR_WEIGHTED'])
    return "_".join(nameParts) + "_TOP_K_" + configDict['TOP_K'] + \
        "_EPISODE_IN_QUERIES_" + configDict['EPISODE_IN_QUERIES']


def createSQLLogsFromConfigDict(configDict, args):
    """Resolve all input/output paths and generate the SQL log.

    Every path can be given explicitly on ``args``; when an attribute is
    None the path is derived from ``configDict`` instead.

    Args:
        configDict: parsed configuration dictionary.
        args: object with the attributes ``intent``, ``eval``, ``conc`` and
            ``output`` (each a path string or None).

    Raises:
        ValueError: if an intent or eval file name has to be derived from
            the configuration but configDict['ALGORITHM'] is unsupported.
            Previously this surfaced later as an unbound-variable error.
    """
    accThres = float(configDict['ACCURACY_THRESHOLD'])

    if args.intent is not None:
        outputIntentFileName = args.intent
    else:
        # Build the stem first so an unsupported algorithm is reported
        # before anything else is attempted.
        intentStem = _shortTermIntentStem(configDict)
        outputIntentFileName = getConfig(configDict['OUTPUT_DIR']) + "/OutputFileShortTermIntent_" + intentStem

    if args.eval is not None:
        outputEvalQualityFileName = args.eval
    else:
        evalBaseName = "OutputEvalQualityShortTermIntent_" + _shortTermIntentStem(configDict) + \
            "_ACCURACY_THRESHOLD_" + str(accThres)
        outputDir = getConfig(configDict['OUTPUT_DIR'])
        if configDict['ALGORITHM'] == 'QLEARNING':
            # Kept as os.path.join so the resulting path string is exactly
            # what this branch has always produced.
            outputEvalQualityFileName = os.path.join(outputDir, evalBaseName)
        else:
            outputEvalQualityFileName = outputDir + "/" + evalBaseName

    if args.conc is not None:
        concSessFile = args.conc
    else:
        concSessFile = getConfig(configDict['CONCURRENT_QUERY_SESSIONS'])

    if args.output is not None:
        outputSQLLog = args.output
    else:
        outputSQLLog = getConfig(configDict['OUTPUT_DIR']) + "/outputSQLLog"

    curQueryDict = readFromConcurrentFile(concSessFile)
    outputEvalDict = readFromOutputEvalFile(outputEvalQualityFileName)

    assert configDict['INCLUDE_SEL_OP_CONST'] == 'True' or configDict['INCLUDE_SEL_OP_CONST'] == 'False'
    if configDict['INCLUDE_SEL_OP_CONST'] == 'False':
        schemaDicts = ReverseEnggQueries.readSchemaDicts(configDict)
    else:
        schemaDicts = ReverseEnggQueries_selOpConst.readSchemaDicts(configDict)

    procPredictedIntents(configDict, schemaDicts, curQueryDict, outputEvalDict, outputIntentFileName, outputSQLLog)
    return
if __name__ == "__main__":
    # Command-line entry point: -config is mandatory; the four path flags
    # are optional overrides passed through to createSQLLogsFromConfigDict.
    parser = argparse.ArgumentParser()
    parser.add_argument("-config", help="Config parameters file", type=str, required=True)
    optionalFlags = (
        ("-intent", "intent output file"),
        ("-eval", "eval quality file"),
        ("-conc", "concurrent session file"),
        ("-output", "output sql log file"),
    )
    for flagName, flagHelp in optionalFlags:
        parser.add_argument(flagName, help=flagHelp, type=str, required=False)
    args = parser.parse_args()
    configDict = parseConfig.parseConfigFile(args.config)
    createSQLLogsFromConfigDict(configDict, args)