CNN_predict.py
import argparse
from collections import defaultdict  # https://stackoverflow.com/questions/5900578/how-does-collections-defaultdict-work

import numpy as np
from tensorflow.keras.models import load_model

from ldl_utils import get_data_dict, read_json
from helper_functions_LSTM_TF import (
    gpu_fix_cuda,
    get_feature_vectors,
    compile_tweet_dict,
    check_label_frequency,
    build_text_labels,
    keras_feature_prep,
    write_predictions_to_json_cnn,
)
MAX_NB_WORDS = 20000
EMBEDDING_DIM = 100
MAX_SEQUENCE_LENGTH = 1000  # word limit per document
ITERATIONS = 100
EPOCHS = 25
BATCHSIZE = 32
q1_LOWER = 2
q1_UPPER = 12
q2_LOWER = 2
q2_UPPER = 12
q3_LOWER = 5
q3_UPPER = 20
DS_iter = 1


def CNN_predict(input_test_file, model_file, pred_output_name, test_vectors):
    # train_answer_counters = defaultdict(list)
    # JSONfile = read_json(input_train_file_name)
    # create_folder(folder_name)  # creates the folder for saving the LSTM model
    # train_message_dict = compile_tweet_dict(JSONfile["data"])
    # (fdict, choices) = get_data_dict(JSONfile["dictionary"])
    # train_answer_counters = get_feature_vectors(fdict, JSONfile["data"])
    gpu_fix_cuda()

    # Load the test set and build the per-message label-count vectors.
    test_answer_counters = defaultdict(list)
    JSONfile = read_json(input_test_file)
    test_message_dict = compile_tweet_dict(JSONfile["data"])
    (fdict, choices) = get_data_dict(JSONfile["dictionary"])
    test_answer_counters = get_feature_vectors(fdict, JSONfile["data"])
    test_text, test_labels = build_text_labels(test_message_dict, test_answer_counters)

    # Either tokenize the raw text with Keras or load precomputed feature vectors.
    if not test_vectors:
        test_features, test_word_index = keras_feature_prep(test_text, MAX_NB_WORDS, MAX_SEQUENCE_LENGTH)
        x_test = test_features
    else:
        x_test = np.load(test_vectors, allow_pickle=True)
    x_test = np.array(x_test)
    x_test = np.expand_dims(x_test, axis=-1)
    print(x_test, x_test.shape)

    y_test = test_labels
    print(y_test, y_test.shape)
    check_label_frequency(y_test)

    # Load the trained CNN and write per-choice probability predictions to JSON.
    model = load_model(model_file)
    predicted_probabilities = model.predict(x_test, batch_size=BATCHSIZE)
    write_predictions_to_json_cnn(predicted_probabilities, test_message_dict, choices, pred_output_name)


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--input_test_file", help="Input dev/test file JSON name")
    parser.add_argument("--input_test_file_vects", help="Input dev/test file npy vectors name", default=False)
    parser.add_argument("--input_model_file", help="Trained model file name")
    parser.add_argument("--output_pred_name", help="Output JSON predictions file name")
    args = parser.parse_args()
    CNN_predict(args.input_test_file, args.input_model_file, args.output_pred_name, args.input_test_file_vects)


if __name__ == '__main__':
    main()
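
# Example invocation (the file names below are placeholders, not files shipped with
# this repository):
#
#   python CNN_predict.py --input_test_file dev.json \
#       --input_model_file cnn_model.h5 \
#       --output_pred_name cnn_predictions.json
#
# Optionally pass --input_test_file_vects dev_vectors.npy to predict from precomputed
# .npy feature vectors instead of tokenizing the raw text.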