# models_2019.py
from sklearn.svm import SVR
import tensorflow_ranking as tfr
from keras import optimizers
import tensorflow as tf
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
from sklearn import linear_model
from numpy.random import seed

SEED = 1  # use this constant seed everywhere
seed(SEED)  # NumPy pseudo-random generator
tf.set_random_seed(SEED)  # TensorFlow (1.x) graph-level seed
def Get_score(Y_pred, Y_true):
    '''Print the Spearman's rank correlation coefficient between predictions and targets.
    For 2-D inputs the score is computed column by column.'''
    Y_pred = np.squeeze(Y_pred)
    Y_true = np.squeeze(Y_true)
    if Y_pred.shape != Y_true.shape:
        print("Input shapes don't match!")
    else:
        if len(Y_pred.shape) == 1:
            Res = pd.DataFrame({'Y_true': Y_true, 'Y_pred': Y_pred})
            your_spearman = Res[['Y_true', 'Y_pred']].corr(method='spearman', min_periods=1)
            print("The Spearman's correlation coefficient is: %.3f" % your_spearman.iloc[1, 0])
            result = your_spearman.iloc[1, 0]
            #results.append(result)
        else:
            # 2-D targets: score each output column separately
            for ii in range(Y_pred.shape[1]):
                Get_score(Y_pred[:, ii], Y_true[:, ii])
def Get_score_ind(pred, Y_true):
    '''Compute and return the Spearman's rank correlation for a single prediction vector.'''
    Y_pred = pd.DataFrame(data=[pred])
    Y_pred = Y_pred.T
    Y_pred = np.squeeze(Y_pred.values)
    Y_true = np.squeeze(Y_true)
    short_term = pd.DataFrame({'Y_pred': Y_pred, 'Y_test': Y_true})
    your_spearman = short_term[['Y_pred', 'Y_test']].corr(method='spearman', min_periods=1)
    print("The Spearman's correlation coefficient is: %.3f" % your_spearman.iloc[1, 0])
    return your_spearman.iloc[1, 0]
def do_pca(sentence_features, n_dimensions):
    # Make everything into one big matrix (words x features_per_word)
    sentence_lengths = [sf.shape[0] for sf in sentence_features]
    X = np.concatenate(sentence_features, axis=0)
    # Transform with PCA
    pca = PCA(n_components=n_dimensions)
    X_transformed = pca.fit_transform(X)
    # Put everything back into per-sentence arrays
    reduced_dim_sentence_features = np.split(X_transformed, np.cumsum(sentence_lengths)[:-1])
    return reduced_dim_sentence_features
def three_dense_layers_model_1_outuput(X_train, X_test, Y_train, Y_test):
    '''Fully connected network with two hidden layers and a single regression output.'''
    print('X_train', X_train.shape)
    print('X_test', X_test.shape)
    print('Y_train', Y_train.shape)
    print('Y_test', Y_test.shape)
    # Save the number of columns in predictors: n_cols
    n_cols = X_train.shape[1]
    # Set up the model
    model = Sequential()
    # Add the first layer
    model.add(Dense(100, activation='relu', input_shape=(n_cols,)))
    #model.add(Dropout(0.2))
    # Add the second layer
    model.add(Dense(50, activation='relu'))
    # Add the output layer
    model.add(Dense(1))
    # Compile the model
    #adam = optimizers.Adam(lr=0.001)
    #model.compile(optimizer='adam', loss='mean_absolute_percentage_error')
    model.compile(optimizer='adam', loss='mean_squared_error')
    # Define early_stopping_monitor
    early_stopping_monitor = EarlyStopping(patience=10)
    # Fit the model
    model.fit(X_train, Y_train, validation_split=0.2, epochs=200, callbacks=[early_stopping_monitor])
    # Verify that the model contains information from compiling
    print("Loss function: " + model.loss)
    predictions = model.predict(X_test)
    return predictions
def three_dense_layers_model_2_outuput(X_train, X_test, Y_train, Y_test):
    '''Fully connected network with two hidden layers and two regression outputs.'''
    print('X_train', X_train.shape)
    print('X_test', X_test.shape)
    print('Y_train', Y_train.shape)
    print('Y_test', Y_test.shape)
    # Save the number of columns in predictors: n_cols
    n_cols = X_train.shape[1]
    # Set up the model
    model = Sequential()
    # Add the first layer
    model.add(Dense(100, activation='relu', input_shape=(n_cols,)))
    # Add the second layer
    model.add(Dense(50, activation='relu'))
    # Add the output layer (two targets)
    model.add(Dense(2))
    # Compile the model
    #adam = optimizers.Adam(lr=0.001)
    model.compile(optimizer='adam', loss='mean_squared_error')
    # Define early_stopping_monitor
    early_stopping_monitor = EarlyStopping(patience=20)
    # Fit the model
    model.fit(X_train, Y_train, validation_split=0.2, epochs=40, callbacks=[early_stopping_monitor])
    # Verify that the model contains information from compiling
    print("Loss function: " + model.loss)
    predictions = model.predict(X_test)
    return predictions
def SVR_imp(X_train, X_test, Y_train, Y_test):
    '''Support vector regression baseline.'''
    model = SVR()
    model.fit(X_train, Y_train)
    predictions = model.predict(X_test)
    return predictions
def Lasso_imp(X_train, X_test, Y_train, Y_test):
    '''Lasso linear regression baseline.'''
    model = linear_model.Lasso(alpha=0.1)
    model.fit(X_train, Y_train)
    predictions = model.predict(X_test)
    return predictions
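

# Minimal usage sketch (not part of the original pipeline): the synthetic arrays below
# are hypothetical and only illustrate how the helpers above fit together; the real
# feature/target construction lives elsewhere in the repository.
if __name__ == '__main__':
    # Synthetic regression data: 200 samples, 20 features, 1 noisy target (illustrative only)
    rng = np.random.RandomState(SEED)
    X = rng.rand(200, 20)
    Y = 2.0 * X[:, 0] + rng.normal(scale=0.1, size=200)
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=SEED)

    # Lasso baseline, scored with the Spearman helper
    lasso_preds = Lasso_imp(X_train, X_test, Y_train, Y_test)
    Get_score(lasso_preds, Y_test)

    # SVR baseline
    svr_preds = SVR_imp(X_train, X_test, Y_train, Y_test)
    Get_score(svr_preds, Y_test)

    # do_pca on a list of per-sentence feature matrices (illustrative shapes)
    sentence_features = [rng.rand(n, 20) for n in (5, 7, 3)]
    reduced = do_pca(sentence_features, n_dimensions=2)
    print([r.shape for r in reduced])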