-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathCNN.py
129 lines (99 loc) · 4.24 KB
/
CNN.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
import numpy as np
import pandas as pd
import cv2
import os
from sklearn.metrics import matthews_corrcoef
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense, Conv2D, MaxPooling2D, Flatten, Dropout
import keras.backend as K
from sklearn.metrics import matthews_corrcoef
####################### UTILITY FUNCTIONS #####################################
# custom matthew's correlation function
def matthews_correlation(y_true, y_pred):
y_pred_pos = K.round(K.clip(y_pred, 0, 1))
y_pred_neg = 1 - y_pred_pos
y_pos = K.round(K.clip(y_true, 0, 1))
y_neg = 1 - y_pos
tp = K.sum(y_pos * y_pred_pos)
tn = K.sum(y_neg * y_pred_neg)
fp = K.sum(y_neg * y_pred_pos)
fn = K.sum(y_pos * y_pred_neg)
numerator = (tp * tn - fp * fn)
denominator = K.sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn))
return 1.0 - numerator / (denominator + K.epsilon())
####################### PREPROCESSING #########################################
# read images from folder
directory = "./micro/method1-nomedian"
files = os.listdir(directory)
images = [file for file in files if os.path.splitext(file)[1] == '.jpg']
X = []
for image_name in images:
grey = cv2.imread(os.path.join(directory,image_name),0)
X.append(grey)
# read labels
labels_df = pd.read_csv("./metadata/train_labels.csv", sep=',')
y = [labels_df.loc[labels_df['filename']==os.path.splitext(image)[0]+".mp4", 'stalled'].squeeze() for image in images]
# some images have different shape (384, 418) than others (384, 512) so, convert them by adding padding (47) to left and right
for i, img in enumerate(X):
if img.shape == (384, 418):
X[i]= cv2.copyMakeBorder(img.copy(),0,0,47,47,cv2.BORDER_CONSTANT,value=(0,0,0))
if img.shape == (512, 384):
X[i]= np.zeros((384, 512), np.uint8)
# convert datasets to np arrays
X_array = np.array(X)
y_array = np.array(y)
# Normalize the images.
#X_array = (X_array / 255)
# train-test split
X_train, X_test, y_train, y_test = train_test_split(X_array, y_array, test_size = 0.2, stratify = y)
#reshape data to fit model
X_train = np.expand_dims(X_train, axis=3)
X_test = np.expand_dims(X_test, axis=3)
####################### CNN Classifier ########################################
# https://blog.keras.io/building-powerful-image-classification-models-using-very-little-data.html
#create model
model = Sequential()
#add model layers
model.add(Conv2D(32, kernel_size=3, activation="relu", input_shape=(384,512,1)))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(32, kernel_size=3, activation="relu"))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(64, kernel_size=3, activation="relu"))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation="sigmoid"))
#compile model using accuracy to measure model performance
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=["accuracy", 'AUC', matthews_correlation])
#train and test the model
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=3)
####################### EVALUATION ############################################
# matthew's correlation score on test set
pred = model.predict_classes(X_test)
pred_score = matthews_corrcoef(y_test, pred.reshape(-1))
print(pred_score)
####################### SAVE MODEL / LOAD MODEL ###############################
# refit model on whole dataset
model.fit( np.expand_dims(X_array, axis=3), y_array, epochs=3)
# save model
model.save_weights('cnn_method1-nomedian-wholedataset.h5')
# Load the model's saved weights.
cnn_method1_nomedian = Sequential([
Conv2D(32, kernel_size=3, activation="relu", input_shape=(384,512,1)),
MaxPooling2D(pool_size=(2, 2)),
Conv2D(32, kernel_size=3, activation="relu"),
MaxPooling2D(pool_size=(2, 2)),
Conv2D(64, kernel_size=3, activation="relu"),
MaxPooling2D(pool_size=(2, 2)),
Flatten(),
Dense(64, activation='relu'),
Dropout(0.5),
Dense(1, activation='sigmoid')
])
cnn_method1_nomedian.load_weights('cnn_method1-nomedian-wholedataset.h5')
# predict first 4 images in the test set
cnn_method1_nomedian.predict(X_test[:4])
# actual results for first 4 images in test set
y_test[:4]