-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathDNN_base.py
75 lines (61 loc) · 2.37 KB
/
DNN_base.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
"""
Base model for the DNN, adapted from
https://github.com/bundinyo/lime/blob/5b1f754a4e266a451ed9bf63525bc64e7bd567f7/lstm_lime.py
"""
import numpy as np
from keras.layers import Dense, LSTM, Dropout
from keras.layers.embeddings import Embedding
from keras.models import Sequential
from keras.preprocessing.sequence import pad_sequences
from keras.preprocessing.text import Tokenizer
from sklearn.base import BaseEstimator, TransformerMixin
class TextsToSequences(Tokenizer, BaseEstimator, TransformerMixin):
    """
    Sklearn transformer to convert texts to indices list
    (e.g. [["the cute cat"], ["the dog"]] -> [[1, 2, 3], [1, 4]])

    The class is a Keras ``Tokenizer`` exposing the sklearn
    ``fit`` / ``transform`` interface so it can be used as a pipeline step.
    """

    def __init__(self, **kwargs):
        # All keyword arguments are forwarded unchanged to the Tokenizer.
        super().__init__(**kwargs)

    def fit(self, texts, y=None):
        """Build the tokenizer vocabulary from ``texts`` and return ``self``.

        ``y`` is accepted for sklearn API compatibility and is ignored.
        """
        self.fit_on_texts(texts)
        return self

    def transform(self, texts, y=None):
        """Return the index sequences for ``texts`` as a NumPy array.

        When all sequences have the same length (or there are none) the
        result is whatever ``np.array`` infers from the nested lists, as
        before.  When the lengths differ the result is a 1-D ``object`` array
        whose elements are the per-text lists of indices.

        ``y`` is accepted for sklearn API compatibility and is ignored.
        """
        sequences = self.texts_to_sequences(texts)
        # Recent NumPy releases refuse to build an array from ragged nested
        # lists unless dtype=object is requested explicitly, so only ask for
        # it when the sequence lengths actually differ.
        is_ragged = len({len(seq) for seq in sequences}) > 1
        if is_ragged:
            return np.array(sequences, dtype=object)
        return np.array(sequences)
class Padder(BaseEstimator, TransformerMixin):
    """
    Bring index sequences of uneven length to one common length.

    Sequences longer than ``maxlen`` keep only their last ``maxlen``
    entries; sequences shorter than ``maxlen`` are padded with zeros on
    the left.

    Attributes
    ----------
    maxlen: int
        length of every sequence after padding / cropping
    max_index: int
        largest index seen while fitting; any larger index encountered in
        ``transform`` is replaced by 0
    """

    def __init__(self, maxlen=500):
        self.maxlen = maxlen
        # Populated by fit(); stays None until then.
        self.max_index = None

    def fit(self, X, y=None):
        """Record the largest index present in the padded ``X``; return self."""
        padded = pad_sequences(X, maxlen=self.maxlen)
        self.max_index = padded.max()
        return self

    def transform(self, X, y=None):
        """Pad / crop ``X`` to ``maxlen`` and zero out indices above ``max_index``."""
        padded = pad_sequences(X, maxlen=self.maxlen)
        # Indices larger than anything seen during fit are mapped to 0.
        unseen = padded > self.max_index
        padded[unseen] = 0
        return padded
def create_model(vocab_size=20000, embedding_dim=64, input_length=140):
    """Build, compile and return the binary classification network.

    The network is an embedding layer followed by dropout, three
    position-wise dense layers, an LSTM, three further dense layers and a
    single sigmoid output unit.  It is compiled with binary cross-entropy
    loss, the Adam optimizer and accuracy as metric.  A summary of the
    model is printed as a side effect.

    Parameters
    ----------
    vocab_size: int
        number of distinct indices the embedding layer accepts
    embedding_dim: int
        size of each embedding vector
    input_length: int
        length of the (padded) input sequences

    Returns
    -------
    The compiled ``Sequential`` model.
    """
    model = Sequential()
    model.add(
        Embedding(
            vocab_size,
            embedding_dim,
            input_length=input_length,
            trainable=True,
        )
    )
    model.add(Dropout(0.25))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(512, activation='relu'))
    model.add(Dense(128, activation='relu'))
    model.add(LSTM(100))
    model.add(Dense(512, activation='relu'))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    # summary() prints the table itself and returns None; wrapping it in
    # print() would emit an extra "None" line.
    model.summary()
    return model