# model.py
import torch
import torch.nn as nn
import torch.nn.functional as F
class CNN_basic(nn.Module):
    """Small LeNet-style CNN for single-channel 28x28 inputs (e.g. MNIST)."""

    def __init__(self, num_classes=10):
        super(CNN_basic, self).__init__()
        self.accuracy = []  # populated outside the class; not used in forward
        self.conv1 = nn.Conv2d(1, 20, 5, 1)
        self.conv2 = nn.Conv2d(20, 50, 5, 1)
        self.fc1 = nn.Linear(800, 500)  # 800 = 50 channels * 4 * 4 spatial
        self.fc2 = nn.Linear(500, num_classes)

    def forward(self, x):
        x = F.relu(self.conv1(x))   # (N, 20, 24, 24)
        x = F.max_pool2d(x, 2, 2)   # (N, 20, 12, 12)
        x = F.relu(self.conv2(x))   # (N, 50, 8, 8)
        x = F.max_pool2d(x, 2, 2)   # (N, 50, 4, 4)
        x = x.view(-1, 800)         # flatten for the fully connected head
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x
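
# Hedged usage sketch (not part of the original file): CNN_basic's hard-coded
# flatten size of 800 implies single-channel 28x28 inputs such as MNIST
# (28 -> 24 -> 12 -> 8 -> 4 across the two conv/pool stages). The shapes and
# batch size below are illustrative assumptions.
def _demo_cnn_basic():
    model = CNN_basic(num_classes=10)
    x = torch.randn(2, 1, 28, 28)   # dummy batch of two MNIST-sized images
    logits = model(x)
    assert logits.shape == (2, 10)  # one logit per class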
class CNN_small(nn.Module):
    """Small LeNet-style CNN for 3-channel 32x32 inputs (e.g. CIFAR-10)."""

    def __init__(self, num_classes=10):
        super(CNN_small, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)  # 16 channels * 5 * 5 spatial
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, num_classes)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))  # (N, 6, 14, 14)
        x = self.pool(F.relu(self.conv2(x)))  # (N, 16, 5, 5)
        x = x.view(-1, 16 * 5 * 5)            # flatten for the classifier head
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
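
# Hedged usage sketch (assumption: CNN_small targets 3-channel 32x32 inputs
# such as CIFAR-10, which is what the 16 * 5 * 5 flatten size implies:
# 32 -> 28 -> 14 -> 10 -> 5 across the two conv/pool stages).
def _demo_cnn_small():
    model = CNN_small(num_classes=10)
    x = torch.randn(2, 3, 32, 32)   # dummy batch of two CIFAR-sized images
    logits = model(x)
    assert logits.shape == (2, 10)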
class LSTMClassifier(nn.Module):
    def __init__(self, batch_size, output_size, hidden_size, vocab_size, embedding_length, weights):
        """
        Arguments
        ---------
        batch_size : Size of each batch, matching the batch_size of the data
            returned by the TorchText BucketIterator.
        output_size : Number of output classes; 2 = (pos, neg).
        hidden_size : Size of the hidden state of the LSTM.
        vocab_size : Size of the vocabulary of unique words.
        embedding_length : Embedding dimension of the GloVe word embeddings.
        weights : Pre-trained GloVe word embeddings used to build the
            word-embedding look-up table.
        """
        super(LSTMClassifier, self).__init__()
        self.batch_size = batch_size
        self.output_size = output_size
        self.hidden_size = hidden_size
        self.vocab_size = vocab_size
        self.embedding_length = embedding_length
        self.word_embeddings = nn.Embedding(vocab_size, embedding_length)  # look-up table
        # Assign the pre-trained GloVe vectors to the look-up table and freeze them.
        self.word_embeddings.weight = nn.Parameter(weights, requires_grad=False)
        self.lstm = nn.LSTM(embedding_length, hidden_size)
        self.label = nn.Linear(hidden_size, output_size)
    def forward(self, input_sentence, batch_size=None):
        """
        Parameters
        ----------
        input_sentence : tensor of shape (batch_size, num_sequences)
        batch_size : default None. Used only for prediction on a single
            sentence after training (batch_size = 1).

        Returns
        -------
        final_output : tensor of shape (batch_size, output_size)
            Logits for the positive and negative classes, computed from the
            final hidden state of the LSTM.
        """
        # Map every token index in the input sequence to its pre-trained
        # GloVe word vector via the embedding look-up table.
        embeds = self.word_embeddings(input_sentence)  # (batch_size, num_sequences, embedding_length)
        embeds = embeds.permute(1, 0, 2)               # (num_sequences, batch_size, embedding_length)
        # Zero-initialise the LSTM's hidden and cell states on the same
        # device as the embedded input.
        effective_batch = self.batch_size if batch_size is None else batch_size
        h_0 = torch.zeros(1, effective_batch, self.hidden_size, device=embeds.device)
        c_0 = torch.zeros(1, effective_batch, self.hidden_size, device=embeds.device)
        output, (final_hidden_state, final_cell_state) = self.lstm(embeds, (h_0, c_0))
        final_output = self.label(final_hidden_state[-1])  # (batch_size, output_size)
        return final_output
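
# Hedged usage sketch: the random `weights` tensor below stands in for the
# pre-trained GloVe matrix the class expects (shape vocab_size x
# embedding_length); all sizes here are illustrative, not from the original.
def _demo_lstm_classifier():
    vocab_size, embedding_length, hidden_size = 100, 50, 32
    batch_size, output_size, seq_len = 4, 2, 7
    weights = torch.randn(vocab_size, embedding_length)  # stand-in for GloVe
    model = LSTMClassifier(batch_size, output_size, hidden_size,
                           vocab_size, embedding_length, weights)
    tokens = torch.randint(0, vocab_size, (batch_size, seq_len))
    logits = model(tokens)  # batch_size=None, so self.batch_size is used
    assert logits.shape == (batch_size, output_size)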