forked from Heronalps/Visual_QA
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathvqa_decoder.py
71 lines (54 loc) · 3.08 KB
/
vqa_decoder.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import tensorflow as tf
import numpy as np
class vqa_decoder:
    """Decoder head that turns image and question features into answer logits.

    The decoder multiplies the two feature tensors element-wise, applies a
    ReLU, and feeds the result through two fully connected layers whose
    variables live in the ``fc_decoder`` variable scope.
    """

    def __init__(self, config):
        """Store the configuration; the graph itself is created by ``build``.

        Args:
            config: Object exposing the attributes read in ``build``
                (``PHASE``, ``BATCH_SIZE``, ``MAX_ANSWER_LENGTH``,
                ``POINT_WISE_FEATURES``, ``OUTPUT_SIZE``,
                ``INITIAL_LEARNING_RATE``).
        """
        self.config = config
        print("decoder model created")

    def build(self, image_features, question_features):
        """Build the decoder sub-graph.

        Always sets ``point_wise``, ``fcl_2``, ``logits``, ``softmax_logits``
        and ``predictions``.  When ``config.PHASE`` is truthy it additionally
        sets the ``answers`` / ``answer_masks`` placeholders, ``optimizer``
        and ``predictions_correct``.

        Args:
            image_features: Tensor of image features.
            question_features: Tensor of question features; it is multiplied
                element-wise with ``image_features``.
                # assumes both are (batch, POINT_WISE_FEATURES) float32,
                # since the product is matmul'ed with fc_W_1 -- TODO confirm
        """
        print("Building Decoder")
        config = self.config
        # The truthiness of PHASE selects the training branches below.
        # NOTE(review): if PHASE is a non-empty string such as 'test' it is
        # still truthy -- confirm against the config definition.
        self.is_train = config.PHASE
        self.image_features = image_features
        self.question_features = question_features

        # Setup the placeholders
        if self.is_train:
            self.answers = tf.placeholder(
                dtype=tf.int32,
                shape=[config.BATCH_SIZE, config.MAX_ANSWER_LENGTH])
            # answer_masks is created here but not read anywhere in this class.
            self.answer_masks = tf.placeholder(
                dtype=tf.int32,
                shape=[config.BATCH_SIZE, config.MAX_ANSWER_LENGTH])

        ## Point wise multiplication
        self.point_wise = tf.multiply(self.image_features, self.question_features)
        ## Adding activation layer
        self.point_wise = tf.nn.relu(self.point_wise)

        ## Build a Fully Connected Layer
        # Variable names and scope are kept unchanged so existing checkpoints
        # continue to load.
        with tf.variable_scope('fc_decoder', reuse=tf.AUTO_REUSE):
            fcw_1 = tf.get_variable(
                initializer=tf.truncated_normal(
                    [config.POINT_WISE_FEATURES, config.POINT_WISE_FEATURES],
                    dtype=tf.float32,
                    stddev=1e-1),
                name='fc_W_1', trainable=True)
            fcb_1 = tf.get_variable(
                initializer=tf.constant(
                    1.0, shape=[config.POINT_WISE_FEATURES], dtype=tf.float32),
                trainable=True, name='fc_b_1')
            fcl_1 = tf.nn.bias_add(tf.matmul(self.point_wise, fcw_1), fcb_1)
            fc1_out = tf.nn.relu(fcl_1)

            ## Adding one more fully connected layer
            fcw_2 = tf.get_variable(
                initializer=tf.truncated_normal(
                    [config.POINT_WISE_FEATURES, config.OUTPUT_SIZE],
                    dtype=tf.float32,
                    stddev=1e-1),
                name='fc_W_2', trainable=True)
            fcb_2 = tf.get_variable(
                initializer=tf.constant(
                    1.0, shape=[config.OUTPUT_SIZE], dtype=tf.float32),
                trainable=True, name='fc_b_2')
            self.fcl_2 = tf.nn.bias_add(tf.matmul(fc1_out, fcw_2), fcb_2)
            # NOTE(review): a ReLU on the final layer clips every negative
            # logit to 0 before the softmax; kept as-is because removing it
            # would change the outputs of already-trained models.
            self.logits = tf.nn.relu(self.fcl_2)

        if self.is_train:
            # Compute the loss for this step, if necessary.
            # Only column 0 of `answers` is used as the class label.
            cross_entropy_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=self.answers[:, 0],
                logits=self.logits)
            # The per-example loss vector is handed to minimize() without an
            # explicit reduction.
            self.optimizer = tf.train.AdamOptimizer(
                config.INITIAL_LEARNING_RATE).minimize(cross_entropy_loss)

        # These outputs depend only on the logits, so they are built in every
        # phase and remain usable for inference.
        self.softmax_logits = tf.nn.softmax(self.logits)
        self.predictions = tf.argmax(self.logits, 1, output_type=tf.int32)

        if self.is_train:
            ## Number of correct predictions in each run
            # Needs the `answers` placeholder, which only exists when the
            # training placeholders were created above; building it
            # unconditionally would fail with AttributeError otherwise.
            self.predictions_correct = tf.reduce_sum(
                tf.cast(tf.equal(self.predictions, self.answers[:, 0]),
                        tf.float32))

        print(" Decoder model built")