DQN.py
# Requires TensorFlow 1.x: tf.contrib (slim, xavier_initializer) was removed in TF 2.0.
import tensorflow as tf
import tensorflow.contrib.slim as slim
from tensorflow.contrib.layers import xavier_initializer

from Game import GameEnv
class DQN(object):
    """Deep Q Network: maps a flattened RGB game frame to one Q-value per action."""

    def __init__(self):
        # Placeholders: flattened RGB frames, target Q-values, and the actions taken.
        self.x = tf.placeholder(tf.float32, [None, GameEnv.s_shape[0] * GameEnv.s_shape[1] * 3], 'x')
        self.x_reshaped = tf.reshape(self.x, [-1, GameEnv.s_shape[0], GameEnv.s_shape[1], 3])
        self.targetQ = tf.placeholder(tf.float32, [None], 'target')
        self.actions = tf.placeholder(tf.int32, [None], 'actions')

        # --- Q Network ---
        # Three convolutional layers (slim.conv2d applies ReLU by default; no biases).
        self.conv1 = slim.conv2d(inputs=self.x_reshaped, num_outputs=32, kernel_size=[5, 5],
                                 stride=[2, 2], padding='VALID', biases_initializer=None)
        self.conv2 = slim.conv2d(inputs=self.conv1, num_outputs=64, kernel_size=[5, 5],
                                 stride=[2, 2], padding='VALID', biases_initializer=None)
        self.conv3 = slim.conv2d(inputs=self.conv2, num_outputs=64, kernel_size=[5, 5],
                                 stride=[2, 2], padding='VALID', biases_initializer=None)

        # Flatten the output of the convolutions. The hardcoded 832 below must equal
        # 64 channels times the spatial size of conv3's output, which is fixed by
        # GameEnv.s_shape.
        self.flat = slim.flatten(self.conv3)

        # Output layer: a single bias-free fully connected layer producing the Q-values.
        init = xavier_initializer()
        w = tf.Variable(init([832, GameEnv.a_size]))
        self.q_vals = tf.matmul(self.flat, w)
        # Greedy action: index of the largest Q-value.
        self.predict = tf.argmax(self.q_vals, axis=1)

        # Loss: mean squared error between the Q-value of the action actually taken
        # (selected via a one-hot mask) and the externally supplied target Q-value.
        self.actions_one_hot = tf.one_hot(self.actions, GameEnv.a_size, dtype=tf.float32)
        self.Q = tf.reduce_sum(tf.multiply(self.q_vals, self.actions_one_hot), axis=1)
        self.loss = tf.reduce_mean(tf.square(self.Q - self.targetQ))

        # Optimizer
        self.train = tf.train.AdamOptimizer().minimize(self.loss)
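

# --- Usage sketch (not part of the original file) ---
# A minimal illustration of how this graph might be driven, assuming GameEnv
# exposes reset() -> state and step(action) -> (next_state, reward, done), and
# that states come back already flattened to length s_shape[0]*s_shape[1]*3.
# Those method names and the discount factor `gamma` are assumptions; a full
# agent would add epsilon-greedy exploration, a replay buffer, and a separate
# target network, all omitted here for brevity.
if __name__ == '__main__':
    import numpy as np

    env = GameEnv()
    net = DQN()
    gamma = 0.99  # assumed discount factor

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        s = env.reset()
        for _ in range(100):
            # Act greedily with respect to the current Q-values.
            a = sess.run(net.predict, feed_dict={net.x: [s]})[0]
            s1, r, done = env.step(a)
            # One-step Q-learning target: r + gamma * max_a' Q(s', a'),
            # with the bootstrap term zeroed at terminal states.
            q_next = sess.run(net.q_vals, feed_dict={net.x: [s1]})
            target = r + gamma * np.max(q_next) * (1.0 - float(done))
            sess.run(net.train, feed_dict={net.x: [s],
                                           net.targetQ: [target],
                                           net.actions: [a]})
            s = env.reset() if done else s1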