-
Notifications
You must be signed in to change notification settings - Fork 2
/
run_lunarlander.py
82 lines (59 loc) · 1.96 KB
/
run_lunarlander.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
"""
Deep Q Network
CartPole-v0
"""
import gym
from deep_q_network import DeepQNetwork
import numpy as np
# Train a DeepQNetwork agent on the LunarLander-v2 environment, printing a
# short progress report at the end of every episode.

env = gym.make('LunarLander-v2')
# Work with the underlying environment object rather than the wrapped one.
# NOTE(review): this presumably also drops any step-limit wrapper added by
# gym.make, so an episode ends only when the environment reports `done`.
env = env.unwrapped

# Show the action/observation spaces the agent will be sized against.
print(env.action_space)
print(env.observation_space)
print(env.observation_space.high)
print(env.observation_space.low)

# Initialize DQN: one output per discrete action, one input per observation
# component.
DQN = DeepQNetwork(
    n_y=env.action_space.n,
    n_x=env.observation_space.shape[0],
    learning_rate=0.01,
    replace_target_iter=100,
    memory_size=500,
    batch_size=32,
    epsilon_max=0.9,
    epsilon_greedy_increment=0.001,
)

RENDER_ENV = False
# Number of training episodes; the loop below runs exactly this many.
EPISODES = 400
rewards = []
# Rendering is switched on once the best episode reward exceeds this value.
RENDER_REWARD_MIN = 0
# Environment steps taken across all episodes; gates the start of training.
total_steps_counter = 0

try:
    for episode in range(EPISODES):
        observation = env.reset()
        episode_reward = 0

        while True:
            if RENDER_ENV:
                env.render()

            # 1. Choose an action based on observation
            action = DQN.choose_action(observation)

            # 2. Take the chosen action in the environment
            observation_, reward, done, info = env.step(action)

            # 3. Store transition
            DQN.store_transition(observation, action, reward, observation_)

            episode_reward += reward

            # 4. Train, but only after an initial stretch of pure experience
            #    collection.
            if total_steps_counter > 1000:
                DQN.learn()

            # Count every environment step, including the terminal one, so
            # the counter is not skipped by the `break` below.
            total_steps_counter += 1

            if done:
                rewards.append(episode_reward)
                max_reward_so_far = np.amax(rewards)

                print("==========================================")
                print("Episode: ", episode)
                print("Reward: ", round(episode_reward, 2))
                print("Epsilon: ", round(DQN.epsilon, 2))
                print("Max reward so far: ", max_reward_so_far)

                # Render env if we get to rewards minimum
                if max_reward_so_far > RENDER_REWARD_MIN:
                    RENDER_ENV = True

                break

            # Save observation
            observation = observation_
finally:
    # Release the environment (and any render window) even if training is
    # interrupted or raises.
    env.close()

# DQN.plot_cost()