Changed cPickle save/load files to binary. #55

Closed
wants to merge 9 commits
7 changes: 7 additions & 0 deletions .gitignore
@@ -2,3 +2,10 @@
roms
utils
data
*.pkl
.idea/
*.csv
params.json
*.jpg
*.gz
deep_q_rl/results/breakout_04-07-0008_00_717000/LegalActionSet
24 changes: 18 additions & 6 deletions deep_q_rl/ale_agent.py
@@ -98,14 +98,19 @@ def initialize(self, action_set):
params=self.params)
else:
handle = open(self.params.nn_file, 'r')
self.network = cPickle.load(handle)
try:
self.network = cPickle.load(handle)
except EOFError:
handle.close()
handle = open(self.params.nn_file, 'rb')
self.network = cPickle.load(handle)

# region Dumping/Logging
def _create_export_dir(self):
# CREATE A FOLDER TO HOLD RESULTS
# this is now just exp_pref + timestamp. params are in params.json
time_str = datetime.datetime.now().strftime("_%m-%d-%H%M_%S_%f")
export_dir = self.exp_pref + time_str
export_dir = os.path.join('results', self.exp_pref) + time_str
try:
os.stat(export_dir)
except OSError:
@@ -117,7 +122,7 @@ def _open_params_file(self):
self.params_file = open(self.export_dir + '/params.json', 'w')
param_dict = {k:v for k, v in self.params.__dict__.items() \
if "__" not in k \
and isinstance(v, (int, float, str, bool))}
and isinstance(v, (int, float, str, bool, tuple))}
json.dump(param_dict, self.params_file, indent=4)
self.params_file.close()

@@ -127,30 +132,37 @@ def _open_results_file(self):
self.results_file.write(
'epoch,num_episodes,total_reward,reward_per_epoch,mean_q\n')
self.results_file.flush()
self.results_file.close()

def _open_learning_file(self):
logging.info("OPENING " + self.export_dir + '/learning.csv')
self.learning_file = open(self.export_dir + '/learning.csv', 'w', 0)
self.learning_file.write('mean_loss,epsilon\n')
self.learning_file.flush()
self.learning_file.close()

def _update_results_file(self, epoch, num_episodes, holdout_sum):
logging.info("OPENING " + self.export_dir + '/results.csv')
self.results_file = open(self.export_dir + '/results.csv', 'a', 0)
out = "{},{},{},{},{}\n".format(epoch, num_episodes,
self.total_reward,
self.total_reward / float(num_episodes),
holdout_sum)

self.results_file.write(out)
self.results_file.flush()
self.results_file.close()

def _update_learning_file(self):
out = "{},{}\n".format(np.mean(self.loss_averages),
self.epsilon)
self.learning_file = open(self.export_dir + '/learning.csv', 'a', 0)
out = "{},{}\n".format(np.mean(self.loss_averages), self.epsilon)
self.learning_file.write(out)
self.learning_file.flush()
self.learning_file.close()

def _persist_network(self, network_filename):
full_filename = os.path.join(self.export_dir, network_filename)
with open(full_filename, 'w') as net_file:
with open(full_filename, 'wb') as net_file:
cPickle.dump(self.network, net_file, -1)

# endregion
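For reference, a self-contained sketch of the load/save pattern these ale_agent.py hunks adopt (Python 2 cPickle, as in this repo; the file paths and function names below are placeholders, not part of the PR):

import cPickle

def load_network(nn_file):
    # Try text mode first so older text-mode pickles still load,
    # then fall back to binary mode on EOFError.
    handle = open(nn_file, 'r')
    try:
        network = cPickle.load(handle)
    except EOFError:
        handle.close()
        handle = open(nn_file, 'rb')
        network = cPickle.load(handle)
    handle.close()
    return network

def save_network(network, full_filename):
    # Binary mode with the highest pickle protocol, as in _persist_network.
    with open(full_filename, 'wb') as net_file:
        cPickle.dump(network, net_file, -1)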
66 changes: 48 additions & 18 deletions deep_q_rl/ale_experiment.py
@@ -12,22 +12,26 @@
# This is appropriate for breakout, but it may need to be modified
# for other games.
import time
from deep_q_rl.visual_interface import VisualInterface

CROP_OFFSET = 8


class ALEExperiment(object):
def __init__(self, ale, agent, resized_width, resized_height,
resize_method, num_epochs, epoch_length, test_length,
frame_skip, death_ends_episode, max_start_nullops, rng):
frame_skip, death_ends_episode, max_start_nullops, rng, display_screen, all_actions):
self.ale = ale
self.agent = agent
self.num_epochs = num_epochs
self.epoch_length = epoch_length
self.test_length = test_length
self.frame_skip = frame_skip
self.death_ends_episode = death_ends_episode
self.min_action_set = ale.getMinimalActionSet()
if all_actions:
self.min_action_set = ale.getLegalActionSet()
else:
self.min_action_set = ale.getMinimalActionSet()
self.resized_width = resized_width
self.resized_height = resized_height
self.resize_method = resize_method
@@ -42,14 +46,17 @@ def __init__(self, ale, agent, resized_width, resized_height,
self.terminal_lol = False # Most recent episode ended on a loss of life
self.max_start_nullops = max_start_nullops
self.rng = rng
self.display_screen = display_screen

def run(self):
"""
Run the desired number of training epochs, a testing epoch
is conducted after each training epoch.
"""

self.agent.initialize(self.ale.getMinimalActionSet())
self.agent.initialize(self.min_action_set)
if self.display_screen:
self.vis = VisualInterface(self.agent.network, self.agent.data_set)

for epoch in range(1, self.num_epochs + 1):
self.agent.start_epoch(epoch)
Expand All @@ -74,22 +81,32 @@ def run_epoch(self, epoch, num_steps, testing=False):
"""
self.terminal_lol = False # Make sure each epoch starts with a reset.
steps_left = num_steps
total_steps = num_steps
episode = 0
episode_start_time = time.time()
epoch_avg_reward = 0.0
t = episode_start_time

while steps_left > 0:
prefix = "testing" if testing else "training"
prefix = "Testing" if testing else "Training"
# logging.info(prefix + " epoch: " + str(epoch) + " steps_left: " + str(steps_left))
_, num_steps, episode_reward = self.run_episode(steps_left, testing)

epoch_avg_reward = (epoch_avg_reward * episode + episode_reward) / (episode + 1)
episode += 1

episode_time = time.time() - t
t = time.time()
total_time = t - episode_start_time

t0 = time.time()
_, num_steps = self.run_episode(steps_left, testing)
steps_left -= num_steps
t1 = time.time()
total_time = t1 - t0
if episode_time == 0:
steps_sec = num_steps
else:
steps_sec = num_steps / episode_time

logging.info("[{:8}] epoch {:3} | num_steps {:7} " \
"steps_left {:7} steps/second: {:>7.2f}"
.format(prefix,
epoch,
num_steps,
steps_left,
num_steps / total_time))
logging.info("{} episode {} of epoch {} completed with reward {} in {:.1f} sec. Total time: {:.1f}. Epoch avg reward: {:.2f}. Steps: {}/{}. Steps/sec: {:.2f}".format(
prefix, episode, epoch, episode_reward, episode_time, total_time, epoch_avg_reward, total_steps - steps_left, total_steps, steps_sec))

def _init_episode(self):
""" This method resets the game if needed, performs enough null
@@ -119,7 +136,7 @@ def _act(self, action):
reward = self.ale.act(action)
index = self.buffer_count % self.buffer_length

self.ale.getScreenGrayscale(self.screen_buffer[index, ...])
self.screen_buffer[index, ...] = self.ale.getScreenGrayscale(self.screen_buffer[index, ...])

self.buffer_count += 1
return reward
@@ -151,22 +168,31 @@ def run_episode(self, max_steps, testing):

action = self.agent.start_episode(self.get_observation())
num_steps = 0
episode_reward = 0
terminal = False

while True:
reward = self._step(self.min_action_set[action])
self.terminal_lol = (self.death_ends_episode and not testing and
self.ale.lives() < start_lives)
terminal = self.ale.game_over() or self.terminal_lol
episode_reward += reward
num_steps += 1

if terminal or num_steps >= max_steps:
self.agent.end_episode(reward, terminal)
break

action = self.agent.step(reward, self.get_observation())
if self.display_screen:
if testing:
self.vis.data_set = self.agent.test_data_set
else:
self.vis.data_set = self.agent.data_set
self.vis.draw(num_steps, self.ale.getScreenRGB())

return terminal, num_steps

return terminal, num_steps, episode_reward

def get_observation(self):
""" Resize and merge the previous two screen images """
Expand All @@ -175,7 +201,7 @@ def get_observation(self):
index = self.buffer_count % self.buffer_length - 1
max_image = np.maximum(self.screen_buffer[index, ...],
self.screen_buffer[index - 1, ...])
return self.resize_image(max_image)
return self.resize_image(self.screen_buffer[index, ...])

def resize_image(self, image):
""" Appropriately resize a single image """
@@ -199,5 +225,9 @@ def resize_image(self, image):
return cv2.resize(image,
(self.resized_width, self.resized_height),
interpolation=cv2.INTER_LINEAR)
elif self.resize_method == 'scale_nearest':
return cv2.resize(image,
(self.resized_width, self.resized_height),
interpolation=cv2.INTER_NEAREST)
else:
raise ValueError('Unrecognized image resize method.')
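As an aside, a minimal standalone sketch of the resize dispatch with the new 'scale_nearest' option (assumes OpenCV and NumPy are installed; the frame shape and target size are illustrative, not taken from the PR):

import cv2
import numpy as np

def resize_image(image, width, height, method):
    # 'scale' uses bilinear interpolation; 'scale_nearest' uses
    # nearest-neighbour, which keeps the original pixel values exact.
    if method == 'scale':
        return cv2.resize(image, (width, height),
                          interpolation=cv2.INTER_LINEAR)
    elif method == 'scale_nearest':
        return cv2.resize(image, (width, height),
                          interpolation=cv2.INTER_NEAREST)
    raise ValueError('Unrecognized image resize method.')

# Example: shrink a blank 210x160 grayscale frame to 84x84.
frame = np.zeros((210, 160), dtype=np.uint8)
small = resize_image(frame, 84, 84, 'scale_nearest')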
4 changes: 3 additions & 1 deletion deep_q_rl/ale_run_watch.py
@@ -10,12 +10,14 @@


def run_watch():
command = ['./run_nature.py', '--steps-per-epoch', '0',
command = [sys.executable, './run_nature.py', '--steps-per-epoch', '0',
'--test-length', '10000', '--nn-file', sys.argv[1],
'--display-screen']

if len(sys.argv) > 2:
command.extend(['--rom', sys.argv[2]])
if len(sys.argv) > 3:
command.extend(sys.argv[3:])

p1 = subprocess.Popen(command)

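A short usage sketch of the updated watcher (the network file and ROM names are hypothetical placeholders; any arguments after the ROM are simply appended to the run_nature.py command):

import subprocess
import sys

# Equivalent to: python ale_run_watch.py <nn_file> <rom> [extra args...]
nn_file, rom, extras = 'network.pkl', 'breakout.bin', []
command = [sys.executable, './run_nature.py', '--steps-per-epoch', '0',
           '--test-length', '10000', '--nn-file', nn_file,
           '--display-screen', '--rom', rom] + extras
subprocess.Popen(command)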