Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Experiment with a reward penalty for walking into walls and people #163

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 44 additions & 2 deletions baselines/red_gym_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ def __init__(
self.s_path.mkdir(exist_ok=True)
self.reset_count = 0
self.all_runs = []
self.prev_x_pos = 0
self.prev_y_pos = 0

# Set this in SOME subclasses
self.metadata = {"render.modes": []}
Expand All @@ -63,6 +65,13 @@ def __init__(
WindowEvent.PRESS_BUTTON_A,
WindowEvent.PRESS_BUTTON_B,
]

self.move_actions = [
WindowEvent.PRESS_ARROW_DOWN,
WindowEvent.PRESS_ARROW_LEFT,
WindowEvent.PRESS_ARROW_RIGHT,
WindowEvent.PRESS_ARROW_UP,
]

if self.extra_buttons:
self.valid_actions.extend([
Expand Down Expand Up @@ -150,6 +159,7 @@ def reset(self, seed=None):
self.max_event_rew = 0
self.max_level_rew = 0
self.last_health = 1
self.noop_move = 0
self.total_healing_rew = 0
self.died_count = 0
self.party_size = 0
Expand Down Expand Up @@ -190,6 +200,7 @@ def render(self, reduce_res=True, add_memory=True, update_mem=True):
axis=0)
return game_pixels_render

# Implicitly called by PPO
def step(self, action):

self.run_action_on_emulator(action)
Expand Down Expand Up @@ -262,6 +273,19 @@ def append_agent_stats(self, action):
x_pos = self.read_m(0xD362)
y_pos = self.read_m(0xD361)
map_n = self.read_m(0xD35E)

# If player was moved (up/down/left/right) but their position didn't change,
# it may mean they went up against an edge. For example, edge of a building, person,
# or the world. We should penalize this type of action in order to not waste exploration time.
self.noop_move = 0
if self.valid_actions[action] in self.move_actions \
and self.prev_x_pos == x_pos and self.prev_y_pos == y_pos:
self.noop_move = 1


self.prev_x_pos = x_pos
self.prev_y_pos = y_pos

levels = [self.read_m(a) for a in [0xD18C, 0xD1B8, 0xD1E4, 0xD210, 0xD23C, 0xD268]]
if self.use_screen_explore:
expl = ('frames', self.knn_index.get_current_count())
Expand All @@ -278,7 +302,8 @@ def append_agent_stats(self, action):
'hp': self.read_hp_fraction(),
expl[0]: expl[1],
'deaths': self.died_count, 'badge': self.get_badges(),
'event': self.progress_reward['event'], 'healr': self.total_healing_rew
'event': self.progress_reward['event'], 'healr': self.total_healing_rew,
'noop_move': self.noop_move,
})

def update_frame_knn_index(self, frame_vec):
Expand Down Expand Up @@ -434,6 +459,12 @@ def read_bit(self, addr, bit: int) -> bool:
# add padding so zero will read '0b100000000' instead of '0b0'
return bin(256 + self.read_m(addr))[-bit-1] == '1'


def is_in_battle(self):
    """Return True if the player is in any type of battle, else False.

    Reads the byte at 0xD057. Per the pokered disassembly this is
    ``wIsInBattle``: 0 = no battle, 1 = wild battle, 2 = trainer battle,
    0xFF = battle lost. The previous check compared the byte to 12, which
    is not one of those values, so the method could never report a battle.
    Any non-zero value is now treated as "in battle"; this includes the
    transitional "battle lost" state, during which overworld movement is
    not possible either.
    """
    return self.read_m(0xD057) != 0

def get_levels_sum(self):
    """Return the summed level progress read from six fixed memory addresses.

    Each address is read with ``read_m`` (presumably one level byte per
    carried pokemon -- confirm against the RAM map). Every value is reduced
    by 2 and clamped at 0 before summing; the total is then reduced by 4
    and clamped at 0 again.
    """
    level_addrs = (0xD18C, 0xD1B8, 0xD1E4, 0xD210, 0xD23C, 0xD268)
    total = 0
    for addr in level_addrs:
        adjusted = self.read_m(addr) - 2
        total += adjusted if adjusted > 0 else 0
    # subtract starting pokemon level
    remaining = total - 4
    return remaining if remaining > 0 else 0
Expand All @@ -457,6 +488,16 @@ def get_knn_reward(self):
base = (self.base_explore if self.levels_satisfied else cur_size) * pre_rew
post = (cur_size if self.levels_satisfied else 0) * post_rew
return base + post

def get_movement_reward(self):
    """Return the penalty for a wasted movement action.

    Returns -1 when ``self.noop_move`` is truthy (a directional action was
    recorded without the player's coordinates changing) and the player is
    not in a battle. Returns 0 in every other case. The aim is to penalize
    walking into walls or people, which wastes exploration time.
    """
    if not self.noop_move:
        return 0
    # Battles are exempt from the penalty.
    if self.is_in_battle():
        return 0
    return -1

def get_badges(self):
    """Return the number of set bits in the byte at 0xD356.

    The byte is read with ``read_m`` and counted with ``bit_count``;
    presumably each set bit is one earned badge -- confirm against the
    RAM map.
    """
    badge_flags = self.read_m(0xD356)
    return self.bit_count(badge_flags)
Expand Down Expand Up @@ -532,7 +573,8 @@ def get_game_state_reward(self, print_stats=False):
#'op_poke': self.reward_scale*self.max_opponent_poke * 800,
#'money': self.reward_scale* money * 3,
#'seen_poke': self.reward_scale * seen_poke_count * 400,
'explore': self.reward_scale * self.get_knn_reward()
'explore': self.reward_scale * self.get_knn_reward(),
'noop_move': self.reward_scale * self.get_movement_reward()
}

return state_scores
Expand Down
5 changes: 4 additions & 1 deletion baselines/run_baseline_parallel.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,18 +27,21 @@ def _init():


ep_length = 2048 * 8
# ep_length = 500
sess_path = Path(f'session_{str(uuid.uuid4())[:8]}')

env_config = {
'headless': True, 'save_final_state': True, 'early_stop': False,
# 'headless': False, 'save_final_state': True, 'early_stop': False,
'action_freq': 24, 'init_state': '../has_pokedex_nballs.state', 'max_steps': ep_length,
'print_rewards': True, 'save_video': False, 'fast_video': True, 'session_path': sess_path,
'gb_path': '../PokemonRed.gb', 'debug': False, 'sim_frame_dist': 2_000_000.0,
'use_screen_explore': True, 'extra_buttons': False
}


num_cpu = 44 #64 #46 # Also sets the number of episodes per training iteration
num_cpu = 46 #64 #46 # Also sets the number of episodes per training iteration
# num_cpu = 1 #64 #46 # Also sets the number of episodes per training iteration
env = SubprocVecEnv([make_env(i, env_config) for i in range(num_cpu)])

checkpoint_callback = CheckpointCallback(save_freq=ep_length, save_path=sess_path,
Expand Down