From bdf73f8b2ec3f85f663f937316452dd7cdee30b5 Mon Sep 17 00:00:00 2001 From: cmuslima Date: Wed, 30 Aug 2023 16:38:19 -0600 Subject: [PATCH] fixed rendering so now we can render an agents policy --- App/agent.py | 2 +- App/trial.py | 33 ++++++++++++++++----------------- 2 files changed, 17 insertions(+), 18 deletions(-) diff --git a/App/agent.py b/App/agent.py index 4dd8ed4..dd75eba 100644 --- a/App/agent.py +++ b/App/agent.py @@ -26,7 +26,7 @@ def __init__(self): self.short_replay_buffer_of_demos = {} self.short_replay_buffer_of_demos.update({0:self.replay_buffer_of_demos2[1]}) - self.short_replay_buffer_of_demos.update({1:self.replay_buffer_of_demos2[5]}) + #self.short_replay_buffer_of_demos.update({1:self.replay_buffer_of_demos2[5]}) #self.short_replay_buffer_of_demos.update({5:self.replay_buffer_of_demos[3]}) #self.short_replay_buffer_of_demos.update({5:self.replay_buffer_of_demos[4]}) self.transition = {'obs': np.array(0, dtype = np.float32), 'acts': 0, 'rewards': {}, 'infos': {}, 'next_obs': np.array(0, dtype = np.float32), 'dones': False} diff --git a/App/trial.py b/App/trial.py index e3617e4..f56b93d 100644 --- a/App/trial.py +++ b/App/trial.py @@ -65,6 +65,7 @@ def start_trial(self): self.agent = Agent() self.agent.start(self.config.get('game')) actionSpace = self.config.get('actionSpace') + self.agent.reset() #self.start_trial() async def run(self): @@ -183,7 +184,8 @@ async def handle_command(self, message): self.play = True if self.modality == 'pref': self.show_demo = True - await self.render_all_frames() + #await self.render_all_frames() + await self.render_policy() elif command == 'stop': self.end() elif command == 'reset': @@ -206,6 +208,7 @@ async def render_all_frames(self): await self.send_render(render) await asyncio.sleep(10) # Sleep for 10 seconds # await asyncio.sleep(1 / self.framerate) # Sleep to control framerate + def handle_action(self, action:str): @@ -262,7 +265,7 @@ async def get_render(self): ''' #print('inside get render') - self.agent.reset() + # self.agent.reset() render = self.agent.render() #print('render', render) try: @@ -317,20 +320,16 @@ async def take_step(self): self.total_reward = 0 async def render_policy(self): - #starting with uniformly sampling demostrations from the agent's demo buffer - #random_demo_number = np.random.choice(list(self.agent.short_replay_buffer_of_demos.keys())) - #print('random demo number', random_demo_number) - - - #index = list(self.agent.short_replay_buffer_of_demos)[self.demo_idx] + demo = self.agent.short_replay_buffer_of_demos[self.demo_idx] print(demo) - print(self.agent.short_replay_buffer_of_demos[1]) + # print('len of demo',) + #print(self.agent.short_replay_buffer_of_demos[1]) #print('type of demo', type(demo)) print('demo number', self.demo_idx) print('just reset the agent env') - self.agent.reset() + #self.agent.reset() print('len of demo', len(demo)) for idx, action in enumerate(demo): @@ -339,13 +338,13 @@ async def render_policy(self): print(action) envState, _ = self.agent.step(action) #print(envState, type(envState)) - self.update_entry(envState) - self.save_entry() - render = self.get_render() - self.send_render(render) - if envState['done']: - print('done') - break + # self.update_entry(envState) + # self.save_entry() + render = await self.get_render() + await self.send_render(render) + # if envState['done']: + # print('done') + # break self.play = False print('self.play is now False') async def main():