From 29443190ecf80676656be6bc1963dd7fccf822a2 Mon Sep 17 00:00:00 2001
From: Ram81 <ramramrakhya81@gmail.com>
Date: Mon, 29 Apr 2024 16:28:08 -0400
Subject: [PATCH 01/88] Add task config for multi-agent

---
 .../config/lang_rearrange_spot_humanoid.yaml  | 39 +++++++++++++++++++
 .../config/language_rearrange.yaml            | 19 ++++-----
 .../language_rearrange_multi_agent.yaml       | 30 ++++++++++++++
 examples/hitl/rearrange_v2/rearrange_v2.py    |  5 +++
 .../controllers/controller_helper.py          |  4 +-
 5 files changed, 84 insertions(+), 13 deletions(-)
 create mode 100644 examples/hitl/rearrange_v2/config/lang_rearrange_spot_humanoid.yaml
 create mode 100644 examples/hitl/rearrange_v2/config/language_rearrange_multi_agent.yaml

diff --git a/examples/hitl/rearrange_v2/config/lang_rearrange_spot_humanoid.yaml b/examples/hitl/rearrange_v2/config/lang_rearrange_spot_humanoid.yaml
new file mode 100644
index 0000000000..4fff8d00ad
--- /dev/null
+++ b/examples/hitl/rearrange_v2/config/lang_rearrange_spot_humanoid.yaml
@@ -0,0 +1,39 @@
+# @package _global_
+
+defaults:
+  - language_rearrange_multi_agent
+  - hitl_defaults
+  - _self_
+
+habitat:
+  # various config args to ensure the episode never ends
+  environment:
+    max_episode_steps: 0
+    iterator_options:
+      # For the demo, we want to showcase the episodes in the specified order
+      shuffle: False
+
+habitat_baselines:
+  # todo: document these choices
+  eval:
+    should_load_ckpt: False
+  rl:
+    agent:
+      num_pool_agents_per_type: [1, 1]
+    policy:
+
+habitat_hitl:
+  window:
+    title: "Rearrange"
+    width: 1300
+    height: 1000
+  gui_controlled_agents:
+    - agent_index: 0
+      lin_speed: 10.0
+      ang_speed: 300
+    - agent_index: 1
+      lin_speed: 10.0
+      ang_speed: 300
+  hide_humanoid_in_gui: True
+  camera:
+    first_person_mode: True
diff --git a/examples/hitl/rearrange_v2/config/language_rearrange.yaml b/examples/hitl/rearrange_v2/config/language_rearrange.yaml
index 349f016cb3..53cd9cf124 100644
--- a/examples/hitl/rearrange_v2/config/language_rearrange.yaml
+++ b/examples/hitl/rearrange_v2/config/language_rearrange.yaml
@@ -5,13 +5,13 @@ defaults:
   - /habitat: habitat_config_base
   - /habitat/task: task_config_base
 
-  - /habitat/simulator/sensor_setups@habitat.simulator.agents.main_agent: rgbd_head_agent
-  - /habitat/simulator/agents@habitat.simulator.agents.main_agent: human
+  - /habitat/simulator/sensor_setups@habitat.simulator.agents.agent_0: rgbd_head_agent
+  - /habitat/simulator/agents@habitat.simulator.agents.agent_0: human
 
   - /habitat/dataset/rearrangement: hssd
 
-  - /habitat/task/actions@habitat.task.actions.base_velocity: base_velocity
-  - /habitat/task/actions@habitat.task.actions.rearrange_stop: rearrange_stop
+  - /habitat/task/actions@habitat.task.actions.agent_0_base_velocity: base_velocity
+  - /habitat/task/actions@habitat.task.actions.agent_0_rearrange_stop: rearrange_stop
 
   - /habitat/task/measurements:
     - num_steps
@@ -43,7 +43,7 @@ habitat:
     task_spec: pddl/multi_agent_tidy_house
     pddl_domain_def: fp
     actions:
-      base_velocity:
+      agent_0_base_velocity:
         lin_speed: 40.0
         ang_speed: 20.0
 
@@ -51,15 +51,9 @@ habitat:
   gym:
     obs_keys:
       - head_depth
-      - relative_resting_position
-      - obj_start_sensor
-      - obj_goal_sensor
-      - obj_start_gps_compass
-      - obj_goal_gps_compass
       - is_holding
       - ee_pos
       - localization_sensor
-      - has_finished_oracle_nav
   environment:
     max_episode_steps: 750
   simulator:
@@ -74,7 +68,7 @@ habitat:
     concur_render: True
     auto_sleep: True
     agents_order:
-      - main_agent
+      - agent_0
 
     kinematic_mode: True
     ac_freq_ratio: 1
@@ -84,4 +78,5 @@ habitat:
       allow_sliding: True
       enable_physics: True
   dataset:
+    type: "CollaborationDataset-v0"
     data_path: data/datasets/hssd/llm_rearrange/v2/60scenes_dataset_776eps_with_eval.json.gz
diff --git a/examples/hitl/rearrange_v2/config/language_rearrange_multi_agent.yaml b/examples/hitl/rearrange_v2/config/language_rearrange_multi_agent.yaml
new file mode 100644
index 0000000000..f66f06fded
--- /dev/null
+++ b/examples/hitl/rearrange_v2/config/language_rearrange_multi_agent.yaml
@@ -0,0 +1,30 @@
+# This config is derived from habitat-lab/habitat/config/benchmark/multi_agent/hssd_spot_human.yaml
+# @package _global_
+
+defaults:
+  - language_rearrange
+  - /habitat/simulator/agents@habitat.simulator.agents.agent_1: spot
+  - /habitat/simulator/sensor_setups@habitat.simulator.agents.agent_1: rgbd_head_rgbd_arm_agent
+  - /habitat/task/actions@habitat.task.actions.agent_1_arm_action: arm_action
+  - /habitat/task/actions@habitat.task.actions.agent_1_base_velocity: base_velocity
+  - /habitat/task/actions@habitat.task.actions.agent_1_rearrange_stop: rearrange_stop
+  - _self_
+
+habitat:
+  task:
+    actions:
+      agent_1_arm_action:
+        grip_controller: MagicGraspAction
+      agent_1_base_velocity:
+        lin_speed: 40.0
+        ang_speed: 20.0
+  gym:
+    obs_keys:
+      - agent_0_head_depth
+      - agent_0_is_holding
+      - agent_0_ee_pos
+      - agent_1_head_depth
+  simulator:
+    agents_order:
+      - agent_0
+      - agent_1
diff --git a/examples/hitl/rearrange_v2/rearrange_v2.py b/examples/hitl/rearrange_v2/rearrange_v2.py
index f6d50696bc..cb131b9eb7 100644
--- a/examples/hitl/rearrange_v2/rearrange_v2.py
+++ b/examples/hitl/rearrange_v2/rearrange_v2.py
@@ -32,6 +32,11 @@
 from habitat_hitl.environment.hablab_utils import get_agent_art_obj_transform
 from habitat_sim.utils.common import quat_from_magnum, quat_to_coeffs
 
+try:
+    from habitat_llm.agent.env import dataset  # noqa: F401
+except ImportError:
+    print("Habitat-LLM not installed. Skipping import.")
+
 
 class DataLogger:
     def __init__(self, app_service):
diff --git a/habitat-hitl/habitat_hitl/environment/controllers/controller_helper.py b/habitat-hitl/habitat_hitl/environment/controllers/controller_helper.py
index 2ed911d4ca..4a044fdf30 100644
--- a/habitat-hitl/habitat_hitl/environment/controllers/controller_helper.py
+++ b/habitat-hitl/habitat_hitl/environment/controllers/controller_helper.py
@@ -134,7 +134,9 @@ def __init__(
                         # sloppy: derive turn scale. This is the change in yaw (in radians) corresponding to a base ang vel action of 1.0. See also Habitat-lab BaseVelAction.
                         turn_scale = (
                             config.habitat.simulator.ctrl_freq
-                            / config.habitat.task.actions.agent_0_base_velocity.ang_speed
+                            / config.habitat.task.actions[
+                                f"{agent_k}_base_velocity"
+                            ].ang_speed
                         )
 
                         gui_agent_controller = GuiRobotController(

From 8c229530ab3b57ff20dd57dcfefdf76d751c3c8e Mon Sep 17 00:00:00 2001
From: Priyam Parashar <priyam8parashar@gmail.com>
Date: Wed, 1 May 2024 12:17:37 -0400
Subject: [PATCH 02/88] changes to run barebone w/llm

---
 .../lang_rearrange_llmspot_guihumanoid.yaml   | 45 +++++++++++++++++++
 .../config/lang_rearrange_spot_humanoid.yaml  |  3 ++
 2 files changed, 48 insertions(+)
 create mode 100644 examples/hitl/rearrange_v2/config/lang_rearrange_llmspot_guihumanoid.yaml

diff --git a/examples/hitl/rearrange_v2/config/lang_rearrange_llmspot_guihumanoid.yaml b/examples/hitl/rearrange_v2/config/lang_rearrange_llmspot_guihumanoid.yaml
new file mode 100644
index 0000000000..ca801edb25
--- /dev/null
+++ b/examples/hitl/rearrange_v2/config/lang_rearrange_llmspot_guihumanoid.yaml
@@ -0,0 +1,45 @@
+# @package _global_
+
+defaults:
+  - language_rearrange_multi_agent
+  - hitl_defaults
+  - _self_
+
+habitat:
+  # various config args to ensure the episode never ends
+  environment:
+    max_episode_steps: 0
+    iterator_options:
+      # For the demo, we want to showcase the episodes in the specified order
+      shuffle: False
+  dataset:
+    data_path: "data/prerelease_1k_v2.json.gz"
+    scenes_dir: "data/fpss"
+
+habitat_baselines:
+  # todo: document these choices
+  eval:
+    should_load_ckpt: False
+  rl:
+    agent:
+      type: "SingleAgentAccessMgr"
+      num_agent_types: 1
+      num_pool_agents_per_type: [1]
+      num_active_agents_per_type: [1]
+      agent_sample_interval: 20
+      force_partner_sample_idx: -1
+    policy:
+
+habitat_hitl:
+  window:
+    title: "Rearrange"
+    width: 1300
+    height: 1000
+  gui_controlled_agents:
+    - agent_index: 1
+      lin_speed: 10.0
+      ang_speed: 300
+  hide_humanoid_in_gui: True
+  camera:
+    first_person_mode: True
+
diff --git a/examples/hitl/rearrange_v2/config/lang_rearrange_spot_humanoid.yaml b/examples/hitl/rearrange_v2/config/lang_rearrange_spot_humanoid.yaml
index 4fff8d00ad..d7952a40ba 100644
--- a/examples/hitl/rearrange_v2/config/lang_rearrange_spot_humanoid.yaml
+++ b/examples/hitl/rearrange_v2/config/lang_rearrange_spot_humanoid.yaml
@@ -12,6 +12,9 @@ habitat:
     iterator_options:
       # For the demo, we want to showcase the episodes in the specified order
       shuffle: False
+  dataset:
+    data_path: "data/prerelease_1k_v2.json.gz"
+    scenes_dir: "data/fpss"
 
 habitat_baselines:
   # todo: document these choices

From e16ddbd63a3e0ef74cc5d2741de48fef16dc0fdb Mon Sep 17 00:00:00 2001
From: Priyam Parashar <priyam8parashar@gmail.com>
Date: Wed, 1 May 2024 13:59:55 -0400
Subject: [PATCH 03/88] basic plumbing into hitl

---
 .../lang_rearrange_llmspot_guihumanoid.yaml   |  4 +
 .../controllers/controller_helper.py          | 21 ++++
 .../environment/controllers/llm_controller.py | 96 +++++++++++++++++++
 3 files changed, 121 insertions(+)
 create mode 100644 habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py

diff --git a/examples/hitl/rearrange_v2/config/lang_rearrange_llmspot_guihumanoid.yaml b/examples/hitl/rearrange_v2/config/lang_rearrange_llmspot_guihumanoid.yaml
index ca801edb25..8e3f127074 100644
--- a/examples/hitl/rearrange_v2/config/lang_rearrange_llmspot_guihumanoid.yaml
+++ b/examples/hitl/rearrange_v2/config/lang_rearrange_llmspot_guihumanoid.yaml
@@ -39,6 +39,10 @@ habitat_hitl:
     - agent_index: 1
       lin_speed: 10.0
       ang_speed: 300
+  llm_controller_agents:
+    - agent_index: 0
+      lin_speed: 10.0
+      ang_speed: 300
   hide_humanoid_in_gui: True
   camera:
     first_person_mode: True
diff --git a/habitat-hitl/habitat_hitl/environment/controllers/controller_helper.py b/habitat-hitl/habitat_hitl/environment/controllers/controller_helper.py
index 4a044fdf30..c43e175896 100644
--- a/habitat-hitl/habitat_hitl/environment/controllers/controller_helper.py
+++ b/habitat-hitl/habitat_hitl/environment/controllers/controller_helper.py
@@ -20,6 +20,7 @@
     GuiHumanoidController,
     GuiRobotController,
 )
+from habitat_hitl.environment.controllers.llm_controller import LLMController
 
 if TYPE_CHECKING:
     from omegaconf import DictConfig
@@ -85,6 +86,9 @@ def __init__(
                 gui_controlled_agent_config = (
                     self._find_gui_controlled_agent_config(agent_index)
                 )
+                llm_controlled_agent_config = (
+                    self._find_llm_controlled_agent_config(agent_index)
+                )
                 if gui_controlled_agent_config:
                     agent_name: str = (
                         self._env.sim.habitat_config.agents_order[agent_index]
@@ -157,6 +161,15 @@ def __init__(
 
                     self.controllers.append(gui_agent_controller)
 
+                elif llm_controlled_agent_config:
+                    self.controller.append(
+                        LLMController(
+                            agent_index,
+                            is_multi_agent,
+                            config,
+                            self._gym_habitat_env,
+                        )
+                    )
                 else:
                     self.controllers.append(
                         SingleAgentBaselinesController(
@@ -175,6 +188,14 @@ def _find_gui_controlled_agent_config(self, agent_index):
                 return gui_controlled_agent_config
         return None
 
+    def _llm_controlled_agent_config(self, agent_index):
+        for (
+            llm_controlled_agent_config
+        ) in self._hitl_config.llm_controlled_agents:
+            if llm_controlled_agent_config.agent_index == agent_index:
+                return llm_controlled_agent_config
+        return None
+
     def get_gui_agent_controllers(self) -> List[Controller]:
         """
         Return list of controllers indexed by user index. Beware the difference between user index and agent index. For example, user 0 may control agent 1.
diff --git a/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py b/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py
new file mode 100644
index 0000000000..bc4c9f5e44
--- /dev/null
+++ b/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py
@@ -0,0 +1,96 @@
+from habitat.core.environments import GymHabitatEnv
+from habitat_hitl.environment.controllers.baselines_controller import (
+    SingleAgentBaselinesController,
+)
+from omegaconf import DictConfig
+from habitat_llm.utils import setup_config, fix_config
+from hydra.utils import instantiate
+from habitat_llm.agent.env import EnvironmentInterface
+from habitat_llm.agent import Agent
+
+
+class LLMController(SingleAgentBaselinesController):
+    """Controller for single LLM controlled agent."""
+
+    def __init__(
+        self,
+        agent_idx: int,
+        is_multi_agent: bool,
+        config: DictConfig,
+        gym_habitat_env: GymHabitatEnv,
+    ):
+        super().__init__(agent_idx, is_multi_agent, config, gym_habitat_env)
+        fix_config(config)
+        seed = 47668090
+        self.config = setup_config(config, seed)
+        self.planner = None
+        self.environment_interface = None
+
+        # NOTE: this is creating just one agent. Habitat-LLM has code for creating
+        # multiple processes/agents in one go. I am only prototyping single process, as
+        # I assume the onus of creating multiple processes is on the user/hitl_driver,
+        # and this code will be called once per Sim instantiation
+        self.initialize_environment_interface()
+        self.initialize_planner()
+        self.info = {}
+
+    def initialize_planner(self):
+        # NOTE: using instantiate here, but given this is planning for a single agent
+        # always will this ever be an option of Centralized vs Decentralized? Maybe DAG...?
+        self.planner = instantiate(self.config.evaluation.planner)
+        self.planner.agents = self.initialize_agents(
+            self.config.evaluation.agents
+        )
+
+    def initialize_agents(self, agent_configs):
+        agents = []
+        for _, agent_conf in agent_configs.items():
+            # Instantiate the agent
+            agent = Agent(
+                agent_conf.uid, agent_conf.config, self.environment_interface
+            )
+
+            # Make sure that its unique by adding to the set
+            agents.append(agent)
+        return agents
+
+    def initialize_environment_interface(self):
+        self.environment_interface = EnvironmentInterface(
+            self.config, gym_habitat_env=self._gym_habitat_env
+        )
+
+        # NOTE: this is to replicate initial call of  get_next_action, in
+        # run_instruction() method. I am not sure why we do this initially?
+        _low_level_actions, _planner_info, _task_done = (
+            self.planner.get_next_action(
+                self.current_instruction,
+                {},
+                self.env_interface.world_graph,
+            )
+        )
+
+    def on_environment_reset(self):
+        # NOTE: the following ONLY resets self._test_recurrent_hidden_states,
+        # self._prev_actions and self._not_done_masks
+        super().on_environment_reset()
+        self.planner.reset()
+        self.environment_interface.reset()
+
+        self.current_instruction = (
+            self.environment_interface.hab_env.current_episode.instruction
+        )
+
+    def act(self, observations):
+        # NOTE: update the world state to reflect the new observations
+        self.environment_interface.update_world_state(observations)
+
+        # NOTE: this is where the LLM magic happens, the agent is given the observations
+        # and it returns the actions for the agent
+        low_level_actions, planner_info, task_done = (
+            self.planner.get_next_action(
+                self.current_instruction,
+                observations,
+                self.envirnment_interface.world_graph,
+            )
+        )
+        return low_level_actions

From 7849ef63e095a3ad1e71a6b247efa0a4ef1770f4 Mon Sep 17 00:00:00 2001
From: Priyam Parashar <priyam8parashar@gmail.com>
Date: Wed, 1 May 2024 15:10:00 -0400
Subject: [PATCH 04/88] fixing precommit

---
 .../lang_rearrange_llmspot_guihumanoid.yaml   |  1 -
 .../controllers/controller_helper.py          |  4 +-
 .../environment/controllers/llm_controller.py | 48 +++++++++++--------
 3 files changed, 30 insertions(+), 23 deletions(-)

diff --git a/examples/hitl/rearrange_v2/config/lang_rearrange_llmspot_guihumanoid.yaml b/examples/hitl/rearrange_v2/config/lang_rearrange_llmspot_guihumanoid.yaml
index 8e3f127074..92ba4dd351 100644
--- a/examples/hitl/rearrange_v2/config/lang_rearrange_llmspot_guihumanoid.yaml
+++ b/examples/hitl/rearrange_v2/config/lang_rearrange_llmspot_guihumanoid.yaml
@@ -46,4 +46,3 @@ habitat_hitl:
   hide_humanoid_in_gui: True
   camera:
     first_person_mode: True
-
diff --git a/habitat-hitl/habitat_hitl/environment/controllers/controller_helper.py b/habitat-hitl/habitat_hitl/environment/controllers/controller_helper.py
index c43e175896..81cb8a4f1d 100644
--- a/habitat-hitl/habitat_hitl/environment/controllers/controller_helper.py
+++ b/habitat-hitl/habitat_hitl/environment/controllers/controller_helper.py
@@ -162,7 +162,7 @@ def __init__(
                     self.controllers.append(gui_agent_controller)
 
                 elif llm_controlled_agent_config:
-                    self.controller.append(
+                    self.controllers.append(
                         LLMController(
                             agent_index,
                             is_multi_agent,
@@ -188,7 +188,7 @@ def _find_gui_controlled_agent_config(self, agent_index):
                 return gui_controlled_agent_config
         return None
 
-    def _llm_controlled_agent_config(self, agent_index):
+    def _find_llm_controlled_agent_config(self, agent_index):
         for (
             llm_controlled_agent_config
         ) in self._hitl_config.llm_controlled_agents:
diff --git a/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py b/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py
index bc4c9f5e44..6212d4d10b 100644
--- a/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py
+++ b/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py
@@ -1,12 +1,16 @@
+from typing import Any, Dict
+
+from habitat_llm.agent import Agent
+from habitat_llm.agent.env import EnvironmentInterface
+from habitat_llm.planner.llm_planner import LLMPlanner
+from habitat_llm.utils import fix_config, setup_config
+from hydra.utils import instantiate
+from omegaconf import DictConfig
+
 from habitat.core.environments import GymHabitatEnv
 from habitat_hitl.environment.controllers.baselines_controller import (
     SingleAgentBaselinesController,
 )
-from omegaconf import DictConfig
-from habitat_llm.utils import setup_config, fix_config
-from hydra.utils import instantiate
-from habitat_llm.agent.env import EnvironmentInterface
-from habitat_llm.agent import Agent
 
 
 class LLMController(SingleAgentBaselinesController):
@@ -23,8 +27,8 @@ def __init__(
         fix_config(config)
         seed = 47668090
         self.config = setup_config(config, seed)
-        self.planner = None
-        self.environment_interface = None
+        self.planner: LLMPlanner = None
+        self.environment_interface: EnvironmentInterface = None
 
         # NOTE: this is creating just one agent. Habitat-LLM has code for creating
         # multiple processes/agents in one go. I am only prototyping single process, as
@@ -32,7 +36,7 @@ def __init__(
         # and this code will be called once per Sim instantiation
         self.initialize_environment_interface()
         self.initialize_planner()
-        self.info = {}
+        self.info: Dict[str, Any] = {}
 
     def initialize_planner(self):
         # NOTE: using instantiate here, but given this is planning for a single agent
@@ -61,12 +65,14 @@ def initialize_environment_interface(self):
 
         # NOTE: this is to replicate initial call of  get_next_action, in
         # run_instruction() method. I am not sure why we do this initially?
-        _low_level_actions, _planner_info, _task_done = (
-            self.planner.get_next_action(
-                self.current_instruction,
-                {},
-                self.env_interface.world_graph,
-            )
+        (
+            _low_level_actions,
+            _planner_info,
+            _task_done,
+        ) = self.planner.get_next_action(
+            self.current_instruction,
+            {},
+            self.environment_interface.world_graph,
         )
 
     def on_environment_reset(self):
@@ -86,11 +92,13 @@ def act(self, observations):
 
         # NOTE: this is where the LLM magic happens, the agent is given the observations
         # and it returns the actions for the agent
-        low_level_actions, planner_info, task_done = (
-            self.planner.get_next_action(
-                self.current_instruction,
-                observations,
-                self.envirnment_interface.world_graph,
-            )
+        (
+            low_level_actions,
+            planner_info,
+            task_done,
+        ) = self.planner.get_next_action(
+            self.current_instruction,
+            observations,
+            self.environment_interface.world_graph,
         )
         return low_level_actions

From e4631d5dba73a314f3b86afd5d4c2e5ce2eaae91 Mon Sep 17 00:00:00 2001
From: Priyam Parashar <priyam8parashar@gmail.com>
Date: Thu, 2 May 2024 14:13:52 -0400
Subject: [PATCH 05/88] adding visual sensors

---
 .../lang_rearrange_spot_humanoid_visual.yaml  | 43 +++++++++++++++++++
 .../language_rearrange_multi_agent.yaml       |  6 ++-
 2 files changed, 47 insertions(+), 2 deletions(-)
 create mode 100644 examples/hitl/rearrange_v2/config/lang_rearrange_spot_humanoid_visual.yaml

diff --git a/examples/hitl/rearrange_v2/config/lang_rearrange_spot_humanoid_visual.yaml b/examples/hitl/rearrange_v2/config/lang_rearrange_spot_humanoid_visual.yaml
new file mode 100644
index 0000000000..18da93ac34
--- /dev/null
+++ b/examples/hitl/rearrange_v2/config/lang_rearrange_spot_humanoid_visual.yaml
@@ -0,0 +1,43 @@
+# @package _global_
+
+defaults:
+  - language_rearrange_multi_agent
+  - hitl_defaults
+  - _self_
+
+habitat:
+  # various config args to ensure the episode never ends
+  environment:
+    max_episode_steps: 0
+    iterator_options:
+      # For the demo, we want to showcase the episodes in the specified order
+      shuffle: False
+  dataset:
+    data_path: "data/prerelease_1k_v2.json.gz"
+    scenes_dir: "data/fpss"
+
+habitat_baselines:
+  # todo: document these choices
+  eval:
+    should_load_ckpt: False
+  rl:
+    agent:
+      num_pool_agents_per_type: [1, 1]
+    policy:
+
+habitat_hitl:
+  window:
+    title: "Rearrange"
+    width: 1300
+    height: 1000
+  gui_controlled_agents:
+    - agent_index: 0
+      lin_speed: 10.0
+      ang_speed: 300
+    - agent_index: 1
+      lin_speed: 10.0
+      ang_speed: 300
+  hide_humanoid_in_gui: True
+  camera:
+    first_person_mode: True
+  remove_visual_sensors: False
diff --git a/examples/hitl/rearrange_v2/config/language_rearrange_multi_agent.yaml b/examples/hitl/rearrange_v2/config/language_rearrange_multi_agent.yaml
index f66f06fded..46388265fc 100644
--- a/examples/hitl/rearrange_v2/config/language_rearrange_multi_agent.yaml
+++ b/examples/hitl/rearrange_v2/config/language_rearrange_multi_agent.yaml
@@ -20,10 +20,12 @@ habitat:
         ang_speed: 20.0
   gym:
     obs_keys:
-      - agent_0_head_depth
+      - agent_0_head_rgb
+      - agent_1_head_rgb
       - agent_0_is_holding
+      - agent_1_is_holding
       - agent_0_ee_pos
-      - agent_1_head_depth
+      - agent_1_ee_pos
   simulator:
     agents_order:
       - agent_0

From 0c0ed6744ccbb9153cae82cba77565ad66823f53 Mon Sep 17 00:00:00 2001
From: Priyam Parashar <priyam8parashar@gmail.com>
Date: Thu, 2 May 2024 18:03:37 -0400
Subject: [PATCH 06/88] config patching, remove unneeded super() calls

---
 .../habitat_hitl/_internal/config_helper.py   | 35 +++++++++---------
 .../controllers/controller_helper.py          |  2 ++
 .../environment/controllers/llm_controller.py | 36 +++++++++----------
 3 files changed, 37 insertions(+), 36 deletions(-)

diff --git a/habitat-hitl/habitat_hitl/_internal/config_helper.py b/habitat-hitl/habitat_hitl/_internal/config_helper.py
index 2d4de4dbb5..b293916291 100644
--- a/habitat-hitl/habitat_hitl/_internal/config_helper.py
+++ b/habitat-hitl/habitat_hitl/_internal/config_helper.py
@@ -71,10 +71,10 @@ def update_config(
                 )
 
             # avoid camera sensors for GUI-controlled agents
-            gui_controlled_agent_config = get_agent_config(
-                sim_config, agent_id=gui_controlled_agent_index
-            )
-            gui_controlled_agent_config.sim_sensors.clear()
+            # gui_controlled_agent_config = get_agent_config(
+            #     sim_config, agent_id=gui_controlled_agent_index
+            # )
+            # gui_controlled_agent_config.sim_sensors.clear()
 
             lab_sensor_names = ["has_finished_oracle_nav"]
             for lab_sensor_name in lab_sensor_names:
@@ -100,19 +100,20 @@ def update_config(
                     task_config.measurements.pop(measurement_name)
 
             # todo: decide whether to fix up config here versus validate config
-            sim_sensor_names = [
-                "head_depth",
-                "head_rgb",
-                "articulated_agent_arm_depth",
-            ]
-            for sensor_name in sim_sensor_names + lab_sensor_names:
-                sensor_name = (
-                    sensor_name
-                    if len(sim_config.agents) == 1
-                    else (f"{gui_agent_key}_{sensor_name}")
-                )
-                if sensor_name in gym_obs_keys:
-                    gym_obs_keys.remove(sensor_name)
+            if config.habitat_hitl.remove_visual_sensors:
+                sim_sensor_names = [
+                    "head_depth",
+                    "head_rgb",
+                    "articulated_agent_arm_depth",
+                ]
+                for sensor_name in sim_sensor_names + lab_sensor_names:
+                    sensor_name = (
+                        sensor_name
+                        if len(sim_config.agents) == 1
+                        else (f"{gui_agent_key}_{sensor_name}")
+                    )
+                    if sensor_name in gym_obs_keys:
+                        gym_obs_keys.remove(sensor_name)
 
             if agent_type == "KinematicHumanoid":
                 # use humanoidjoint_action for GUI-controlled KinematicHumanoid
diff --git a/habitat-hitl/habitat_hitl/environment/controllers/controller_helper.py b/habitat-hitl/habitat_hitl/environment/controllers/controller_helper.py
index 81cb8a4f1d..d4855ea3ce 100644
--- a/habitat-hitl/habitat_hitl/environment/controllers/controller_helper.py
+++ b/habitat-hitl/habitat_hitl/environment/controllers/controller_helper.py
@@ -189,6 +189,8 @@ def _find_gui_controlled_agent_config(self, agent_index):
         return None
 
     def _find_llm_controlled_agent_config(self, agent_index):
+        if not hasattr(self._hitl_config, "llm_controlled_agents"):
+            return None
         for (
             llm_controlled_agent_config
         ) in self._hitl_config.llm_controlled_agents:
diff --git a/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py b/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py
index 6212d4d10b..e9a06040dd 100644
--- a/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py
+++ b/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py
@@ -7,6 +7,8 @@
 from hydra.utils import instantiate
 from omegaconf import DictConfig
 
+import habitat
+import habitat.config
 from habitat.core.environments import GymHabitatEnv
 from habitat_hitl.environment.controllers.baselines_controller import (
     SingleAgentBaselinesController,
@@ -23,10 +25,17 @@ def __init__(
         config: DictConfig,
         gym_habitat_env: GymHabitatEnv,
     ):
-        super().__init__(agent_idx, is_multi_agent, config, gym_habitat_env)
-        fix_config(config)
+        self._config = config
+        self._is_multi_agent = is_multi_agent
+        self._gym_habitat_env = gym_habitat_env
+        self._habitat_env = gym_habitat_env.unwrapped.habitat_env
+        self._agent_idx = agent_idx
+
+        with habitat.config.read_write(self._config):
+            fix_config(self._config)
         seed = 47668090
-        self.config = setup_config(config, seed)
+        with habitat.config.read_write(self._config):
+            self._config = setup_config(self._config, seed)
         self.planner: LLMPlanner = None
         self.environment_interface: EnvironmentInterface = None
 
@@ -40,10 +49,11 @@ def __init__(
 
     def initialize_planner(self):
         # NOTE: using instantiate here, but given this is planning for a single agent
-        # always will this ever be an option of Centralized vs Decentralized? Maybe DAG...?
-        self.planner = instantiate(self.config.evaluation.planner)
+        # always will this ever be an option of Centralized vs Decentralized? Maybe
+        # DAG...?
+        self.planner = instantiate(self._config.planner)
         self.planner.agents = self.initialize_agents(
-            self.config.evaluation.agents
+            self._config.planner.agents
         )
 
     def initialize_agents(self, agent_configs):
@@ -60,19 +70,7 @@ def initialize_agents(self, agent_configs):
 
     def initialize_environment_interface(self):
         self.environment_interface = EnvironmentInterface(
-            self.config, gym_habitat_env=self._gym_habitat_env
-        )
-
-        # NOTE: this is to replicate initial call of  get_next_action, in
-        # run_instruction() method. I am not sure why we do this initially?
-        (
-            _low_level_actions,
-            _planner_info,
-            _task_done,
-        ) = self.planner.get_next_action(
-            self.current_instruction,
-            {},
-            self.environment_interface.world_graph,
+            self._config, gym_habitat_env=self._gym_habitat_env
         )
 
     def on_environment_reset(self):

From c8a98574cc6d8c72cb1c727d3790ca8bce61dbaf Mon Sep 17 00:00:00 2001
From: Priyam Parashar <priyam8parashar@gmail.com>
Date: Thu, 2 May 2024 18:05:36 -0400
Subject: [PATCH 07/88] frankenstein hitl+llm config

---
 .../lang_rearrange_llmspot_guihumanoid.yaml   |  68 ++++++--
 ...anguage_rearrange_multi_agent_llm_gui.yaml | 152 ++++++++++++++++++
 2 files changed, 205 insertions(+), 15 deletions(-)
 create mode 100644 examples/hitl/rearrange_v2/config/language_rearrange_multi_agent_llm_gui.yaml

diff --git a/examples/hitl/rearrange_v2/config/lang_rearrange_llmspot_guihumanoid.yaml b/examples/hitl/rearrange_v2/config/lang_rearrange_llmspot_guihumanoid.yaml
index 92ba4dd351..12ab09fc95 100644
--- a/examples/hitl/rearrange_v2/config/lang_rearrange_llmspot_guihumanoid.yaml
+++ b/examples/hitl/rearrange_v2/config/lang_rearrange_llmspot_guihumanoid.yaml
@@ -1,7 +1,16 @@
 # @package _global_
 
 defaults:
-  - language_rearrange_multi_agent
+  - /training@ : base_train
+  - habitat_conf/task : rearrange_easy_multi_agent
+  - world_model@world_model: gt_graph
+  - /trajectory@trajectory : trajectory_logger
+  # single agent planner setup
+  - /planner : llm_centralized_planner
+  - ../../../../../habitat-llm/habitat_llm/conf/agent/@oracle_rearrange_agent: oracle_rearrange_agent
+  - ../../../../../habitat-llm/habitat_llm/conf/agent/@planner.agents.agent_0.config: oracle_rearrange_agent
+  - /wandb_conf@                : own
+  - language_rearrange_multi_agent_llm_gui
   - hitl_defaults
   - _self_
 
@@ -15,20 +24,48 @@ habitat:
   dataset:
     data_path: "data/prerelease_1k_v2.json.gz"
     scenes_dir: "data/fpss"
+    # metadata:
+    #   metadata_folder: "data/fpss/metadata"
+    #   obj_metadata: object_categories_filtered.csv
+      # staticobj_metadata: fpmodels-with-decomposed.csv
+  task:
+    pddl_domain_def: fp
+  simulator:
+    agents:
+      agent_1:
+        radius: 0.3
+        articulated_agent_type: 'KinematicHumanoid'
+        articulated_agent_urdf: 'data/humanoids/humanoid_data/female_2/female_2.urdf'
+        motion_data_path: "data/humanoids/humanoid_data/female_2/female_2_motion_data_smplx.pkl"
 
-habitat_baselines:
-  # todo: document these choices
-  eval:
-    should_load_ckpt: False
-  rl:
-    agent:
-      type: "SingleAgentAccessMgr"
-      num_agent_types: 1
-      num_pool_agents_per_type: [1]
-      num_active_agents_per_type: [1]
-      agent_sample_interval: 20
-      force_partner_sample_idx: -1
-    policy:
+planner:
+  agents:
+    agent_0:
+      uid: 0
+# habitat_baselines:
+#   # todo: document these choices
+#   eval:
+#     should_load_ckpt: False
+
+device      : cuda
+instruction : ''
+mode: "dataset" # cli / dataset
+env: habitat
+num_runs_per_episode: 1
+num_proc: 1
+dry_run: False
+paths:
+  results_dir: ${hydra:runtime.output_dir}/results
+  epi_result_file_path: "${paths.results_dir}/episode_result_log.csv"
+  run_result_file_path: "${paths.results_dir}/run_result_log.csv"
+  end_result_file_path: "${paths.results_dir}/end_result_log.csv"
+
+hydra:
+  job:
+    name: 'habitat_llm'
+    chdir: False
+  run:
+    dir: outputs/${hydra.job.name}/${now:%Y-%m-%d_%H-%M-%S}-${file_stem:${habitat.dataset.data_path}}
 
 habitat_hitl:
   window:
@@ -39,10 +76,11 @@ habitat_hitl:
     - agent_index: 1
       lin_speed: 10.0
       ang_speed: 300
-  llm_controller_agents:
+  llm_controlled_agents:
     - agent_index: 0
       lin_speed: 10.0
       ang_speed: 300
   hide_humanoid_in_gui: True
   camera:
     first_person_mode: True
+  remove_visual_sensors: False
diff --git a/examples/hitl/rearrange_v2/config/language_rearrange_multi_agent_llm_gui.yaml b/examples/hitl/rearrange_v2/config/language_rearrange_multi_agent_llm_gui.yaml
new file mode 100644
index 0000000000..6ff4b1b7ed
--- /dev/null
+++ b/examples/hitl/rearrange_v2/config/language_rearrange_multi_agent_llm_gui.yaml
@@ -0,0 +1,152 @@
+# This config is derived from habitat-lab/habitat/config/benchmark/multi_agent/hssd_spot_human.yaml
+# @package _global_
+
+defaults:
+  - /habitat: habitat_config_base
+  # - /habitat/task: task_config_base
+
+  # Human setup
+  - /habitat/simulator/sensor_setups@habitat.simulator.agents.agent_1: rgbd_head_agent
+  - /habitat/simulator/agents@habitat.simulator.agents.agent_1: human
+  # following are from hab-llm entry config. Questions:
+  # 1. What is the difference between
+  #    habitat.task.actions.agent_1_humanoid_base_velocity and
+  #    habitat.task.actions.agent_1_base_velocity?
+  # 2. What is the humanoidjoint_action? Do we need it here?
+  - /habitat/task/actions@habitat.task.actions.agent_1_humanoid_base_velocity: humanoid_base_velocity
+  - /habitat/task/actions@habitat.task.actions.agent_1_humanoidjoint_action: humanoidjoint_action
+  # - /habitat/task/actions@habitat.task.actions.agent_1_base_velocity: base_velocity
+  # - /habitat/task/actions@habitat.task.actions.agent_1_rearrange_stop: rearrange_stop
+  # Spot setup
+  - /habitat/simulator/agents@habitat.simulator.agents.agent_0: spot
+  - /habitat_conf/habitat_agent@habitat.simulator.agents.agent_0: rgbd_head_rgbd_arm_rgbd_jaw_agent_vis
+  # - /habitat/task/actions@habitat.task.actions.agent_0_arm_action: arm_action
+  # - /habitat/task/actions@habitat.task.actions.agent_0_base_velocity: base_velocity
+  # - /habitat/task/actions@habitat.task.actions.agent_0_rearrange_stop: rearrange_stop
+
+  - /habitat/dataset/rearrangement: hssd
+  - /habitat/task/measurements:
+    - num_steps
+  - /habitat/task/lab_sensors:
+    # from previous hitl config
+    - relative_resting_pos_sensor
+    - target_start_sensor
+    - goal_sensor
+    - joint_sensor
+    - is_holding_sensor
+    - end_effector_sensor
+    - target_start_gps_compass_sensor
+    - target_goal_gps_compass_sensor
+    - localization_sensor
+    # from habitat-llm
+    - humanoid_detector_sensor
+
+  - _self_
+
+habitat:
+  # task:
+    # type: RearrangeEmptyTask-v0
+    # reward_measure: num_steps
+    # success_measure: num_steps
+    # success_reward: 10.0
+    # min_distance_start_agents: 5.0
+    # slack_reward: -0.0005
+    # end_on_success: True
+    # constraint_violation_ends_episode: False
+    # constraint_violation_drops_object: True
+    # task_spec_base_path: benchmark/multi_agent/
+    # task_spec: pddl/multi_agent_tidy_house
+    # pddl_domain_def: fp
+    # actions:
+    #   agent_0_base_velocity:
+    #     lin_speed: 40.0
+    #     ang_speed: 20.0
+
+    # robot_at_thresh: 3.0
+    # lab_sensors:
+    #   # Define the human detector
+    #   humanoid_detector_sensor:
+    #     # Whether the human detector returns an image or a binary flag
+    #     return_image: False
+    #     is_return_image_bbox: False
+  gym:
+    obs_keys:
+# --- habitat-llm block
+      # - agent_0_third_rgb
+      - agent_0_articulated_agent_arm_depth
+      - agent_0_articulated_agent_arm_rgb
+      - agent_0_articulated_agent_arm_panoptic
+      - agent_0_head_depth
+      - agent_0_head_rgb
+      - agent_0_relative_resting_position
+      - agent_0_joint
+      - agent_0_ee_pos
+      - agent_0_is_holding
+      # - agent_0_dynamic_obj_goal_sensor
+      # - agent_0_dynamic_goal_to_agent_gps_compass
+      # dynamic_obj_start_sensor is mapped into dynamic_obj_start_sensor automatically
+      # due to sensor mapping
+      # - agent_0_dynamic_obj_start_sensor
+      # - agent_0_goal_to_agent_gps_compass
+      - agent_0_humanoid_detector_sensor
+      - agent_0_articulated_agent_jaw_rgb
+      - agent_0_articulated_agent_jaw_depth
+      - agent_0_articulated_agent_jaw_panoptic
+
+      # - agent_1_third_rgb
+      # - agent_1_articulated_agent_arm_depth
+      # - agent_1_articulated_agent_arm_rgb
+      # - agent_1_articulated_agent_arm_panoptic
+      - agent_1_head_depth
+      - agent_1_head_rgb
+      - agent_1_relative_resting_position
+      - agent_1_joint
+      - agent_1_ee_pos
+      - agent_1_is_holding
+      # - agent_1_dynamic_obj_goal_sensor
+      # - agent_1_dynamic_goal_to_agent_gps_compass
+      # - agent_1_dynamic_obj_start_sensor
+# --- habitat-llm block
+  environment:
+    max_episode_steps: 750  # this is 20000 in habitat-llm
+  simulator:
+    type: RearrangeSim-v0
+    seed: 100
+
+    # --- habitat-llm block
+    agents:
+      agent_0:
+        radius: 0.3
+        articulated_agent_urdf: ./data/robots/hab_spot_arm/urdf/hab_spot_arm.urdf
+        articulated_agent_type: SpotRobot
+        joint_start_noise: 0.0
+      agent_1:
+        radius: 0.3
+        articulated_agent_urdf: ./data/robots/hab_spot_arm/urdf/hab_spot_arm.urdf
+        articulated_agent_type: SpotRobot
+        joint_start_noise: 0.0
+    # --- habitat-llm block
+
+    additional_object_paths:
+      - "data/objects/ycb/configs/"
+      - "data/objects_ovmm/train_val/ai2thorhab/configs/objects/"
+      - "data/objects_ovmm/train_val/amazon_berkeley/configs/"
+      - "data/objects_ovmm/train_val/google_scanned/configs/"
+      - "data/objects_ovmm/train_val/hssd/configs/objects/"
+    concur_render: True
+    auto_sleep: True
+    kinematic_mode: True
+    ac_freq_ratio: 1
+    step_physics: False
+    habitat_sim_v0:
+      allow_sliding: True
+      enable_physics: True
+    # Q. What is this agents_order?
+    agents_order:
+      - agent_0
+      - agent_1
+
+  dataset:
+    type: "CollaborationDataset-v0"
+    split: train
+    scenes_dir: data/fpss

From 77880a3f9c8b80f0bb736cbb349a925210339caf Mon Sep 17 00:00:00 2001
From: Priyam Parashar <priyam8parashar@gmail.com>
Date: Mon, 6 May 2024 13:13:17 -0400
Subject: [PATCH 08/88] bandaids to get to working V0

---
 .../lang_rearrange_llmspot_guihumanoid.yaml   | 21 ++++++--------
 ...anguage_rearrange_multi_agent_llm_gui.yaml | 28 +++++++++----------
 examples/hitl/rearrange_v2/rearrange_v2.py    | 19 +++++++++++++
 .../habitat_hitl/_internal/hitl_driver.py     |  4 +--
 .../environment/controllers/llm_controller.py | 26 +++++++++++++----
 5 files changed, 64 insertions(+), 34 deletions(-)

diff --git a/examples/hitl/rearrange_v2/config/lang_rearrange_llmspot_guihumanoid.yaml b/examples/hitl/rearrange_v2/config/lang_rearrange_llmspot_guihumanoid.yaml
index 12ab09fc95..9e93453446 100644
--- a/examples/hitl/rearrange_v2/config/lang_rearrange_llmspot_guihumanoid.yaml
+++ b/examples/hitl/rearrange_v2/config/lang_rearrange_llmspot_guihumanoid.yaml
@@ -6,14 +6,20 @@ defaults:
   - world_model@world_model: gt_graph
   - /trajectory@trajectory : trajectory_logger
   # single agent planner setup
-  - /planner : llm_centralized_planner
-  - ../../../../../habitat-llm/habitat_llm/conf/agent/@oracle_rearrange_agent: oracle_rearrange_agent
-  - ../../../../../habitat-llm/habitat_llm/conf/agent/@planner.agents.agent_0.config: oracle_rearrange_agent
+  - /evaluation: decentralized_evaluation_runner_single_agent
   - /wandb_conf@                : own
   - language_rearrange_multi_agent_llm_gui
   - hitl_defaults
+  - override /instruct@evaluation.agents.agent_0.planner.plan_config.instruct: few_shot_decentralized_robot_partner_aware_v1
+  - override /llm@evaluation.agents.agent_0.planner.plan_config.llm: openai_chat
   - _self_
 
+habitat_llm:
+  enable: True
+    # agent_1:
+    #   gui_controlled: True
+world_model:
+  partial_obs: False
 habitat:
   # various config args to ensure the episode never ends
   environment:
@@ -38,15 +44,6 @@ habitat:
         articulated_agent_urdf: 'data/humanoids/humanoid_data/female_2/female_2.urdf'
         motion_data_path: "data/humanoids/humanoid_data/female_2/female_2_motion_data_smplx.pkl"
 
-planner:
-  agents:
-    agent_0:
-      uid: 0
-# habitat_baselines:
-#   # todo: document these choices
-#   eval:
-#     should_load_ckpt: False
-
 device      : cuda
 instruction : ''
 mode: "dataset" # cli / dataset
diff --git a/examples/hitl/rearrange_v2/config/language_rearrange_multi_agent_llm_gui.yaml b/examples/hitl/rearrange_v2/config/language_rearrange_multi_agent_llm_gui.yaml
index 6ff4b1b7ed..c9cec26eec 100644
--- a/examples/hitl/rearrange_v2/config/language_rearrange_multi_agent_llm_gui.yaml
+++ b/examples/hitl/rearrange_v2/config/language_rearrange_multi_agent_llm_gui.yaml
@@ -6,17 +6,17 @@ defaults:
   # - /habitat/task: task_config_base
 
   # Human setup
-  - /habitat/simulator/sensor_setups@habitat.simulator.agents.agent_1: rgbd_head_agent
+  - /habitat_conf/habitat_agent@habitat.simulator.agents.agent_1: rgbd_head_rgbd_arm_agent_vis
   - /habitat/simulator/agents@habitat.simulator.agents.agent_1: human
   # following are from hab-llm entry config. Questions:
   # 1. What is the difference between
   #    habitat.task.actions.agent_1_humanoid_base_velocity and
   #    habitat.task.actions.agent_1_base_velocity?
   # 2. What is the humanoidjoint_action? Do we need it here?
-  - /habitat/task/actions@habitat.task.actions.agent_1_humanoid_base_velocity: humanoid_base_velocity
-  - /habitat/task/actions@habitat.task.actions.agent_1_humanoidjoint_action: humanoidjoint_action
+  # - /habitat/task/actions@habitat.task.actions.agent_1_humanoid_base_velocity: humanoid_base_velocity
+  # - /habitat/task/actions@habitat.task.actions.agent_1_humanoidjoint_action: humanoidjoint_action
   # - /habitat/task/actions@habitat.task.actions.agent_1_base_velocity: base_velocity
-  # - /habitat/task/actions@habitat.task.actions.agent_1_rearrange_stop: rearrange_stop
+  - /habitat/task/actions@habitat.task.actions.agent_1_rearrange_stop: rearrange_stop
   # Spot setup
   - /habitat/simulator/agents@habitat.simulator.agents.agent_0: spot
   - /habitat_conf/habitat_agent@habitat.simulator.agents.agent_0: rgbd_head_rgbd_arm_rgbd_jaw_agent_vis
@@ -82,30 +82,30 @@ habitat:
       - agent_0_joint
       - agent_0_ee_pos
       - agent_0_is_holding
-      # - agent_0_dynamic_obj_goal_sensor
-      # - agent_0_dynamic_goal_to_agent_gps_compass
+      - agent_0_dynamic_obj_goal_sensor
+      - agent_0_dynamic_goal_to_agent_gps_compass
       # dynamic_obj_start_sensor is mapped into dynamic_obj_start_sensor automatically
       # due to sensor mapping
-      # - agent_0_dynamic_obj_start_sensor
-      # - agent_0_goal_to_agent_gps_compass
+      - agent_0_dynamic_obj_start_sensor
+      - agent_0_goal_to_agent_gps_compass
       - agent_0_humanoid_detector_sensor
       - agent_0_articulated_agent_jaw_rgb
       - agent_0_articulated_agent_jaw_depth
       - agent_0_articulated_agent_jaw_panoptic
 
       # - agent_1_third_rgb
-      # - agent_1_articulated_agent_arm_depth
-      # - agent_1_articulated_agent_arm_rgb
-      # - agent_1_articulated_agent_arm_panoptic
+      - agent_1_articulated_agent_arm_depth
+      - agent_1_articulated_agent_arm_rgb
+      - agent_1_articulated_agent_arm_panoptic
       - agent_1_head_depth
       - agent_1_head_rgb
       - agent_1_relative_resting_position
       - agent_1_joint
       - agent_1_ee_pos
       - agent_1_is_holding
-      # - agent_1_dynamic_obj_goal_sensor
-      # - agent_1_dynamic_goal_to_agent_gps_compass
-      # - agent_1_dynamic_obj_start_sensor
+      - agent_1_dynamic_obj_goal_sensor
+      - agent_1_dynamic_goal_to_agent_gps_compass
+      - agent_1_dynamic_obj_start_sensor
 # --- habitat-llm block
   environment:
     max_episode_steps: 750  # this is 20000 in habitat-llm
diff --git a/examples/hitl/rearrange_v2/rearrange_v2.py b/examples/hitl/rearrange_v2/rearrange_v2.py
index cb131b9eb7..b266975da4 100644
--- a/examples/hitl/rearrange_v2/rearrange_v2.py
+++ b/examples/hitl/rearrange_v2/rearrange_v2.py
@@ -34,10 +34,24 @@
 
 try:
     from habitat_llm.agent.env import dataset  # noqa: F401
+    from habitat_llm.agent.env import (
+        register_actions,
+        register_measures,
+        register_sensors,
+    )
 except ImportError:
     print("Habitat-LLM not installed. Skipping import.")
 
 
+# -------- HABITAT-LLM SPECIFIC CODE --------
+
+
+def register_habitat_llm_extensions(config):
+    register_actions(config)
+    register_measures(config)
+    register_sensors(config)
+
+
 class DataLogger:
     def __init__(self, app_service):
         self._app_service = app_service
@@ -95,6 +109,9 @@ def record_state(self, task_completed: bool = False):
         )
 
 
+# -------- HABITAT-LLM SPECIFIC CODE --------
+
+
 class AppStateRearrangeV2(AppState):
     """
     Todo
@@ -347,6 +364,8 @@ def record_state(self):
     version_base=None, config_path="config", config_name="rearrange_v2"
 )
 def main(config):
+    if hasattr(config, "habitat_llm") and config.habitat_llm.enable:
+        register_habitat_llm_extensions(config)
     hitl_main(
         config,
         lambda app_service: AppStateRearrangeV2(app_service),
diff --git a/habitat-hitl/habitat_hitl/_internal/hitl_driver.py b/habitat-hitl/habitat_hitl/_internal/hitl_driver.py
index 10e2a0e1ec..91c5519e11 100644
--- a/habitat-hitl/habitat_hitl/_internal/hitl_driver.py
+++ b/habitat-hitl/habitat_hitl/_internal/hitl_driver.py
@@ -365,8 +365,8 @@ def _reset_environment(self):
             self._remote_client_state.clear_history()
 
         # todo: fix duplicate calls to self.ctrl_helper.on_environment_reset() here
-        if self.ctrl_helper:
-            self.ctrl_helper.on_environment_reset()
+        # if self.ctrl_helper:
+        #     self.ctrl_helper.on_environment_reset()
 
         if self._save_episode_record:
             self._reset_episode_recorder()
diff --git a/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py b/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py
index e9a06040dd..4ba847a4e5 100644
--- a/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py
+++ b/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py
@@ -1,5 +1,6 @@
-from typing import Any, Dict
+from typing import Any, Dict, Union
 
+import numpy as np
 from habitat_llm.agent import Agent
 from habitat_llm.agent.env import EnvironmentInterface
 from habitat_llm.planner.llm_planner import LLMPlanner
@@ -30,6 +31,8 @@ def __init__(
         self._gym_habitat_env = gym_habitat_env
         self._habitat_env = gym_habitat_env.unwrapped.habitat_env
         self._agent_idx = agent_idx
+        # TODO: gather this from config
+        self._agent_action_length = 28
 
         with habitat.config.read_write(self._config):
             fix_config(self._config)
@@ -51,9 +54,11 @@ def initialize_planner(self):
         # NOTE: using instantiate here, but given this is planning for a single agent
         # always will this ever be an option of Centralized vs Decentralized? Maybe
         # DAG...?
-        self.planner = instantiate(self._config.planner)
+        # NOTE: assuming use of DecentralizedLLMPlanner here
+        planner = instantiate(self._config.evaluation.agents.agent_0.planner)
+        self.planner = planner(env_interface=self.environment_interface)
         self.planner.agents = self.initialize_agents(
-            self._config.planner.agents
+            self._config.evaluation.agents
         )
 
     def initialize_agents(self, agent_configs):
@@ -76,20 +81,22 @@ def initialize_environment_interface(self):
     def on_environment_reset(self):
         # NOTE: the following ONLY resets self._test_recurrent_hidden_states,
         # self._prev_actions and self._not_done_masks
-        super().on_environment_reset()
+        # super().on_environment_reset()
         self.planner.reset()
-        self.environment_interface.reset()
+        self.environment_interface.reset_environment()
 
         self.current_instruction = (
             self.environment_interface.hab_env.current_episode.instruction
         )
 
-    def act(self, observations):
+    def act(self, observations, *args, **kwargs):
         # NOTE: update the world state to reflect the new observations
         self.environment_interface.update_world_state(observations)
 
         # NOTE: this is where the LLM magic happens, the agent is given the observations
         # and it returns the actions for the agent
+        # TODO: looping needed here until a physical low-level-action is returned
+        low_level_actions: Union[dict, np.ndarray] = {}
         (
             low_level_actions,
             planner_info,
@@ -98,5 +105,12 @@ def act(self, observations):
             self.current_instruction,
             observations,
             self.environment_interface.world_graph,
+            verbose=True,
         )
+        if low_level_actions:
+            low_level_actions = low_level_actions[str(self._agent_idx)]
+            # NOTE: truncating the action here, as this includes both Spot and Human actions
+            low_level_actions = low_level_actions[:-248]
+        else:
+            low_level_actions = np.zeros(self._agent_action_length)
         return low_level_actions

From 21ce0c4c741b7852e32b96537aefd5c3b93282d4 Mon Sep 17 00:00:00 2001
From: Priyam Parashar <priyam8parashar@gmail.com>
Date: Wed, 8 May 2024 16:52:09 -0400
Subject: [PATCH 09/88] making visual sensor changes configurable with explicit
 flags

---
 .../config/lang_rearrange_llmspot_guihumanoid.yaml       | 5 +++--
 .../config/lang_rearrange_spot_humanoid.yaml             | 2 ++
 habitat-hitl/habitat_hitl/_internal/config_helper.py     | 9 +++++----
 3 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/examples/hitl/rearrange_v2/config/lang_rearrange_llmspot_guihumanoid.yaml b/examples/hitl/rearrange_v2/config/lang_rearrange_llmspot_guihumanoid.yaml
index 9e93453446..7715106bb8 100644
--- a/examples/hitl/rearrange_v2/config/lang_rearrange_llmspot_guihumanoid.yaml
+++ b/examples/hitl/rearrange_v2/config/lang_rearrange_llmspot_guihumanoid.yaml
@@ -19,7 +19,7 @@ habitat_llm:
     # agent_1:
     #   gui_controlled: True
 world_model:
-  partial_obs: False
+  partial_obs: True
 habitat:
   # various config args to ensure the episode never ends
   environment:
@@ -65,6 +65,8 @@ hydra:
     dir: outputs/${hydra.job.name}/${now:%Y-%m-%d_%H-%M-%S}-${file_stem:${habitat.dataset.data_path}}
 
 habitat_hitl:
+  remove_visual_sensors: False
+  remove_gui_sensors: False
   window:
     title: "Rearrange"
     width: 1300
@@ -80,4 +82,3 @@ habitat_hitl:
   hide_humanoid_in_gui: True
   camera:
     first_person_mode: True
-  remove_visual_sensors: False
diff --git a/examples/hitl/rearrange_v2/config/lang_rearrange_spot_humanoid.yaml b/examples/hitl/rearrange_v2/config/lang_rearrange_spot_humanoid.yaml
index d7952a40ba..81c9def1c4 100644
--- a/examples/hitl/rearrange_v2/config/lang_rearrange_spot_humanoid.yaml
+++ b/examples/hitl/rearrange_v2/config/lang_rearrange_spot_humanoid.yaml
@@ -26,6 +26,8 @@ habitat_baselines:
     policy:
 
 habitat_hitl:
+  remove_visual_sensors: True
+  remove_gui_sensors: True
   window:
     title: "Rearrange"
     width: 1300
diff --git a/habitat-hitl/habitat_hitl/_internal/config_helper.py b/habitat-hitl/habitat_hitl/_internal/config_helper.py
index b293916291..34b4453084 100644
--- a/habitat-hitl/habitat_hitl/_internal/config_helper.py
+++ b/habitat-hitl/habitat_hitl/_internal/config_helper.py
@@ -71,10 +71,11 @@ def update_config(
                 )
 
             # avoid camera sensors for GUI-controlled agents
-            # gui_controlled_agent_config = get_agent_config(
-            #     sim_config, agent_id=gui_controlled_agent_index
-            # )
-            # gui_controlled_agent_config.sim_sensors.clear()
+            gui_controlled_agent_config = get_agent_config(
+                sim_config, agent_id=gui_controlled_agent_index
+            )
+            if config.habitat_hitl.remove_gui_sensors:
+                gui_controlled_agent_config.sim_sensors.clear()
 
             lab_sensor_names = ["has_finished_oracle_nav"]
             for lab_sensor_name in lab_sensor_names:

From e74a92ddb00ab520f5f10dc32c5d88e79a1d5991 Mon Sep 17 00:00:00 2001
From: Priyam Parashar <priyam8parashar@gmail.com>
Date: Wed, 8 May 2024 16:52:56 -0400
Subject: [PATCH 10/88] delete sensors we do not need

---
 ...anguage_rearrange_multi_agent_llm_gui.yaml | 32 +++++++++----------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/examples/hitl/rearrange_v2/config/language_rearrange_multi_agent_llm_gui.yaml b/examples/hitl/rearrange_v2/config/language_rearrange_multi_agent_llm_gui.yaml
index c9cec26eec..be2c88f020 100644
--- a/examples/hitl/rearrange_v2/config/language_rearrange_multi_agent_llm_gui.yaml
+++ b/examples/hitl/rearrange_v2/config/language_rearrange_multi_agent_llm_gui.yaml
@@ -73,11 +73,11 @@ habitat:
     obs_keys:
 # --- habitat-llm block
       # - agent_0_third_rgb
-      - agent_0_articulated_agent_arm_depth
-      - agent_0_articulated_agent_arm_rgb
+      # - agent_0_articulated_agent_arm_depth
+      # - agent_0_articulated_agent_arm_rgb
       - agent_0_articulated_agent_arm_panoptic
-      - agent_0_head_depth
-      - agent_0_head_rgb
+      # - agent_0_head_depth
+      # - agent_0_head_rgb
       - agent_0_relative_resting_position
       - agent_0_joint
       - agent_0_ee_pos
@@ -89,23 +89,23 @@ habitat:
       - agent_0_dynamic_obj_start_sensor
       - agent_0_goal_to_agent_gps_compass
       - agent_0_humanoid_detector_sensor
-      - agent_0_articulated_agent_jaw_rgb
-      - agent_0_articulated_agent_jaw_depth
+      # - agent_0_articulated_agent_jaw_rgb
+      # - agent_0_articulated_agent_jaw_depth
       - agent_0_articulated_agent_jaw_panoptic
 
       # - agent_1_third_rgb
-      - agent_1_articulated_agent_arm_depth
-      - agent_1_articulated_agent_arm_rgb
+      # - agent_1_articulated_agent_arm_depth
+      # - agent_1_articulated_agent_arm_rgb
       - agent_1_articulated_agent_arm_panoptic
-      - agent_1_head_depth
-      - agent_1_head_rgb
-      - agent_1_relative_resting_position
-      - agent_1_joint
-      - agent_1_ee_pos
+      # - agent_1_head_depth
+      # - agent_1_head_rgb
+      # - agent_1_relative_resting_position
+      # - agent_1_joint
+      # - agent_1_ee_pos
       - agent_1_is_holding
-      - agent_1_dynamic_obj_goal_sensor
-      - agent_1_dynamic_goal_to_agent_gps_compass
-      - agent_1_dynamic_obj_start_sensor
+      # - agent_1_dynamic_obj_goal_sensor
+      # - agent_1_dynamic_goal_to_agent_gps_compass
+      # - agent_1_dynamic_obj_start_sensor
 # --- habitat-llm block
   environment:
     max_episode_steps: 750  # this is 20000 in habitat-llm

From 8e912e4b554437d1ca2f37cf7f4c0032d59e3a99 Mon Sep 17 00:00:00 2001
From: Priyam Parashar <priyam8parashar@gmail.com>
Date: Sun, 12 May 2024 18:07:10 -0400
Subject: [PATCH 11/88] multi-threading + partial obs

---
 .../lang_rearrange_llmspot_guihumanoid.yaml   |  6 ++
 ...anguage_rearrange_multi_agent_llm_gui.yaml |  5 +-
 .../environment/controllers/llm_controller.py | 71 ++++++++++++++++---
 3 files changed, 70 insertions(+), 12 deletions(-)

diff --git a/examples/hitl/rearrange_v2/config/lang_rearrange_llmspot_guihumanoid.yaml b/examples/hitl/rearrange_v2/config/lang_rearrange_llmspot_guihumanoid.yaml
index 7715106bb8..592c357995 100644
--- a/examples/hitl/rearrange_v2/config/lang_rearrange_llmspot_guihumanoid.yaml
+++ b/examples/hitl/rearrange_v2/config/lang_rearrange_llmspot_guihumanoid.yaml
@@ -14,6 +14,12 @@ defaults:
   - override /llm@evaluation.agents.agent_0.planner.plan_config.llm: openai_chat
   - _self_
 
+evaluation:
+  agents:
+    agent_0:
+      planner:
+        plan_config:
+          replanning_threshold: 30
 habitat_llm:
   enable: True
     # agent_1:
diff --git a/examples/hitl/rearrange_v2/config/language_rearrange_multi_agent_llm_gui.yaml b/examples/hitl/rearrange_v2/config/language_rearrange_multi_agent_llm_gui.yaml
index be2c88f020..bb8d81a4e0 100644
--- a/examples/hitl/rearrange_v2/config/language_rearrange_multi_agent_llm_gui.yaml
+++ b/examples/hitl/rearrange_v2/config/language_rearrange_multi_agent_llm_gui.yaml
@@ -96,9 +96,10 @@ habitat:
       # - agent_1_third_rgb
       # - agent_1_articulated_agent_arm_depth
       # - agent_1_articulated_agent_arm_rgb
-      - agent_1_articulated_agent_arm_panoptic
+      # - agent_1_articulated_agent_arm_panoptic
       # - agent_1_head_depth
-      # - agent_1_head_rgb
+      - agent_1_head_rgb
+      - agent_1_head_panoptic
       # - agent_1_relative_resting_position
       # - agent_1_joint
       # - agent_1_ee_pos
diff --git a/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py b/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py
index 4ba847a4e5..dde7cac879 100644
--- a/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py
+++ b/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py
@@ -1,3 +1,5 @@
+import logging
+import threading
 from typing import Any, Dict, Union
 
 import numpy as np
@@ -5,6 +7,7 @@
 from habitat_llm.agent.env import EnvironmentInterface
 from habitat_llm.planner.llm_planner import LLMPlanner
 from habitat_llm.utils import fix_config, setup_config
+from habitat_llm.utils.analysis import CodeTimer
 from hydra.utils import instantiate
 from omegaconf import DictConfig
 
@@ -33,6 +36,20 @@ def __init__(
         self._agent_idx = agent_idx
         # TODO: gather this from config
         self._agent_action_length = 28
+        self._thread: Union[None, threading.Thread] = None
+        self._low_level_actions: Union[None, dict, np.ndarray] = {}
+        self._iter = 0
+        self._skip_iters = 0
+        logging.basicConfig(
+            filename="/home/priyamp/hitl/act_timing.log",
+            filemode="a",
+            format="%(asctime)s,%(msecs)d %(name)s %(levelname)s %(message)s",
+            datefmt="%H:%M:%S",
+            level=logging.DEBUG,
+            force=True,
+        )
+        self._analysis_logger = logging.getLogger("LLMController")
+        self._analysis_logger.debug("LLMController initialized")
 
         with habitat.config.read_write(self._config):
             fix_config(self._config)
@@ -82,23 +99,32 @@ def on_environment_reset(self):
         # NOTE: the following ONLY resets self._test_recurrent_hidden_states,
         # self._prev_actions and self._not_done_masks
         # super().on_environment_reset()
-        self.planner.reset()
         self.environment_interface.reset_environment()
+        self.planner.reset()
+        if self._thread is not None:
+            self._thread.join()
+            self._thread = None  # noqa: F841
+            self._low_level_actions = {}
 
         self.current_instruction = (
             self.environment_interface.hab_env.current_episode.instruction
         )
+        print(f"Instruction: {self.current_instruction}")
+        self._analysis_logger.debug(
+            f"------\nInstruction: {self.current_instruction}"
+        )
+        self._analysis_logger.debug("NEW EPISODE STARTING")
+        self._iter = 0
 
-    def act(self, observations, *args, **kwargs):
+    def _act(self, observations, *args, **kwargs):
         # NOTE: update the world state to reflect the new observations
         self.environment_interface.update_world_state(observations)
-
         # NOTE: this is where the LLM magic happens, the agent is given the observations
         # and it returns the actions for the agent
         # TODO: looping needed here until a physical low-level-action is returned
-        low_level_actions: Union[dict, np.ndarray] = {}
+        # low_level_actions: Union[dict, np.ndarray] = {}
         (
-            low_level_actions,
+            self._low_level_actions,
             planner_info,
             task_done,
         ) = self.planner.get_next_action(
@@ -107,10 +133,35 @@ def act(self, observations, *args, **kwargs):
             self.environment_interface.world_graph,
             verbose=True,
         )
-        if low_level_actions:
-            low_level_actions = low_level_actions[str(self._agent_idx)]
-            # NOTE: truncating the action here, as this includes both Spot and Human actions
-            low_level_actions = low_level_actions[:-248]
-        else:
+        if task_done:
+            print("Task Done")
+        return
+
+    def act(self, observations, *args, **kwargs):
+        if self._iter < self._skip_iters:
+            self._iter += 1
+            return np.zeros(self._agent_action_length)
+        with CodeTimer("LLMController.act", self._analysis_logger):
             low_level_actions = np.zeros(self._agent_action_length)
+            if self._thread is None:
+                self._thread = threading.Thread(
+                    target=self._act, args=(observations,), kwargs=kwargs
+                )
+                self._thread.start()
+            else:
+                if self._thread.is_alive():
+                    pass
+                else:
+                    self._thread = None
+                    if self._low_level_actions != {}:
+                        low_level_actions = self._low_level_actions[
+                            str(self._agent_idx)
+                        ][:-248]
+
+        # if low_level_actions:
+        #     low_level_actions = low_level_actions[str(self._agent_idx)]
+        #     # NOTE: truncating the action here, as this includes both Spot and Human actions
+        #     low_level_actions = low_level_actions[:-248]
+        # else:
+        #     low_level_actions = np.zeros(self._agent_action_length)
         return low_level_actions

From f62a4e938d09ce3e26733a1b258f8a876dd7ffe7 Mon Sep 17 00:00:00 2001
From: Priyam Parashar <priyam8parashar@gmail.com>
Date: Wed, 15 May 2024 17:10:06 -0400
Subject: [PATCH 12/88] speeding up the humanoid

---
 ...anguage_rearrange_multi_agent_llm_gui.yaml | 10 +--
 .../humanoid_rearrange_controller.py          | 83 ++++++++++---------
 2 files changed, 50 insertions(+), 43 deletions(-)

diff --git a/examples/hitl/rearrange_v2/config/language_rearrange_multi_agent_llm_gui.yaml b/examples/hitl/rearrange_v2/config/language_rearrange_multi_agent_llm_gui.yaml
index bb8d81a4e0..7ca5939268 100644
--- a/examples/hitl/rearrange_v2/config/language_rearrange_multi_agent_llm_gui.yaml
+++ b/examples/hitl/rearrange_v2/config/language_rearrange_multi_agent_llm_gui.yaml
@@ -44,7 +44,7 @@ defaults:
   - _self_
 
 habitat:
-  # task:
+  task:
     # type: RearrangeEmptyTask-v0
     # reward_measure: num_steps
     # success_measure: num_steps
@@ -57,10 +57,10 @@ habitat:
     # task_spec_base_path: benchmark/multi_agent/
     # task_spec: pddl/multi_agent_tidy_house
     # pddl_domain_def: fp
-    # actions:
-    #   agent_0_base_velocity:
-    #     lin_speed: 40.0
-    #     ang_speed: 20.0
+    actions:
+      agent_1_base_velocity:
+        lin_speed: 10.0
+        ang_speed: 300
 
     # robot_at_thresh: 3.0
     # lab_sensors:
diff --git a/habitat-lab/habitat/articulated_agent_controllers/humanoid_rearrange_controller.py b/habitat-lab/habitat/articulated_agent_controllers/humanoid_rearrange_controller.py
index f57fb3c969..0f754ffc4e 100644
--- a/habitat-lab/habitat/articulated_agent_controllers/humanoid_rearrange_controller.py
+++ b/habitat-lab/habitat/articulated_agent_controllers/humanoid_rearrange_controller.py
@@ -76,6 +76,7 @@ def __init__(
 
         self.prev_orientation = None
         self.walk_mocap_frame = 0
+        self.meters_per_step = 0
 
         self.hand_processed_data = {}
         self._hand_names = ["left_hand", "right_hand"]
@@ -109,8 +110,10 @@ def __init__(
     def set_framerate_for_linspeed(self, lin_speed, ang_speed, ctrl_freq):
         """Set the speed of the humanoid according to the simulator speed"""
         seconds_per_step = 1.0 / ctrl_freq
-        meters_per_step = lin_speed * seconds_per_step
-        frames_per_step = meters_per_step / self.dist_per_step_size
+        # meters_per_step = lin_speed * seconds_per_step
+        # frames_per_step = meters_per_step / self.dist_per_step_size
+        self.meters_per_step = lin_speed * seconds_per_step
+        frames_per_step = self.meters_per_step / self.dist_per_step_size
         self.motion_fps = self.walk_motion.fps / frames_per_step
         rotate_amount = ang_speed * seconds_per_step
         rotate_amount = rotate_amount * 180.0 / np.pi
@@ -203,28 +206,28 @@ def calculate_walk_pose(
             step_size = 0
 
         # Advance mocap frame
-        prev_mocap_frame = self.walk_mocap_frame
+        # prev_mocap_frame = self.walk_mocap_frame
         self.walk_mocap_frame = (
             self.walk_mocap_frame + step_size
         ) % self.walk_motion.num_poses
 
         # Compute how much distance we covered in this motion
-        prev_cum_distance_covered = self.walk_motion.displacement[
-            prev_mocap_frame
-        ]
-        new_cum_distance_covered = self.walk_motion.displacement[
-            self.walk_mocap_frame
-        ]
-
-        offset = 0
-        if self.walk_mocap_frame < prev_mocap_frame:
-            # We looped over the motion
-            offset = self.walk_motion.displacement[-1]
-
-        distance_covered = max(
-            0, new_cum_distance_covered + offset - prev_cum_distance_covered
-        )
-        dist_diff = min(distance_to_walk, distance_covered)
+        # prev_cum_distance_covered = self.walk_motion.displacement[
+        #     prev_mocap_frame
+        # ]
+        # new_cum_distance_covered = self.walk_motion.displacement[
+        #     self.walk_mocap_frame
+        # ]
+
+        # offset = 0
+        # if self.walk_mocap_frame < prev_mocap_frame:
+        #     # We looped over the motion
+        #     offset = self.walk_motion.displacement[-1]
+
+        # distance_covered = max(
+        #     0, new_cum_distance_covered + offset - prev_cum_distance_covered
+        # )
+        # dist_diff = min(distance_to_walk, distance_covered)
 
         new_pose = self.walk_motion.poses[self.walk_mocap_frame]
         joint_pose, obj_transform = new_pose.joints, new_pose.root_transform
@@ -252,7 +255,9 @@ def calculate_walk_pose(
         # The base_transform here is independent of transforms caused by the current
         # motion pose.
         obj_transform_base = look_at_path_T
-        forward_V_dist = forward_V * dist_diff * distance_multiplier
+        # HACK FROM MIKAEL
+        # forward_V_dist = forward_V * dist_diff * distance_multiplier
+        forward_V_dist = forward_V * self.meters_per_step * distance_multiplier
         obj_transform_base.translation += forward_V_dist
 
         rot_offset = mn.Matrix4.rotation(
@@ -375,28 +380,28 @@ def calculate_walk_pose_directional(
             step_size = 0
 
         # Advance mocap frame
-        prev_mocap_frame = self.walk_mocap_frame
+        # prev_mocap_frame = self.walk_mocap_frame
         self.walk_mocap_frame = (
             self.walk_mocap_frame + step_size
         ) % self.walk_motion.num_poses
 
         # Compute how much distance we covered in this motion
-        prev_cum_distance_covered = self.walk_motion.displacement[
-            prev_mocap_frame
-        ]
-        new_cum_distance_covered = self.walk_motion.displacement[
-            self.walk_mocap_frame
-        ]
-
-        offset = 0
-        if self.walk_mocap_frame < prev_mocap_frame:
-            # We looped over the motion
-            offset = self.walk_motion.displacement[-1]
-
-        distance_covered = max(
-            0, new_cum_distance_covered + offset - prev_cum_distance_covered
-        )
-        dist_diff = min(distance_to_walk, distance_covered)
+        # prev_cum_distance_covered = self.walk_motion.displacement[
+        #     prev_mocap_frame
+        # ]
+        # new_cum_distance_covered = self.walk_motion.displacement[
+        #     self.walk_mocap_frame
+        # ]
+
+        # offset = 0
+        # if self.walk_mocap_frame < prev_mocap_frame:
+        #     # We looped over the motion
+        #     offset = self.walk_motion.displacement[-1]
+
+        # distance_covered = max(
+        #     0, new_cum_distance_covered + offset - prev_cum_distance_covered
+        # )
+        # dist_diff = min(distance_to_walk, distance_covered)
 
         new_pose = self.walk_motion.poses[self.walk_mocap_frame]
         joint_pose, obj_transform = new_pose.joints, new_pose.root_transform
@@ -427,7 +432,9 @@ def calculate_walk_pose_directional(
         # The base_transform here is independent of transforms caused by the current
         # motion pose.
         obj_transform_base = look_at_path_T
-        forward_V_dist = forward_V * dist_diff * distance_multiplier
+        # HACK FROM MIKAEL
+        # forward_V_dist = forward_V * dist_diff * distance_multiplier
+        forward_V_dist = forward_V * self.meters_per_step * distance_multiplier
         obj_transform_base.translation += forward_V_dist
 
         rot_offset = mn.Matrix4.rotation(

From c71b55fb5047b6321724767f949ec0261b2a9649 Mon Sep 17 00:00:00 2001
From: Priyam Parashar <priyam8parashar@gmail.com>
Date: Wed, 15 May 2024 17:10:42 -0400
Subject: [PATCH 13/88] logging

---
 .../habitat_hitl/_internal/hitl_driver.py       | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/habitat-hitl/habitat_hitl/_internal/hitl_driver.py b/habitat-hitl/habitat_hitl/_internal/hitl_driver.py
index 4ef175c5b5..527b839897 100644
--- a/habitat-hitl/habitat_hitl/_internal/hitl_driver.py
+++ b/habitat-hitl/habitat_hitl/_internal/hitl_driver.py
@@ -10,12 +10,14 @@
 
 import abc
 import json
+import logging
 from datetime import datetime
 from functools import wraps
 from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional
 
 import magnum as mn
 import numpy as np
+from habitat_llm.utils.analysis import CodeTimer
 
 import habitat
 import habitat.gym
@@ -93,6 +95,16 @@ def __init__(
         self._play_episodes_filter_str = self._hitl_config.episodes_filter
         self._num_recorded_episodes = 0
         self._gui_input = gui_input
+        logging.basicConfig(
+            filename="/home/priyamp/hitl/app_timing.log",
+            filemode="a",
+            format="%(asctime)s,%(msecs)d %(name)s %(levelname)s %(message)s",
+            datefmt="%H:%M:%S",
+            level=logging.DEBUG,
+            force=True,
+        )
+        self._logger = logging.getLogger("HitlDriver")
+        self._logger.debug("HitlDriver initialized")
 
         with habitat.config.read_write(config):  # type: ignore
             # needed so we can provide keyframes to GuiApplication
@@ -319,8 +331,9 @@ def _compute_action_and_step_env(self):
         if self._hitl_config.disable_policies_and_stepping:
             return
 
-        action = self.ctrl_helper.update(self._obs)
-        self._env_step(action)
+        with CodeTimer("compute_action_and_step_env", self._logger):
+            action = self.ctrl_helper.update(self._obs)
+            self._env_step(action)
 
         if self._save_episode_record:
             self._record_action(action)

From 05ac703a6294963a20e2bdcfd7ddef1a53d4dd54 Mon Sep 17 00:00:00 2001
From: Priyam Parashar <priyam8parashar@gmail.com>
Date: Fri, 17 May 2024 16:26:51 -0400
Subject: [PATCH 14/88] moving habitat-llm stuff to dedicated file/module

---
 .../collaboration_episode_loader.py           | 10 +++++++++
 examples/hitl/rearrange_v2/rearrange_v2.py    | 22 +------------------
 2 files changed, 11 insertions(+), 21 deletions(-)

diff --git a/examples/hitl/rearrange_v2/collaboration_episode_loader.py b/examples/hitl/rearrange_v2/collaboration_episode_loader.py
index c4cf6714fb..b2e6e6c33c 100644
--- a/examples/hitl/rearrange_v2/collaboration_episode_loader.py
+++ b/examples/hitl/rearrange_v2/collaboration_episode_loader.py
@@ -10,6 +10,11 @@
 
 try:
     from habitat_llm.agent.env import dataset  # noqa: F401
+    from habitat_llm.agent.env import (
+        register_actions,
+        register_measures,
+        register_sensors,
+    )
     from habitat_llm.agent.env.dataset import CollaborationEpisode
 
     collaboration_episode_enabled = True
@@ -38,6 +43,11 @@ def load_collaboration_episode_data(
 
         return episode_data
 
+    def register_habitat_llm_extensions(config):
+        register_actions(config)
+        register_measures(config)
+        register_sensors(config)
+
 else:
 
     def load_collaboration_episode_data(
diff --git a/examples/hitl/rearrange_v2/rearrange_v2.py b/examples/hitl/rearrange_v2/rearrange_v2.py
index 0cb737f2da..f788186ba7 100644
--- a/examples/hitl/rearrange_v2/rearrange_v2.py
+++ b/examples/hitl/rearrange_v2/rearrange_v2.py
@@ -44,26 +44,6 @@
 from habitat_hitl.environment.hablab_utils import get_agent_art_obj_transform
 from habitat_sim.utils.common import quat_from_magnum, quat_to_coeffs
 
-try:
-    from habitat_llm.agent.env import (
-        register_actions,
-        register_measures,
-        register_sensors,
-    )
-except ImportError:
-    print("Habitat-LLM not installed. Skipping import.")
-
-
-# -------- HABITAT-LLM SPECIFIC CODE --------
-
-
-def register_habitat_llm_extensions(config):
-    register_actions(config)
-    register_measures(config)
-    register_sensors(config)
-
-
-# -------- HABITAT-LLM SPECIFIC CODE --------
 UP = mn.Vector3(0, 1, 0)
 
 
@@ -437,7 +417,7 @@ def sim_update(self, dt: float, post_sim_update_dict):
 )
 def main(config):
     if hasattr(config, "habitat_llm") and config.habitat_llm.enable:
-        register_habitat_llm_extensions(config)
+        collaboration_episode_loader.register_habitat_llm_extensions(config)
     hitl_main(
         config,
         lambda app_service: AppStateRearrangeV2(app_service),

From 998dd6d82b975734155883b280d3ae13346cfa16 Mon Sep 17 00:00:00 2001
From: Priyam Parashar <priyam8parashar@gmail.com>
Date: Fri, 17 May 2024 16:27:43 -0400
Subject: [PATCH 15/88] +header,+-logging,-llm util

---
 .../environment/controllers/llm_controller.py | 81 ++++++++++---------
 1 file changed, 42 insertions(+), 39 deletions(-)

diff --git a/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py b/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py
index dde7cac879..6607e8ecea 100644
--- a/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py
+++ b/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py
@@ -1,3 +1,12 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Meta Platforms, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+# This controller assumes you are using a habitat-llm Agent downstream
+# code for interface followed by a habitat-llm Agent will be released in the future
+
 import logging
 import threading
 from typing import Any, Dict, Union
@@ -7,7 +16,6 @@
 from habitat_llm.agent.env import EnvironmentInterface
 from habitat_llm.planner.llm_planner import LLMPlanner
 from habitat_llm.utils import fix_config, setup_config
-from habitat_llm.utils.analysis import CodeTimer
 from hydra.utils import instantiate
 from omegaconf import DictConfig
 
@@ -28,6 +36,7 @@ def __init__(
         is_multi_agent: bool,
         config: DictConfig,
         gym_habitat_env: GymHabitatEnv,
+        log_to_file: bool = False,
     ):
         self._config = config
         self._is_multi_agent = is_multi_agent
@@ -38,18 +47,23 @@ def __init__(
         self._agent_action_length = 28
         self._thread: Union[None, threading.Thread] = None
         self._low_level_actions: Union[None, dict, np.ndarray] = {}
+        self._task_done = False
         self._iter = 0
         self._skip_iters = 0
-        logging.basicConfig(
-            filename="/home/priyamp/hitl/act_timing.log",
-            filemode="a",
-            format="%(asctime)s,%(msecs)d %(name)s %(levelname)s %(message)s",
-            datefmt="%H:%M:%S",
-            level=logging.DEBUG,
-            force=True,
-        )
-        self._analysis_logger = logging.getLogger("LLMController")
-        self._analysis_logger.debug("LLMController initialized")
+        if log_to_file:
+            import datetime
+
+            now = datetime.datetime.now()
+            logging.basicConfig(
+                filename=f"./act_timing_{now:%Y-%m-%d}_{now:%H-%M}.log",
+                filemode="a",
+                format="%(asctime)s,%(msecs)d %(name)s %(levelname)s %(message)s",
+                datefmt="%H:%M:%S",
+                level=logging.DEBUG,
+                force=True,
+            )
+            self._analysis_logger = logging.getLogger("LLMController")
+            self._analysis_logger.debug("LLMController initialized")
 
         with habitat.config.read_write(self._config):
             fix_config(self._config)
@@ -85,8 +99,6 @@ def initialize_agents(self, agent_configs):
             agent = Agent(
                 agent_conf.uid, agent_conf.config, self.environment_interface
             )
-
-            # Make sure that its unique by adding to the set
             agents.append(agent)
         return agents
 
@@ -126,42 +138,33 @@ def _act(self, observations, *args, **kwargs):
         (
             self._low_level_actions,
             planner_info,
-            task_done,
+            self._task_done,
         ) = self.planner.get_next_action(
             self.current_instruction,
             observations,
             self.environment_interface.world_graph,
             verbose=True,
         )
-        if task_done:
-            print("Task Done")
         return
 
     def act(self, observations, *args, **kwargs):
-        if self._iter < self._skip_iters:
+        if self._iter < self._skip_iters or self._task_done:
             self._iter += 1
             return np.zeros(self._agent_action_length)
-        with CodeTimer("LLMController.act", self._analysis_logger):
-            low_level_actions = np.zeros(self._agent_action_length)
-            if self._thread is None:
-                self._thread = threading.Thread(
-                    target=self._act, args=(observations,), kwargs=kwargs
-                )
-                self._thread.start()
+        low_level_actions = np.zeros(self._agent_action_length)
+        if self._thread is None:
+            self._thread = threading.Thread(
+                target=self._act, args=(observations,), kwargs=kwargs
+            )
+            self._thread.start()
+        else:
+            if self._thread.is_alive():
+                pass
             else:
-                if self._thread.is_alive():
-                    pass
-                else:
-                    self._thread = None
-                    if self._low_level_actions != {}:
-                        low_level_actions = self._low_level_actions[
-                            str(self._agent_idx)
-                        ][:-248]
-
-        # if low_level_actions:
-        #     low_level_actions = low_level_actions[str(self._agent_idx)]
-        #     # NOTE: truncating the action here, as this includes both Spot and Human actions
-        #     low_level_actions = low_level_actions[:-248]
-        # else:
-        #     low_level_actions = np.zeros(self._agent_action_length)
+                self._thread = None
+                if self._low_level_actions != {}:
+                    low_level_actions = self._low_level_actions[
+                        str(self._agent_idx)
+                    ][:-248]
+
         return low_level_actions

From ff0fb40244dad35ba1345d969898788e2575a265 Mon Sep 17 00:00:00 2001
From: Priyam Parashar <priyam8parashar@gmail.com>
Date: Fri, 17 May 2024 16:36:15 -0400
Subject: [PATCH 16/88] remove unused config

---
 .../lang_rearrange_spot_humanoid_visual.yaml  | 43 -------------------
 1 file changed, 43 deletions(-)
 delete mode 100644 examples/hitl/rearrange_v2/config/lang_rearrange_spot_humanoid_visual.yaml

diff --git a/examples/hitl/rearrange_v2/config/lang_rearrange_spot_humanoid_visual.yaml b/examples/hitl/rearrange_v2/config/lang_rearrange_spot_humanoid_visual.yaml
deleted file mode 100644
index 18da93ac34..0000000000
--- a/examples/hitl/rearrange_v2/config/lang_rearrange_spot_humanoid_visual.yaml
+++ /dev/null
@@ -1,43 +0,0 @@
-# @package _global_
-
-defaults:
-  - language_rearrange_multi_agent
-  - hitl_defaults
-  - _self_
-
-habitat:
-  # various config args to ensure the episode never ends
-  environment:
-    max_episode_steps: 0
-    iterator_options:
-      # For the demo, we want to showcase the episodes in the specified order
-      shuffle: False
-  dataset:
-    data_path: "data/prerelease_1k_v2.json.gz"
-    scenes_dir: "data/fpss"
-
-habitat_baselines:
-  # todo: document these choices
-  eval:
-    should_load_ckpt: False
-  rl:
-    agent:
-      num_pool_agents_per_type: [1, 1]
-    policy:
-
-habitat_hitl:
-  window:
-    title: "Rearrange"
-    width: 1300
-    height: 1000
-  gui_controlled_agents:
-    - agent_index: 0
-      lin_speed: 10.0
-      ang_speed: 300
-    - agent_index: 1
-      lin_speed: 10.0
-      ang_speed: 300
-  hide_humanoid_in_gui: True
-  camera:
-    first_person_mode: True
-  remove_visual_sensors: False

From a87b1ba622c8bd18e56b1135f5938d98908e4137 Mon Sep 17 00:00:00 2001
From: Priyam Parashar <priyam8parashar@gmail.com>
Date: Fri, 17 May 2024 16:36:58 -0400
Subject: [PATCH 17/88] optional logging, -llm-deps

---
 .../habitat_hitl/_internal/hitl_driver.py     | 36 ++++++++++---------
 1 file changed, 20 insertions(+), 16 deletions(-)

diff --git a/habitat-hitl/habitat_hitl/_internal/hitl_driver.py b/habitat-hitl/habitat_hitl/_internal/hitl_driver.py
index bef3e5dd22..c60057fef3 100644
--- a/habitat-hitl/habitat_hitl/_internal/hitl_driver.py
+++ b/habitat-hitl/habitat_hitl/_internal/hitl_driver.py
@@ -17,7 +17,6 @@
 
 import magnum as mn
 import numpy as np
-from habitat_llm.utils.analysis import CodeTimer
 
 import habitat
 import habitat.gym
@@ -87,6 +86,7 @@ def __init__(
         debug_line_drawer: Optional[DebugLineRender],
         text_drawer: AbstractTextDrawer,
         create_app_state_lambda: Callable,
+        log_to_file: bool = False,
     ):
         if "habitat_hitl" not in config:
             raise RuntimeError(
@@ -97,16 +97,21 @@ def __init__(
         self._play_episodes_filter_str = self._hitl_config.episodes_filter
         self._num_recorded_episodes = 0
         self._gui_input = gui_input
-        logging.basicConfig(
-            filename="/home/priyamp/hitl/app_timing.log",
-            filemode="a",
-            format="%(asctime)s,%(msecs)d %(name)s %(levelname)s %(message)s",
-            datefmt="%H:%M:%S",
-            level=logging.DEBUG,
-            force=True,
-        )
-        self._logger = logging.getLogger("HitlDriver")
-        self._logger.debug("HitlDriver initialized")
+        self._logger = None
+        if log_to_file:
+            import datetime
+
+            now = datetime.datetime.now()
+            logging.basicConfig(
+                filename=f"./hitl_driver_timing_{now:%Y-%m-%d}_{now:%H-%M}.log",
+                filemode="a",
+                format="%(asctime)s,%(msecs)d %(name)s %(levelname)s %(message)s",
+                datefmt="%H:%M:%S",
+                level=logging.DEBUG,
+                force=True,
+            )
+            self._logger = logging.getLogger("HitlDriver")
+            self._logger.debug("HitlDriver initialized")
 
         with habitat.config.read_write(config):  # type: ignore
             # needed so we can provide keyframes to GuiApplication
@@ -337,9 +342,8 @@ def _compute_action_and_step_env(self):
         if self._hitl_config.disable_policies_and_stepping:
             return
 
-        with CodeTimer("compute_action_and_step_env", self._logger):
-            action = self.ctrl_helper.update(self._obs)
-            self._env_step(action)
+        action = self.ctrl_helper.update(self._obs)
+        self._env_step(action)
 
         if self._save_episode_record:
             self._record_action(action)
@@ -385,8 +389,8 @@ def _reset_environment(self):
             self._remote_client_state.clear_history()
 
         # todo: fix duplicate calls to self.ctrl_helper.on_environment_reset() here
-        # if self.ctrl_helper:
-        #     self.ctrl_helper.on_environment_reset()
+        if self.ctrl_helper:
+            self.ctrl_helper.on_environment_reset()
 
         if self._save_episode_record:
             self._reset_episode_recorder()

From a0ab1c2be1a0b3254e232bbc5483b6a98b98ae82 Mon Sep 17 00:00:00 2001
From: Alexander Clegg <alexanderwclegg@gmail.com>
Date: Thu, 16 May 2024 15:53:55 -0700
Subject: [PATCH 18/88] Disable new clone protection for git lfs (#1961)

---
 .circleci/config.yml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 1e51bc1a61..7552ea33c7 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -203,6 +203,9 @@ jobs:
       - run:
           name: Download test data
           command: |
+              # Disable clone protection for git lfs
+              export GIT_CLONE_PROTECTION_ACTIVE=false
+
               sudo apt install git-lfs
               export PATH=$HOME/miniconda/bin:/usr/local/cuda/bin:$PATH
               . activate habitat

From 48fdfdbeaba83288b1644c31ba5e227baaae279b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mika=C3=ABl=20Dallaire=20C=C3=B4t=C3=A9?=
 <110583667+0mdc@users.noreply.github.com>
Date: Fri, 17 May 2024 19:42:27 +0200
Subject: [PATCH 19/88] HITL - Make client loading state available to
 applications. (#1955)

* Make client loading state available to applications.

* Simplify code.
---
 habitat-hitl/habitat_hitl/core/remote_client_state.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/habitat-hitl/habitat_hitl/core/remote_client_state.py b/habitat-hitl/habitat_hitl/core/remote_client_state.py
index d32cdc9ee0..5809972bda 100644
--- a/habitat-hitl/habitat_hitl/core/remote_client_state.py
+++ b/habitat-hitl/habitat_hitl/core/remote_client_state.py
@@ -57,6 +57,8 @@ def __init__(
             self._client_state_history.append([])
             self._receive_rate_trackers.append(AverageRateTracker(2.0))
 
+        self._client_loading: List[bool] = [False] * users.max_user_count
+
         # temp map VR button to key
         self._button_map = {
             0: GuiInput.KeyNS.ZERO,
@@ -81,6 +83,10 @@ def get_gui_inputs(self) -> List[GuiInput]:
         """Get a list of all GuiInputs indexed by user index."""
         return self._gui_inputs
 
+    def is_user_loading(self, user_index: int) -> bool:
+        """Return true if the specified user's client is in a loading state."""
+        return self._client_loading[user_index]
+
     def bind_gui_input(self, gui_input: GuiInput, user_index: int) -> None:
         """
         Bind the specified GuiInput to a specified user, allowing the associated remote client to control it.
@@ -282,6 +288,11 @@ def _update_input_state(
             # frame and other ways that keyHeld, keyDown, and keyUp can be inconsistent.
             last_client_state = client_states[-1]
 
+            # Loading states.
+            self._client_loading[user_index] = last_client_state.get(
+                "isLoading", False
+            )
+
             input_json = (
                 last_client_state["input"]
                 if "input" in last_client_state

From 0963a59ae62eeefcd7bf8083283f47c3d6dec8d0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mika=C3=ABl=20Dallaire=20C=C3=B4t=C3=A9?=
 <110583667+0mdc@users.noreply.github.com>
Date: Fri, 17 May 2024 19:43:14 +0200
Subject: [PATCH 20/88] Add buttons and modal dialog boxes. (#1956)

---
 .../core/client_message_manager.py            | 42 +++++++++++++++++++
 .../habitat_hitl/core/remote_client_state.py  | 19 ++++++++-
 2 files changed, 59 insertions(+), 2 deletions(-)

diff --git a/habitat-hitl/habitat_hitl/core/client_message_manager.py b/habitat-hitl/habitat_hitl/core/client_message_manager.py
index 3daddd3c8c..2a84716587 100644
--- a/habitat-hitl/habitat_hitl/core/client_message_manager.py
+++ b/habitat-hitl/habitat_hitl/core/client_message_manager.py
@@ -4,6 +4,7 @@
 # This source code is licensed under the MIT license found in the
 # LICENSE file in the root directory of this source tree.
 
+from dataclasses import dataclass
 from typing import Final, List, Optional, Union
 
 import magnum as mn
@@ -14,6 +15,19 @@
 DEFAULT_NORMAL: Final[List[float]] = [0.0, 1.0, 0.0]
 
 
+# TODO: Move to another file.
+@dataclass
+class UIButton:
+    """
+    Networked UI button. Use RemoteClientState.ui_button_pressed() to retrieve state.
+    """
+
+    def __init__(self, button_id: str, text: str, enabled: bool):
+        self.button_id = button_id
+        self.text = text
+        self.enabled = enabled
+
+
 class ClientMessageManager:
     r"""
     Extends gfx-replay keyframes to include server messages to be interpreted by the clients.
@@ -144,6 +158,34 @@ def add_text(
                 {"text": text, "position": [pos[0], pos[1]]}
             )
 
+    def show_modal_dialogue_box(
+        self,
+        title: str,
+        text: str,
+        buttons: List[UIButton],
+        destination_mask: Mask = Mask.ALL,
+    ):
+        r"""
+        Show a modal dialog box with buttons.
+        There can only be one modal dialog box at a time.
+        """
+        for user_index in self._users.indices(destination_mask):
+            message = self._messages[user_index]
+
+            message["dialog"] = {
+                "title": title,
+                "text": text,
+                "buttons": [],
+            }
+            for button in buttons:
+                message["dialog"]["buttons"].append(
+                    {
+                        "id": button.button_id,
+                        "text": button.text,
+                        "enabled": button.enabled,
+                    }
+                )
+
     def change_humanoid_position(
         self, pos: List[float], destination_mask: Mask = Mask.ALL
     ) -> None:
diff --git a/habitat-hitl/habitat_hitl/core/remote_client_state.py b/habitat-hitl/habitat_hitl/core/remote_client_state.py
index 5809972bda..3f66b682c3 100644
--- a/habitat-hitl/habitat_hitl/core/remote_client_state.py
+++ b/habitat-hitl/habitat_hitl/core/remote_client_state.py
@@ -5,7 +5,7 @@
 # LICENSE file in the root directory of this source tree.
 
 import math
-from typing import Any, List, Optional, Tuple
+from typing import Any, List, Optional, Set, Tuple
 
 import magnum as mn
 
@@ -49,6 +49,9 @@ def __init__(
         self._on_client_connected = Event()
         self._on_client_disconnected = Event()
 
+        # TODO: Handle UI in a different class.
+        self._pressed_ui_buttons: List[Set[str]] = []
+
         self._gui_inputs: List[GuiInput] = []
         self._client_state_history: List[List[ClientState]] = []
         self._receive_rate_trackers: List[AverageRateTracker] = []
@@ -56,6 +59,7 @@ def __init__(
             self._gui_inputs.append(GuiInput())
             self._client_state_history.append([])
             self._receive_rate_trackers.append(AverageRateTracker(2.0))
+            self._pressed_ui_buttons.append(set())
 
         self._client_loading: List[bool] = [False] * users.max_user_count
 
@@ -95,6 +99,9 @@ def bind_gui_input(self, gui_input: GuiInput, user_index: int) -> None:
         assert user_index < len(self._gui_inputs)
         self._gui_inputs[user_index] = gui_input
 
+    def ui_button_pressed(self, user_index: int, button_id: str) -> bool:
+        return button_id in self._pressed_ui_buttons[user_index]
+
     def get_history_length(self) -> int:
         """Length of client state history preserved. Anything beyond this horizon is discarded."""
         return 4
@@ -217,13 +224,19 @@ def _update_input_state(
         if len(all_client_states) == 0 or len(self._gui_inputs) == 0:
             return
 
-        # Gather all recent keyDown and keyUp events
+        # Gather all input events.
         for user_index in range(len(all_client_states)):
             client_states = all_client_states[user_index]
             if len(client_states) == 0:
                 continue
             gui_input = self._gui_inputs[user_index]
             for client_state in client_states:
+                # UI element events.
+                ui = client_state.get("ui", None)
+                if ui is not None:
+                    for button in ui.get("buttonsPressed", []):
+                        self._pressed_ui_buttons[user_index].add(button)
+
                 input_json = (
                     client_state["input"] if "input" in client_state else None
                 )
@@ -458,11 +471,13 @@ def get_new_connection_records(self) -> List[ConnectionRecord]:
     def on_frame_end(self) -> None:
         for user_index in self._users.indices(Mask.ALL):
             self._gui_inputs[user_index].on_frame_end()
+            self._pressed_ui_buttons[user_index].clear()
         self._new_connection_records = None
 
     def clear_history(self, user_mask=Mask.ALL) -> None:
         for user_index in self._users.indices(user_mask):
             self._client_state_history[user_index].clear()
+            self._pressed_ui_buttons[user_index].clear()
 
     def kick(self, user_mask: Mask) -> None:
         """

From 4e8650c87544128062fdc2fdce4be210724d9032 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mika=C3=ABl=20Dallaire=20C=C3=B4t=C3=A9?=
 <110583667+0mdc@users.noreply.github.com>
Date: Fri, 17 May 2024 19:43:46 +0200
Subject: [PATCH 21/88] Add configuration to enable new connections by default.
 (#1960)

---
 .../habitat_hitl/_internal/networking/interprocess_record.py | 5 ++++-
 habitat-hitl/habitat_hitl/config/hitl_defaults.yaml          | 3 +++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/habitat-hitl/habitat_hitl/_internal/networking/interprocess_record.py b/habitat-hitl/habitat_hitl/_internal/networking/interprocess_record.py
index 8d3e1fb449..8bc6fdefec 100644
--- a/habitat-hitl/habitat_hitl/_internal/networking/interprocess_record.py
+++ b/habitat-hitl/habitat_hitl/_internal/networking/interprocess_record.py
@@ -27,7 +27,10 @@ def __init__(self, networking_config) -> None:
         self._connection_record_queue: Queue[ConnectionRecord] = Queue()
         self._disconnection_record_queue: Queue[DisconnectionRecord] = Queue()
         self._kick_signal_queue: Queue[int] = Queue()
-        self._allow_new_connections = Value("b", False)
+
+        self._allow_new_connections = Value(
+            "b", networking_config.enable_connections_by_default
+        )
 
     def enable_new_connections(self, enabled: bool):
         """Signal the networking process whether it should accept new connections."""
diff --git a/habitat-hitl/habitat_hitl/config/hitl_defaults.yaml b/habitat-hitl/habitat_hitl/config/hitl_defaults.yaml
index e26fe215d6..c532a6e943 100644
--- a/habitat-hitl/habitat_hitl/config/hitl_defaults.yaml
+++ b/habitat-hitl/habitat_hitl/config/hitl_defaults.yaml
@@ -23,6 +23,9 @@ habitat_hitl:
     # Number of accepted concurrent clients (multiplayer if higher than one). All connections beyond this count will be rejected. Beware that this can be different from the agent count.
     max_client_count: 1
 
+    # Accept incoming connections by default. If disabled, connections must be activated using InterprocessRecord.enable_new_connections().
+    enable_connections_by_default: True
+
     # We'll listen for incoming client connections at this port.
     port: 8888
 

From 8a653c2eb078eb49121d6dc9e42a052fa44433fe Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mika=C3=ABl=20Dallaire=20C=C3=B4t=C3=A9?=
 <110583667+0mdc@users.noreply.github.com>
Date: Fri, 17 May 2024 19:58:27 +0200
Subject: [PATCH 22/88] Add textboxes. (#1957)

---
 .../core/client_message_manager.py            | 19 +++++++++++++++++++
 .../habitat_hitl/core/remote_client_state.py  | 12 +++++++++++-
 2 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/habitat-hitl/habitat_hitl/core/client_message_manager.py b/habitat-hitl/habitat_hitl/core/client_message_manager.py
index 2a84716587..b717fa61ad 100644
--- a/habitat-hitl/habitat_hitl/core/client_message_manager.py
+++ b/habitat-hitl/habitat_hitl/core/client_message_manager.py
@@ -28,6 +28,18 @@ def __init__(self, button_id: str, text: str, enabled: bool):
         self.enabled = enabled
 
 
+@dataclass
+class UITextbox:
+    """
+    Networked UI textbox. Use RemoteClientState.get_textbox_content() to retrieve content.
+    """
+
+    def __init__(self, textbox_id: str, text: str, enabled: bool):
+        self.textbox_id = textbox_id
+        self.text = text
+        self.enabled = enabled
+
+
 class ClientMessageManager:
     r"""
     Extends gfx-replay keyframes to include server messages to be interpreted by the clients.
@@ -163,6 +175,7 @@ def show_modal_dialogue_box(
         title: str,
         text: str,
         buttons: List[UIButton],
+        textbox: Optional[UITextbox] = None,
         destination_mask: Mask = Mask.ALL,
     ):
         r"""
@@ -177,6 +190,12 @@ def show_modal_dialogue_box(
                 "text": text,
                 "buttons": [],
             }
+            if textbox is not None:
+                message["dialog"]["textbox"] = {
+                    "id": textbox.textbox_id,
+                    "text": textbox.text,
+                    "enabled": textbox.enabled,
+                }
             for button in buttons:
                 message["dialog"]["buttons"].append(
                     {
diff --git a/habitat-hitl/habitat_hitl/core/remote_client_state.py b/habitat-hitl/habitat_hitl/core/remote_client_state.py
index 3f66b682c3..6b28a7c7f7 100644
--- a/habitat-hitl/habitat_hitl/core/remote_client_state.py
+++ b/habitat-hitl/habitat_hitl/core/remote_client_state.py
@@ -5,7 +5,7 @@
 # LICENSE file in the root directory of this source tree.
 
 import math
-from typing import Any, List, Optional, Set, Tuple
+from typing import Any, Dict, List, Optional, Set, Tuple
 
 import magnum as mn
 
@@ -51,6 +51,7 @@ def __init__(
 
         # TODO: Handle UI in a different class.
         self._pressed_ui_buttons: List[Set[str]] = []
+        self._textboxes: List[Dict[str, str]] = []
 
         self._gui_inputs: List[GuiInput] = []
         self._client_state_history: List[List[ClientState]] = []
@@ -60,6 +61,7 @@ def __init__(
             self._client_state_history.append([])
             self._receive_rate_trackers.append(AverageRateTracker(2.0))
             self._pressed_ui_buttons.append(set())
+            self._textboxes.append({})
 
         self._client_loading: List[bool] = [False] * users.max_user_count
 
@@ -102,6 +104,10 @@ def bind_gui_input(self, gui_input: GuiInput, user_index: int) -> None:
     def ui_button_pressed(self, user_index: int, button_id: str) -> bool:
         return button_id in self._pressed_ui_buttons[user_index]
 
+    def get_textbox_content(self, user_index: int, textbox_id: str) -> str:
+        user_textboxes = self._textboxes[user_index]
+        return user_textboxes.get(textbox_id, "")
+
     def get_history_length(self) -> int:
         """Length of client state history preserved. Anything beyond this horizon is discarded."""
         return 4
@@ -236,6 +242,8 @@ def _update_input_state(
                 if ui is not None:
                     for button in ui.get("buttonsPressed", []):
                         self._pressed_ui_buttons[user_index].add(button)
+                    for textbox_id, text in ui.get("textboxes", {}).items():
+                        self._textboxes[user_index][textbox_id] = text
 
                 input_json = (
                     client_state["input"] if "input" in client_state else None
@@ -472,12 +480,14 @@ def on_frame_end(self) -> None:
         for user_index in self._users.indices(Mask.ALL):
             self._gui_inputs[user_index].on_frame_end()
             self._pressed_ui_buttons[user_index].clear()
+            self._textboxes[user_index].clear()
         self._new_connection_records = None
 
     def clear_history(self, user_mask=Mask.ALL) -> None:
         for user_index in self._users.indices(user_mask):
             self._client_state_history[user_index].clear()
             self._pressed_ui_buttons[user_index].clear()
+            self._textboxes[user_index].clear()
 
     def kick(self, user_mask: Mask) -> None:
         """

From 783506ee79df5685c1d056c62566e7483a4ccf15 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mika=C3=ABl=20Dallaire=20C=C3=B4t=C3=A9?=
 <110583667+0mdc@users.noreply.github.com>
Date: Fri, 17 May 2024 20:08:31 +0200
Subject: [PATCH 23/88] HITL - Add picture-in-picture viewports. (#1958)

* Add viewports.

* Add viewports to rearrange_v2.

* Improve viewport rect description.
---
 examples/hitl/rearrange_v2/rearrange_v2.py    | 32 ++++++++-
 .../core/client_message_manager.py            | 68 ++++++++++++++++++-
 2 files changed, 98 insertions(+), 2 deletions(-)

diff --git a/examples/hitl/rearrange_v2/rearrange_v2.py b/examples/hitl/rearrange_v2/rearrange_v2.py
index f788186ba7..4a4151004e 100644
--- a/examples/hitl/rearrange_v2/rearrange_v2.py
+++ b/examples/hitl/rearrange_v2/rearrange_v2.py
@@ -5,6 +5,8 @@
 # LICENSE file in the root directory of this source tree.
 
 
+from __future__ import annotations
+
 # Must call this before importing Habitat or Magnum.
 # fmt: off
 import ctypes
@@ -13,7 +15,6 @@
 sys.setdlopenflags(sys.getdlopenflags() | ctypes.RTLD_GLOBAL)
 # fmt: on
 
-
 from typing import List, Optional
 
 # This registers collaboration episodes into this application.
@@ -45,6 +46,7 @@
 from habitat_sim.utils.common import quat_from_magnum, quat_to_coeffs
 
 UP = mn.Vector3(0, 1, 0)
+PIP_VIEWPORT_ID = 0  # ID of the picture-in-picture viewport that shows other agent's perspective.
 
 
 class DataLogger:
@@ -127,6 +129,7 @@ def __init__(
         self.show_gui_text = True
         self.task_instruction = ""
         self.signal_change_episode = False
+        self.pip_initialized = False
 
         # If in remote mode, get the remote input. Else get the server (local) input.
         self.gui_input = (
@@ -186,6 +189,28 @@ def update(self, dt: float):
         self.ui.update()
         self.ui.draw_ui()
 
+    def draw_pip_viewport(self, pip_user_data: UserData):
+        """
+        Draw a picture-in-picture viewport showing another agent's perspective.
+        """
+        # Lazy init:
+        if not self.pip_initialized:
+            self.pip_initialized = True
+
+            # Define picture-in-picture (PIP) viewport.
+            self.app_service.client_message_manager.set_viewport_properties(
+                viewport_id=PIP_VIEWPORT_ID,
+                viewport_rect_xywh=[0.8, 0.02, 0.18, 0.18],
+                destination_mask=Mask.from_index(self.user_index),
+            )
+
+        # Show picture-in-picture (PIP) viewport.
+        self.app_service.client_message_manager.show_viewport(
+            viewport_id=PIP_VIEWPORT_ID,
+            cam_transform=pip_user_data.cam_transform,
+            destination_mask=Mask.from_index(self.user_index),
+        )
+
     def _get_camera_lookat_pos(self) -> mn.Vector3:
         agent_root = get_agent_art_obj_transform(
             self.app_service.sim,
@@ -396,6 +421,11 @@ def sim_update(self, dt: float, post_sim_update_dict):
 
         self._sps_tracker.increment()
 
+        # Draw the picture-in-picture showing other agent's perspective.
+        if self._users.max_user_count == 2:
+            self._user_data[0].draw_pip_viewport(self._user_data[1])
+            self._user_data[1].draw_pip_viewport(self._user_data[0])
+
         if not self._paused:
             for user_index in self._users.indices(Mask.ALL):
                 self._user_data[user_index].update(dt)
diff --git a/habitat-hitl/habitat_hitl/core/client_message_manager.py b/habitat-hitl/habitat_hitl/core/client_message_manager.py
index b717fa61ad..06dabe2bea 100644
--- a/habitat-hitl/habitat_hitl/core/client_message_manager.py
+++ b/habitat-hitl/habitat_hitl/core/client_message_manager.py
@@ -5,7 +5,7 @@
 # LICENSE file in the root directory of this source tree.
 
 from dataclasses import dataclass
-from typing import Final, List, Optional, Union
+from typing import Any, Dict, Final, List, Optional, Union
 
 import magnum as mn
 
@@ -242,6 +242,50 @@ def set_server_keyframe_id(
             message = self._messages[user_index]
             message["serverKeyframeId"] = keyframe_id
 
+    def set_viewport_properties(
+        self,
+        viewport_id: int,
+        viewport_rect_xywh: List[float],
+        destination_mask: Mask = Mask.ALL,
+    ):
+        r"""
+        Set the properties of a viewport. Unlike show_viewport(), this does not have to be called every frame.
+        Use viewport_id '-1' to edit the default viewport.
+
+        viewport_id: Unique identifier of the viewport.
+        viewport_rect_xywh: Viewport rect (x position, y position, width, height).
+                            In window normalized coordinates, i.e. all values in range [0,1] relative to window size.
+        """
+        for user_index in self._users.indices(destination_mask):
+            message = self._messages[user_index]
+            viewport_properties = _obtain_viewport_properties(
+                message, viewport_id
+            )
+            viewport_properties["rect"] = viewport_rect_xywh
+
+    def show_viewport(
+        self,
+        viewport_id: int,
+        cam_transform: mn.Matrix4,
+        destination_mask: Mask = Mask.ALL,
+    ):
+        """
+        Show a picture-in-picture viewport rendering the specified camera matrix.
+        This must be repeatedly called for the viewport to stay visible.
+        The viewport_id '-1' is reserved for the main viewport. It is always visible.
+        Use set_viewport_properties() to configure the viewport.
+        """
+        assert viewport_id != -1
+        for user_index in self._users.indices(destination_mask):
+            message = self._messages[user_index]
+            viewport_properties = _obtain_viewport_properties(
+                message, viewport_id
+            )
+            viewport_properties["enabled"] = True
+            viewport_properties["camera"] = _create_transform_dict(
+                cam_transform
+            )
+
     def update_navmesh_triangles(
         self,
         triangle_vertices: List[List[float]],
@@ -290,3 +334,25 @@ def update_camera_transform(
                 rot[2],
                 rot[3],
             ]
+
+
+def _create_transform_dict(transform: mn.Matrix4) -> Dict[str, List[float]]:
+    """Create a message dictionary from a transform."""
+    p = transform.translation
+    r = mn.Quaternion.from_matrix(transform.rotation())
+    rv = r.vector
+    return {
+        "translation": [p[0], p[1], p[2]],
+        "rotation": [r.scalar, rv[0], rv[1], rv[2]],
+    }
+
+
+def _obtain_viewport_properties(
+    message: Message, viewport_id: int
+) -> Dict[str, Any]:
+    """Get or create the properties dict of a viewport_id."""
+    if "viewports" not in message:
+        message["viewports"] = {}
+    if viewport_id not in message["viewports"]:
+        message["viewports"][viewport_id] = {}
+    return message["viewports"][viewport_id]

From e7e76abcc2707f2e4b5da9cd502da760e2dea1e7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mika=C3=ABl=20Dallaire=20C=C3=B4t=C3=A9?=
 <110583667+0mdc@users.noreply.github.com>
Date: Fri, 17 May 2024 20:12:10 +0200
Subject: [PATCH 24/88] HITL - Add object visibility handling and hide "self" in
 viewports. (#1959)

* Add object visibility and viewport visibility layers.

* Hide self in viewports.

* Don't render viewports if networking is disabled.
---
 examples/hitl/rearrange_v2/rearrange_v2.py    | 38 ++++++++++++++++++
 .../core/client_message_manager.py            | 39 ++++++++++++++++++-
 2 files changed, 76 insertions(+), 1 deletion(-)

diff --git a/examples/hitl/rearrange_v2/rearrange_v2.py b/examples/hitl/rearrange_v2/rearrange_v2.py
index 4a4151004e..2b4864173e 100644
--- a/examples/hitl/rearrange_v2/rearrange_v2.py
+++ b/examples/hitl/rearrange_v2/rearrange_v2.py
@@ -121,6 +121,7 @@ def __init__(
         client_helper: ClientHelper,
     ):
         self.app_service = app_service
+        self.world = world
         self.user_index = user_index
         self.gui_agent_controller = gui_agent_controller
         self.server_sps_tracker = server_sps_tracker
@@ -163,6 +164,26 @@ def reset(self):
         self.camera_helper.update(self._get_camera_lookat_pos(), dt=0)
         self.ui.reset()
 
+        # If networking is enabled...
+        if self.app_service.client_message_manager:
+            # Assign user agent objects to their own layer.
+            agent_index = self.gui_agent_controller._agent_idx
+            agent_object_ids = self.world.get_agent_object_ids(agent_index)
+            for agent_object_id in agent_object_ids:
+                self.app_service.client_message_manager.set_object_visibility_layer(
+                    object_id=agent_object_id,
+                    layer_id=agent_index,
+                    destination_mask=Mask.from_index(self.user_index),
+                )
+
+            # Show all layers except "user_index" in the default viewport.
+            # This hides the user's own agent in the first person view.
+            self.app_service.client_message_manager.set_viewport_properties(
+                viewport_id=-1,
+                visible_layer_ids=Mask.all_except_index(agent_index),
+                destination_mask=Mask.from_index(self.user_index),
+            )
+
     def update(self, dt: float):
         if self.gui_input.get_key_down(GuiInput.KeyNS.H):
             self.show_gui_text = not self.show_gui_text
@@ -193,14 +214,31 @@ def draw_pip_viewport(self, pip_user_data: UserData):
         """
         Draw a picture-in-picture viewport showing another agent's perspective.
         """
+        # If networking is disabled, skip.
+        if not self.app_service.client_message_manager:
+            return
+
         # Lazy init:
         if not self.pip_initialized:
             self.pip_initialized = True
 
+            # Assign pip agent objects to their own layer.
+            pip_agent_index = pip_user_data.gui_agent_controller._agent_idx
+            agent_object_ids = self.world.get_agent_object_ids(pip_agent_index)
+            for agent_object_id in agent_object_ids:
+                self.app_service.client_message_manager.set_object_visibility_layer(
+                    object_id=agent_object_id,
+                    layer_id=pip_agent_index,
+                    destination_mask=Mask.from_index(self.user_index),
+                )
+
             # Define picture-in-picture (PIP) viewport.
+            # Show all layers except "pip_user_index".
+            # This hides the other agent in the picture-in-picture viewport.
             self.app_service.client_message_manager.set_viewport_properties(
                 viewport_id=PIP_VIEWPORT_ID,
                 viewport_rect_xywh=[0.8, 0.02, 0.18, 0.18],
+                visible_layer_ids=Mask.all_except_index(pip_agent_index),
                 destination_mask=Mask.from_index(self.user_index),
             )
 
diff --git a/habitat-hitl/habitat_hitl/core/client_message_manager.py b/habitat-hitl/habitat_hitl/core/client_message_manager.py
index 06dabe2bea..2e2f8aadf1 100644
--- a/habitat-hitl/habitat_hitl/core/client_message_manager.py
+++ b/habitat-hitl/habitat_hitl/core/client_message_manager.py
@@ -13,6 +13,7 @@
 from habitat_hitl.core.user_mask import Mask, Users
 
 DEFAULT_NORMAL: Final[List[float]] = [0.0, 1.0, 0.0]
+DEFAULT_VIEWPORT_SIZE: Final[List[float]] = [0.0, 0.0, 1.0, 1.0]
 
 
 # TODO: Move to another file.
@@ -242,10 +243,29 @@ def set_server_keyframe_id(
             message = self._messages[user_index]
             message["serverKeyframeId"] = keyframe_id
 
+    def set_object_visibility_layer(
+        self,
+        object_id: int,
+        layer_id: int = -1,
+        destination_mask: Mask = Mask.ALL,
+    ):
+        r"""
+        Set the visibility layer of the instance associated with specified habitat-sim objectId.
+        The layer_id '-1' is the default layer and is visible to all viewports.
+        There are 8 additional layers for controlling visibility (0 to 7).
+        """
+        assert layer_id >= -1
+        assert layer_id < 8
+        for user_index in self._users.indices(destination_mask):
+            message = self._messages[user_index]
+            object_properties = _obtain_object_properties(message, object_id)
+            object_properties["layer"] = layer_id
+
     def set_viewport_properties(
         self,
         viewport_id: int,
-        viewport_rect_xywh: List[float],
+        viewport_rect_xywh: List[float] = DEFAULT_VIEWPORT_SIZE,
+        visible_layer_ids: Mask = Mask.ALL,
         destination_mask: Mask = Mask.ALL,
     ):
         r"""
@@ -255,12 +275,18 @@ def set_viewport_properties(
         viewport_id: Unique identifier of the viewport.
         viewport_rect_xywh: Viewport rect (x position, y position, width, height).
                             In window normalized coordinates, i.e. all values in range [0,1] relative to window size.
+        visible_layer_ids: Visibility layers. Only objects assigned to these layers will be visible to this viewport.
         """
+        layers = Users(8)  # Maximum of 8 layers.
         for user_index in self._users.indices(destination_mask):
             message = self._messages[user_index]
             viewport_properties = _obtain_viewport_properties(
                 message, viewport_id
             )
+            # TODO: Use mask int instead of array
+            viewport_properties["layers"] = []
+            for layer in layers.indices(visible_layer_ids):
+                viewport_properties["layers"].append(layer)
             viewport_properties["rect"] = viewport_rect_xywh
 
     def show_viewport(
@@ -347,6 +373,17 @@ def _create_transform_dict(transform: mn.Matrix4) -> Dict[str, List[float]]:
     }
 
 
+def _obtain_object_properties(
+    message: Message, object_id: int
+) -> Dict[str, Any]:
+    """Get or create the properties dict of an object_id."""
+    if "objects" not in message:
+        message["objects"] = {}
+    if object_id not in message["objects"]:
+        message["objects"][object_id] = {}
+    return message["objects"][object_id]
+
+
 def _obtain_viewport_properties(
     message: Message, viewport_id: int
 ) -> Dict[str, Any]:

From 8a93681429cc3518cba1dec26e0762cdf6a8bbf1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mika=C3=ABl=20Dallaire=20C=C3=B4t=C3=A9?=
 <110583667+0mdc@users.noreply.github.com>
Date: Fri, 17 May 2024 20:12:27 +0200
Subject: [PATCH 25/88] Add autogenerated navmeshes to gitignore. (#1963)

---
 .gitignore | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.gitignore b/.gitignore
index f3e2ce5408..2bc3951112 100644
--- a/.gitignore
+++ b/.gitignore
@@ -100,3 +100,6 @@ data
 /sandbox
 /plots/outputs
 /wandb/
+
+# Autogenerated navmesh files
+*.navmesh

From 74c8df77983ec876331ff7173df550c84319d209 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mika=C3=ABl=20Dallaire=20C=C3=B4t=C3=A9?=
 <110583667+0mdc@users.noreply.github.com>
Date: Sat, 18 May 2024 13:38:52 -0400
Subject: [PATCH 26/88] HITL - Rearrange State Machine (#1964)

* Add viewports.

* Add viewports to rearrange_v2.

* Add object visibility and viewport visibility layers.

* Hide self in viewports.

* Don't render viewports if networking is disabled.

* Turn rearrange_v2 into a state machine. Add lobby.

* Disable rearrange_v2 test.

* Config changes.

* Change episode termination states.

* Add comment to lobby start session delay.
---
 examples/hitl/rearrange_v2/app_data.py        |  19 ++
 examples/hitl/rearrange_v2/app_state_base.py  |  65 ++++
 examples/hitl/rearrange_v2/app_state_lobby.py |  69 +++++
 examples/hitl/rearrange_v2/app_state_reset.py |  36 +++
 examples/hitl/rearrange_v2/app_states.py      |  38 +++
 .../config/lang_rearrange_humanoid_only.yaml  |   8 +-
 .../config/lang_rearrange_spot_humanoid.yaml  |   7 +-
 examples/hitl/rearrange_v2/main.py            |  41 +++
 examples/hitl/rearrange_v2/rearrange_v2.py    | 282 +++++++++---------
 examples/hitl/rearrange_v2/state_machine.py   |  84 ++++++
 examples/hitl/rearrange_v2/util.py            |  13 +
 .../habitat_hitl/_internal/hitl_driver.py     |   8 +-
 .../habitat_hitl/core/client_helper.py        |  25 +-
 .../habitat_hitl/core/remote_client_state.py  |  10 +
 habitat-hitl/test/test_example_apps.py        |   3 +-
 15 files changed, 565 insertions(+), 143 deletions(-)
 create mode 100644 examples/hitl/rearrange_v2/app_data.py
 create mode 100644 examples/hitl/rearrange_v2/app_state_base.py
 create mode 100644 examples/hitl/rearrange_v2/app_state_lobby.py
 create mode 100644 examples/hitl/rearrange_v2/app_state_reset.py
 create mode 100644 examples/hitl/rearrange_v2/app_states.py
 create mode 100644 examples/hitl/rearrange_v2/main.py
 create mode 100644 examples/hitl/rearrange_v2/state_machine.py

diff --git a/examples/hitl/rearrange_v2/app_data.py b/examples/hitl/rearrange_v2/app_data.py
new file mode 100644
index 0000000000..fdd3410d1f
--- /dev/null
+++ b/examples/hitl/rearrange_v2/app_data.py
@@ -0,0 +1,19 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Meta Platforms, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Dict
+
+from habitat_hitl.core.types import ConnectionRecord
+
+
+class AppData:
+    """
+    RearrangeV2 application data shared by all states.
+    """
+
+    def __init__(self, max_user_count: int):
+        self.max_user_count = max_user_count
+        self.connected_users: Dict[int, ConnectionRecord] = {}
diff --git a/examples/hitl/rearrange_v2/app_state_base.py b/examples/hitl/rearrange_v2/app_state_base.py
new file mode 100644
index 0000000000..e8633d35c4
--- /dev/null
+++ b/examples/hitl/rearrange_v2/app_state_base.py
@@ -0,0 +1,65 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Meta Platforms, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+from __future__ import annotations
+
+from typing import Optional
+
+from app_data import AppData
+
+from habitat_hitl.app_states.app_service import AppService
+from habitat_hitl.app_states.app_state_abc import AppState
+from habitat_hitl.core.text_drawer import TextOnScreenAlignment
+from habitat_hitl.core.user_mask import Mask
+
+
+class AppStateBase(AppState):
+    def __init__(
+        self,
+        app_service: AppService,
+        app_data: AppData,
+    ):
+        self._app_service = app_service
+        self._app_data = app_data
+        self._cancel = False
+        self._time_since_last_connection = 0
+        self._save_keyframes = True
+
+    def on_enter(self):
+        print(f"Entering state: {type(self)}")
+
+    def on_exit(self):
+        print(f"Exiting state: {type(self)}")
+
+    def try_cancel(self):
+        self._cancel = True
+
+    def get_next_state(self) -> Optional[AppStateBase]:
+        pass
+
+    def on_environment_reset(self, episode_recorder_dict):
+        pass
+
+    def sim_update(self, dt: float, post_sim_update_dict):
+        pass
+
+    def record_state(self):
+        pass
+
+    def _status_message(self, message: str) -> None:
+        """Send a message to all users."""
+        if len(message) > 0:
+            self._app_service.text_drawer.add_text(
+                message,
+                TextOnScreenAlignment.TOP_CENTER,
+                text_delta_x=-280,
+                text_delta_y=-50,
+                destination_mask=Mask.ALL,
+            )
+
+    def _kick_all_users(self) -> None:
+        "Kick all users."
+        self._app_service.remote_client_state.kick(Mask.ALL)
diff --git a/examples/hitl/rearrange_v2/app_state_lobby.py b/examples/hitl/rearrange_v2/app_state_lobby.py
new file mode 100644
index 0000000000..b23ca2b369
--- /dev/null
+++ b/examples/hitl/rearrange_v2/app_state_lobby.py
@@ -0,0 +1,69 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Meta Platforms, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Final, Optional
+
+from app_data import AppData
+from app_state_base import AppStateBase
+from app_states import create_app_state_rearrange
+
+from habitat_hitl.app_states.app_service import AppService
+
+# Delay to start the session after all users have connected.
+# Occasionally, connection errors may occur rapidly after connecting, causing the session to start needlessly.
+START_SESSION_DELAY: Final[float] = 0.5
+
+
+class AppStateLobby(AppStateBase):
+    """
+    Idle state.
+    Ends when the target user count is reached.
+    """
+
+    def __init__(self, app_service: AppService, app_data: AppData):
+        super().__init__(app_service, app_data)
+        self._save_keyframes = False
+
+    def on_enter(self):
+        super().on_enter()
+        # Enable new connections
+        # TODO: Create API in RemoteClientState
+        self._app_service._remote_client_state._interprocess_record.enable_new_connections(
+            True
+        )
+
+    def on_exit(self):
+        super().on_exit()
+        # Disable new connections
+        # TODO: Create API in RemoteClientState
+        self._app_service._remote_client_state._interprocess_record.enable_new_connections(
+            False
+        )
+
+    def get_next_state(self) -> Optional[AppStateBase]:
+        # If all users are connected, start the session.
+        # NOTE: We wait START_SESSION_DELAY to mitigate early disconnects.
+        if (
+            len(self._app_data.connected_users)
+            == self._app_data.max_user_count
+            and self._time_since_last_connection > START_SESSION_DELAY
+        ):
+            return create_app_state_rearrange(
+                self._app_service, self._app_data
+            )
+        return None
+
+    def sim_update(self, dt: float, post_sim_update_dict):
+        # Show lobby status.
+        missing_users = self._app_data.max_user_count - len(
+            self._app_data.connected_users
+        )
+        if missing_users > 0:
+            s = "s" if missing_users > 1 else ""
+            message = f"Waiting for {missing_users} participant{s} to join."
+            self._status_message(message)
+        else:
+            self._status_message("Loading...")
diff --git a/examples/hitl/rearrange_v2/app_state_reset.py b/examples/hitl/rearrange_v2/app_state_reset.py
new file mode 100644
index 0000000000..3d3a1495e7
--- /dev/null
+++ b/examples/hitl/rearrange_v2/app_state_reset.py
@@ -0,0 +1,36 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Meta Platforms, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Optional
+
+from app_data import AppData
+from app_state_base import AppStateBase
+from app_states import create_app_state_lobby
+
+from habitat_hitl.app_states.app_service import AppService
+
+
+class AppStateReset(AppStateBase):
+    """
+    Kick all users and restore state for a new session.
+    """
+
+    def __init__(self, app_service: AppService, app_data: AppData):
+        super().__init__(app_service, app_data)
+        self._save_keyframes = False
+
+    def on_enter(self):
+        super().on_enter()
+
+        # Kick all users.
+        self._kick_all_users()
+
+    def get_next_state(self) -> Optional[AppStateBase]:
+        # Wait for users to be kicked.
+        if len(self._app_data.connected_users) == 0:
+            return create_app_state_lobby(self._app_service, self._app_data)
+        else:
+            return None
diff --git a/examples/hitl/rearrange_v2/app_states.py b/examples/hitl/rearrange_v2/app_states.py
new file mode 100644
index 0000000000..9729321ce8
--- /dev/null
+++ b/examples/hitl/rearrange_v2/app_states.py
@@ -0,0 +1,38 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Meta Platforms, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""
+Boilerplate code for creating states without circular dependencies.
+"""
+
+from app_data import AppData
+from app_state_base import AppStateBase
+
+from habitat_hitl.app_states.app_service import AppService
+
+
+def create_app_state_reset(
+    app_service: AppService, app_data: AppData
+) -> AppStateBase:
+    from app_state_reset import AppStateReset
+
+    return AppStateReset(app_service, app_data)
+
+
+def create_app_state_lobby(
+    app_service: AppService, app_data: AppData
+) -> AppStateBase:
+    from app_state_lobby import AppStateLobby
+
+    return AppStateLobby(app_service, app_data)
+
+
+def create_app_state_rearrange(
+    app_service: AppService, app_data: AppData
+) -> AppStateBase:
+    from rearrange_v2 import AppStateRearrangeV2
+
+    return AppStateRearrangeV2(app_service, app_data)
diff --git a/examples/hitl/rearrange_v2/config/lang_rearrange_humanoid_only.yaml b/examples/hitl/rearrange_v2/config/lang_rearrange_humanoid_only.yaml
index ce17cef51f..777547db77 100644
--- a/examples/hitl/rearrange_v2/config/lang_rearrange_humanoid_only.yaml
+++ b/examples/hitl/rearrange_v2/config/lang_rearrange_humanoid_only.yaml
@@ -35,6 +35,8 @@ habitat_hitl:
   hide_humanoid_in_gui: True
   camera:
     first_person_mode: True
-  data_collection:
-    save_filepath_base: my_session
-    save_episode_record: True
+  networking:
+    client_sync:
+      server_camera: False
+      server_input: False
+    client_max_idle_duration: 180.0
diff --git a/examples/hitl/rearrange_v2/config/lang_rearrange_spot_humanoid.yaml b/examples/hitl/rearrange_v2/config/lang_rearrange_spot_humanoid.yaml
index 7d076fc367..8e58967569 100644
--- a/examples/hitl/rearrange_v2/config/lang_rearrange_spot_humanoid.yaml
+++ b/examples/hitl/rearrange_v2/config/lang_rearrange_spot_humanoid.yaml
@@ -42,6 +42,11 @@ habitat_hitl:
     - agent_index: 1
       lin_speed: 10.0
       ang_speed: 300
-  hide_humanoid_in_gui: True
+  hide_humanoid_in_gui: False
   camera:
     first_person_mode: True
+  networking:
+    client_sync:
+      server_camera: False
+      server_input: False
+    client_max_idle_duration: 180.0
diff --git a/examples/hitl/rearrange_v2/main.py b/examples/hitl/rearrange_v2/main.py
new file mode 100644
index 0000000000..fbcc780632
--- /dev/null
+++ b/examples/hitl/rearrange_v2/main.py
@@ -0,0 +1,41 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Meta Platforms, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+from __future__ import annotations
+
+# Must call this before importing Habitat or Magnum.
+# fmt: off
+import ctypes
+import sys
+
+sys.setdlopenflags(sys.getdlopenflags() | ctypes.RTLD_GLOBAL)
+# fmt: on
+
+# This registers collaboration episodes into this application.
+import collaboration_episode_loader  # noqa: 401
+import hydra
+from state_machine import StateMachine
+
+from habitat_hitl.core.hitl_main import hitl_main
+from habitat_hitl.core.hydra_utils import register_hydra_plugins
+
+
+@hydra.main(
+    version_base=None, config_path="config", config_name="rearrange_v2"
+)
+def main(config):
+    # We don't sync the server camera. Instead, we maintain one camera per user.
+    assert config.habitat_hitl.networking.client_sync.server_camera == False
+
+    hitl_main(
+        config,
+        lambda app_service: StateMachine(app_service),
+    )
+
+
+if __name__ == "__main__":
+    register_hydra_plugins()
+    main()
diff --git a/examples/hitl/rearrange_v2/rearrange_v2.py b/examples/hitl/rearrange_v2/rearrange_v2.py
index 2b4864173e..51593a1254 100644
--- a/examples/hitl/rearrange_v2/rearrange_v2.py
+++ b/examples/hitl/rearrange_v2/rearrange_v2.py
@@ -7,33 +7,22 @@
 
 from __future__ import annotations
 
-# Must call this before importing Habitat or Magnum.
-# fmt: off
-import ctypes
-import sys
+from typing import Dict, List, Optional
 
-sys.setdlopenflags(sys.getdlopenflags() | ctypes.RTLD_GLOBAL)
-# fmt: on
-
-from typing import List, Optional
-
-# This registers collaboration episodes into this application.
-import collaboration_episode_loader  # noqa: 401
-import hydra
 import magnum as mn
 import numpy as np
+from app_data import AppData
+from app_state_base import AppStateBase
+from app_states import create_app_state_reset
 from ui import UI
+from util import UP
 from world import World
 
 from habitat_hitl._internal.networking.average_rate_tracker import (
     AverageRateTracker,
 )
 from habitat_hitl.app_states.app_service import AppService
-from habitat_hitl.app_states.app_state_abc import AppState
-from habitat_hitl.core.client_helper import ClientHelper
 from habitat_hitl.core.gui_input import GuiInput
-from habitat_hitl.core.hitl_main import hitl_main
-from habitat_hitl.core.hydra_utils import register_hydra_plugins
 from habitat_hitl.core.text_drawer import TextOnScreenAlignment
 from habitat_hitl.core.user_mask import Mask
 from habitat_hitl.environment.camera_helper import CameraHelper
@@ -45,7 +34,6 @@
 from habitat_hitl.environment.hablab_utils import get_agent_art_obj_transform
 from habitat_sim.utils.common import quat_from_magnum, quat_to_coeffs
 
-UP = mn.Vector3(0, 1, 0)
 PIP_VIEWPORT_ID = 0  # ID of the picture-in-picture viewport that shows other agent's perspective.
 
 
@@ -118,18 +106,18 @@ def __init__(
         world: World,
         gui_agent_controller: GuiController,
         server_sps_tracker: AverageRateTracker,
-        client_helper: ClientHelper,
     ):
         self.app_service = app_service
         self.world = world
         self.user_index = user_index
         self.gui_agent_controller = gui_agent_controller
         self.server_sps_tracker = server_sps_tracker
-        self.client_helper = client_helper
+        self.client_helper = (
+            self.app_service.remote_client_state._client_helper
+        )
         self.cam_transform = mn.Matrix4.identity_init()
         self.show_gui_text = True
         self.task_instruction = ""
-        self.signal_change_episode = False
         self.pip_initialized = False
 
         # If in remote mode, get the remote input. Else get the server (local) input.
@@ -138,6 +126,8 @@ def __init__(
             if app_service.remote_client_state is not None
             else self.app_service.gui_input
         )
+        self.episode_finished = False
+        self.episode_success = False
 
         self.camera_helper = CameraHelper(
             app_service.hitl_config,
@@ -160,7 +150,6 @@ def __init__(
         gui_agent_controller._gui_input = self.gui_input
 
     def reset(self):
-        self.signal_change_episode = False
         self.camera_helper.update(self._get_camera_lookat_pos(), dt=0)
         self.ui.reset()
 
@@ -189,7 +178,8 @@ def update(self, dt: float):
             self.show_gui_text = not self.show_gui_text
 
         if self.gui_input.get_key_down(GuiInput.KeyNS.ZERO):
-            self.signal_change_episode = True
+            self.episode_finished = True
+            self.episode_success = True
 
         if self.client_helper:
             self.client_helper.update(
@@ -262,40 +252,28 @@ def _is_user_idle_this_frame(self) -> bool:
         return not self.gui_input.get_any_input()
 
 
-class AppStateRearrangeV2(AppState):
+class AppStateRearrangeV2(AppStateBase):
     """
     Multiplayer rearrangement HITL application.
     """
 
-    def __init__(self, app_service: AppService):
+    def __init__(self, app_service: AppService, app_data: AppData):
+        super().__init__(app_service, app_data)
+        self._save_keyframes = False  # Done on env step (rearrange_sim).
         self._app_service = app_service
         self._gui_agent_controllers = self._app_service.gui_agent_controllers
-        self._num_users = len(self._gui_agent_controllers)
-        self._can_grasp_place_threshold = (
-            self._app_service.hitl_config.can_grasp_place_threshold
-        )
         self._num_agents = len(self._gui_agent_controllers)
         self._users = self._app_service.users
-        self._paused = False
-        self._client_helper: Optional[ClientHelper] = None
-
-        if self._app_service.hitl_config.networking.enable:
-            self._client_helper = ClientHelper(
-                self._app_service.hitl_config,
-                self._app_service.remote_client_state,
-                self._app_service.client_message_manager,
-                self._users,
-            )
 
+        self._sps_tracker = AverageRateTracker(2.0)
         self._server_user_index = 0
         self._server_gui_input = self._app_service.gui_input
         self._server_input_enabled = False
-
-        self._sps_tracker = AverageRateTracker(2.0)
+        self._elapsed_time = 0.0
 
         self._user_data: List[UserData] = []
 
-        self._world = World(self._app_service.sim)
+        self._world = World(app_service.sim)
 
         for user_index in self._users.indices(Mask.ALL):
             self._user_data.append(
@@ -306,31 +284,62 @@ def __init__(self, app_service: AppService):
                     gui_agent_controller=self._gui_agent_controllers[
                         user_index
                     ],
-                    client_helper=self._client_helper,
                     server_sps_tracker=self._sps_tracker,
                 )
             )
 
+        # Reset the environment immediately.
+        self.on_environment_reset(None)
+
+    def get_next_state(self) -> Optional[AppStateBase]:
+        # If cancelled, skip upload and clean-up.
+        if self._cancel or self._is_episode_finished():
+            return create_app_state_reset(self._app_service, self._app_data)
+        else:
+            return None
+
+    def on_enter(self):
+        super().on_enter()
+
+        user_index_to_agent_index_map: Dict[int, int] = {}
+        for user_index in range(len(self._user_data)):
+            user_index_to_agent_index_map[user_index] = self._user_data[
+                user_index
+            ].gui_agent_controller._agent_idx
+
+    def on_exit(self):
+        super().on_exit()
+
+    def _is_episode_finished(self) -> bool:
+        """
+        Determines whether all users have finished their tasks.
+        """
+        return all(
+            self._user_data[user_index].episode_finished
+            for user_index in self._users.indices(Mask.ALL)
+        )
+
     def on_environment_reset(self, episode_recorder_dict):
         self._world.reset()
 
+        # Reset AFK timers.
+        # TODO: Move to idle_kick_timer class. Make it per-user. Couple it with "user_data" class
+        # TODO
+        self._app_service.remote_client_state._client_helper.activate_users()
+
         # Set the task instruction
         current_episode = self._app_service.env.current_episode
+        if hasattr(current_episode, "instruction"):
+            task_instruction = current_episode.instruction
+            # TODO: Users will have different instructions.
+            for user_index in self._users.indices(Mask.ALL):
+                self._user_data[user_index].task_instruction = task_instruction
 
-        episode_data = (
-            collaboration_episode_loader.load_collaboration_episode_data(
-                current_episode
-            )
-        )
         for user_index in self._users.indices(Mask.ALL):
-            self._user_data[
-                user_index
-            ].task_instruction = episode_data.instruction
             self._user_data[user_index].reset()
 
-        client_message_manager = self._app_service.client_message_manager
-        if client_message_manager:
-            client_message_manager.signal_scene_change(Mask.ALL)
+        # Insert a keyframe immediately.
+        self._app_service.sim.gfx_replay_manager.save_keyframe()
 
     def _update_grasping_and_set_act_hints(self, user_index: int):
         gui_agent_controller = self._user_data[user_index].gui_agent_controller
@@ -349,31 +358,29 @@ def _update_grasping_and_set_act_hints(self, user_index: int):
             reach_pos=None,
         )
 
-    def _get_gui_controlled_agent_index(self, user_index) -> int:
+    def _get_gui_controlled_agent_index(self, user_index):
         return self._gui_agent_controllers[user_index]._agent_idx
 
     def _get_controls_text(self, user_index: int):
-        if self._paused:
-            return "Session ended."
-
-        if not self._user_data[user_index].show_gui_text:
-            return ""
-
         controls_str: str = ""
-        controls_str += "H: Toggle help\n"
-        controls_str += "Look: Middle click (drag), I, K\n"
-        controls_str += "Walk: W, S\n"
-        controls_str += "Turn: A, D\n"
-        controls_str += "Finish episode: Zero (0)\n"
-        controls_str += "Open/close: Double-click\n"
-        controls_str += "Pick object: Double-click\n"
-        controls_str += "Place object: Right click (hold)\n"
+        if self._user_data[user_index].show_gui_text:
+            controls_str += "H: Toggle help\n"
+            controls_str += "Look: Middle click (drag), I, K\n"
+            controls_str += "Walk: W, S\n"
+            controls_str += "Turn: A, D\n"
+            controls_str += "Finish episode: Zero (0)\n"
+            controls_str += "Open/close: Double-click\n"
+            controls_str += "Pick object: Double-click\n"
+            controls_str += "Place object: Right click (hold)\n"
+
+        client_helper = self._app_service.remote_client_state._client_helper
+        idle_time = client_helper.get_idle_time(user_index)
+        if idle_time > 10:
+            controls_str += f"Idle time {idle_time}s\n"
+
         return controls_str
 
     def _get_status_text(self, user_index: int):
-        if self._paused:
-            return ""
-
         status_str = ""
 
         if len(self._user_data[user_index].task_instruction) > 0:
@@ -382,12 +389,22 @@ def _get_status_text(self, user_index: int):
                 + self._user_data[user_index].task_instruction
                 + "\n"
             )
-        if self._user_data[user_index].client_helper and self._user_data[
-            user_index
-        ].client_helper.do_show_idle_kick_warning(user_index):
-            status_str += (
-                "\n\nAre you still there?\nPress any key to keep playing!\n"
+
+        if (
+            self._users.max_user_count > 1
+            and not self._user_data[user_index].episode_finished
+        ):
+            if self._has_any_user_finished_success():
+                status_str += "\n\nThe other participant has signaled that the task is completed.\nPress '0' when you are done."
+            elif self._has_any_user_finished_failure():
+                status_str += "\n\nThe other participant has signaled a problem with the task.\nPress '0' to continue."
+
+        client_helper = self._app_service.remote_client_state._client_helper
+        if client_helper.do_show_idle_kick_warning(user_index):
+            remaining_time = str(
+                client_helper.get_remaining_idle_time(user_index)
             )
+            status_str += f"\n\nAre you still there?\nPress any key in the next {remaining_time}s to keep playing!\n"
 
         return status_str
 
@@ -410,42 +427,32 @@ def _update_help_text(self, user_index: int):
                 destination_mask=Mask.from_index(user_index),
             )
 
-    def is_user_idle_this_frame(self) -> bool:
-        return not self._app_service.gui_input.get_any_input()
-
-    def _check_change_episode(self):
-        # If all users signaled to change episode:
-        change_episode = True
-        for user_index in self._users.indices(Mask.ALL):
-            change_episode &= self._user_data[user_index].signal_change_episode
-
-        if (
-            change_episode
-            and self._app_service.episode_helper.next_episode_exists()
-        ):
-            # for user_index in self._users.indices(Mask.ALL):
-            #    self._user_data[user_index].signal_change_episode = False
-            self._app_service.end_episode(do_reset=True)
-
     def sim_update(self, dt: float, post_sim_update_dict):
-        if (
-            not self._app_service.hitl_config.networking.enable
-            and self._server_gui_input.get_key_down(GuiInput.KeyNS.ESC)
-        ):
-            self._app_service.end_episode()
-            post_sim_update_dict["application_exit"] = True
-            return
-
-        # Switch the server-controlled user.
-        if (
-            self._users.max_user_count > 0
-            and self._server_gui_input.get_key_down(GuiInput.KeyNS.TAB)
-        ):
-            self._server_user_index = (
-                self._server_user_index + 1
-            ) % self._users.max_user_count
-
-        # Copy server input to user input.
+        if not self._app_service.hitl_config.experimental.headless.do_headless:
+            # Server GUI exit.
+            if (
+                not self._app_service.hitl_config.networking.enable
+                and self._server_gui_input.get_key_down(GuiInput.KeyNS.ESC)
+            ):
+                self._app_service.end_episode()
+                post_sim_update_dict["application_exit"] = True
+                return
+
+            # Skip the form when changing the episode from the server.
+            if self._server_gui_input.get_key_down(GuiInput.KeyNS.ZERO):
+                server_user = self._user_data[self._server_user_index]
+                server_user.episode_finished = True
+                server_user.episode_success = True
+
+            # Switch the server-controlled user.
+            if self._num_agents > 0 and self._server_gui_input.get_key_down(
+                GuiInput.KeyNS.TAB
+            ):
+                self._server_user_index = (
+                    self._server_user_index + 1
+                ) % self._num_agents
+
+        # Copy server input to user input when server input is active.
         if self._app_service.hitl_config.networking.enable:
             server_user_input = self._user_data[
                 self._server_user_index
@@ -459,19 +466,17 @@ def sim_update(self, dt: float, post_sim_update_dict):
 
         self._sps_tracker.increment()
 
+        for user_index in self._users.indices(Mask.ALL):
+            self._user_data[user_index].update(dt)
+            self._update_grasping_and_set_act_hints(user_index)
+            self._update_help_text(user_index)
+
         # Draw the picture-in-picture showing other agent's perspective.
         if self._users.max_user_count == 2:
             self._user_data[0].draw_pip_viewport(self._user_data[1])
             self._user_data[1].draw_pip_viewport(self._user_data[0])
 
-        if not self._paused:
-            for user_index in self._users.indices(Mask.ALL):
-                self._user_data[user_index].update(dt)
-                self._update_grasping_and_set_act_hints(user_index)
-            self._app_service.compute_action_and_step_env()
-
-        for user_index in self._users.indices(Mask.ALL):
-            self._update_help_text(user_index)
+        self._app_service.compute_action_and_step_env()
 
         # Set the server camera.
         server_cam_transform = self._user_data[
@@ -479,19 +484,28 @@ def sim_update(self, dt: float, post_sim_update_dict):
         ].cam_transform
         post_sim_update_dict["cam_transform"] = server_cam_transform
 
+        #  Collect data.
+        self._elapsed_time += dt
+        if self._is_any_user_active():
+            # TODO: Add data collection.
+            pass
 
-@hydra.main(
-    version_base=None, config_path="config", config_name="rearrange_v2"
-)
-def main(config):
-    if hasattr(config, "habitat_llm") and config.habitat_llm.enable:
-        collaboration_episode_loader.register_habitat_llm_extensions(config)
-    hitl_main(
-        config,
-        lambda app_service: AppStateRearrangeV2(app_service),
-    )
-
-
-if __name__ == "__main__":
-    register_hydra_plugins()
-    main()
+    def _is_any_user_active(self) -> bool:
+        return any(
+            self._user_data[user_index].gui_input.get_any_input()
+            for user_index in range(self._app_data.max_user_count)
+        )
+
+    def _has_any_user_finished_success(self) -> bool:
+        return any(
+            self._user_data[user_index].episode_finished
+            and self._user_data[user_index].episode_success
+            for user_index in range(self._app_data.max_user_count)
+        )
+
+    def _has_any_user_finished_failure(self) -> bool:
+        return any(
+            self._user_data[user_index].episode_finished
+            and not self._user_data[user_index].episode_success
+            for user_index in range(self._app_data.max_user_count)
+        )
diff --git a/examples/hitl/rearrange_v2/state_machine.py b/examples/hitl/rearrange_v2/state_machine.py
new file mode 100644
index 0000000000..21fe414807
--- /dev/null
+++ b/examples/hitl/rearrange_v2/state_machine.py
@@ -0,0 +1,84 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Meta Platforms, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+from app_data import AppData
+from app_state_base import AppStateBase
+from app_states import create_app_state_reset
+from util import get_empty_view
+
+from habitat_hitl.app_states.app_service import AppService
+from habitat_hitl.app_states.app_state_abc import AppState
+from habitat_hitl.core.types import ConnectionRecord, DisconnectionRecord
+
+
+class StateMachine(AppState):
+    """
+    RearrangeV2 state machine.
+    It is itself an AppState containing sub-states.
+    """
+
+    def __init__(
+        self,
+        app_service: AppService,
+    ):
+        self._app_service = app_service
+        self._app_data = AppData(
+            app_service.hitl_config.networking.max_client_count
+        )
+        self._app_state: AppStateBase = create_app_state_reset(
+            app_service, self._app_data
+        )
+        self._empty_view_matrix = get_empty_view(self._app_service.sim)
+
+        if app_service.hitl_config.networking.enable:
+            app_service.remote_client_state.on_client_connected.registerCallback(
+                self._on_client_connected
+            )
+            app_service.remote_client_state.on_client_disconnected.registerCallback(
+                self._on_client_disconnected
+            )
+
+    def _on_client_connected(self, connection: ConnectionRecord):
+        user_index = connection["userIndex"]
+        if user_index in self._app_data.connected_users:
+            raise RuntimeError(
+                f"User index {user_index} already connected! Aborting."
+            )
+        self._app_data.connected_users[connection["userIndex"]] = connection
+        self._app_state._time_since_last_connection = 0.0
+
+    def _on_client_disconnected(self, disconnection: DisconnectionRecord):
+        user_index = disconnection["userIndex"]
+        if user_index not in self._app_data.connected_users:
+            # TODO: Investigate why clients sometimes keep connecting/disconnecting.
+            print(f"User index {user_index} already disconnected!")
+            # raise RuntimeError(f"User index {user_index} already disconnected! Aborting.")
+        else:
+            del self._app_data.connected_users[user_index]
+
+        # If a user has disconnected, send a cancellation signal to the current state.
+        self._app_state.try_cancel()
+
+    def on_environment_reset(self, episode_recorder_dict):
+        self._app_state.on_environment_reset(episode_recorder_dict)
+
+    def sim_update(self, dt: float, post_sim_update_dict):
+        self._app_state._time_since_last_connection += dt
+        post_sim_update_dict["cam_transform"] = self._empty_view_matrix
+        self._app_state.sim_update(dt, post_sim_update_dict)
+
+        next_state = self._app_state.get_next_state()
+        if next_state is not None:
+            self._app_state.on_exit()
+            self._app_state = next_state
+            self._app_state.on_enter()
+
+        if self._app_state._save_keyframes == True:
+            self._app_service.sim.gfx_replay_manager.save_keyframe()
+
+    def record_state(self):
+        pass  # Unused override.
diff --git a/examples/hitl/rearrange_v2/util.py b/examples/hitl/rearrange_v2/util.py
index 1f25733298..4d3b14d0d7 100644
--- a/examples/hitl/rearrange_v2/util.py
+++ b/examples/hitl/rearrange_v2/util.py
@@ -6,7 +6,20 @@
 
 from time import time
 
+import magnum as mn
+
+UP = mn.Vector3(0, 1, 0)
+FWD = mn.Vector3(0, 0, 1)
+
 
 def timestamp() -> str:
     "Generate a Unix timestamp at the current time."
     return str(int(time()))
+
+
+def get_empty_view(sim) -> mn.Matrix4:
+    """
+    Get a view looking into the void.
+    Used to avoid displaying previously-loaded content in intermediate stages.
+    """
+    return mn.Matrix4.look_at(1000 * FWD, FWD, UP)
diff --git a/habitat-hitl/habitat_hitl/_internal/hitl_driver.py b/habitat-hitl/habitat_hitl/_internal/hitl_driver.py
index c60057fef3..1dc3c54a5b 100644
--- a/habitat-hitl/habitat_hitl/_internal/hitl_driver.py
+++ b/habitat-hitl/habitat_hitl/_internal/hitl_driver.py
@@ -265,9 +265,11 @@ def _check_init_server(
             )
             launch_networking_process(self._interprocess_record)
             self._remote_client_state = RemoteClientState(
-                self._interprocess_record,
-                gui_drawer,
-                users,
+                hitl_config=self._hitl_config,
+                client_message_manager=self._client_message_manager,
+                interprocess_record=self._interprocess_record,
+                gui_drawer=gui_drawer,
+                users=users,
             )
             # Bind the server input to user 0
             if self._hitl_config.networking.client_sync.server_input:
diff --git a/habitat-hitl/habitat_hitl/core/client_helper.py b/habitat-hitl/habitat_hitl/core/client_helper.py
index 7b02cbd631..1a05c91506 100644
--- a/habitat-hitl/habitat_hitl/core/client_helper.py
+++ b/habitat-hitl/habitat_hitl/core/client_helper.py
@@ -55,6 +55,14 @@ def __init__(
             self._on_client_disconnected
         )
 
+    def activate_users(self) -> None:
+        """
+        Reset idle timer for all users.
+        """
+        for user_index in range(self._users.max_user_count):
+            self._show_idle_kick_warning[user_index] = False
+            self._last_activity[user_index] = datetime.now()
+
     def _reset_user(self, user_index: int):
         self._show_idle_kick_warning[user_index] = False
         self._last_activity[user_index] = datetime.now()
@@ -82,10 +90,25 @@ def do_show_idle_kick_warning(self, user_index: int) -> Optional[bool]:
         """Indicates that the user should be warned that they will be kicked imminently."""
         return self._show_idle_kick_warning[user_index]
 
+    def get_idle_time(self, user_index: int) -> int:
+        """Returns the current idle time."""
+        if not self._kick_active:
+            return 0
+        now = datetime.now()
+        last_activity = self._last_activity[user_index]
+        span = now - last_activity
+        return int(span.total_seconds())
+
+    def get_remaining_idle_time(self, user_index: int) -> int:
+        """Returns the remaining idle time before kicking."""
+        if not self._kick_active:
+            return 0
+        return int(self._max_idle_duration - self.get_idle_time(user_index))
+
     def _update_idle_kick(
         self, user_index: int, is_user_idle_this_frame: bool
     ) -> None:
-        """Tracks whether the user is AFK. After some time, they will be kicked."""
+        """Tracks whether the user is idle. After some time, they will be kicked."""
 
         if not self._kick_active or user_index not in self._users.indices(
             self._connected_users
diff --git a/habitat-hitl/habitat_hitl/core/remote_client_state.py b/habitat-hitl/habitat_hitl/core/remote_client_state.py
index 6b28a7c7f7..8cc89c2363 100644
--- a/habitat-hitl/habitat_hitl/core/remote_client_state.py
+++ b/habitat-hitl/habitat_hitl/core/remote_client_state.py
@@ -15,6 +15,8 @@
 from habitat_hitl._internal.networking.interprocess_record import (
     InterprocessRecord,
 )
+from habitat_hitl.core.client_helper import ClientHelper
+from habitat_hitl.core.client_message_manager import ClientMessageManager
 from habitat_hitl.core.event import Event
 from habitat_hitl.core.gui_drawer import GuiDrawer
 from habitat_hitl.core.gui_input import GuiInput
@@ -36,6 +38,8 @@ class RemoteClientState:
 
     def __init__(
         self,
+        hitl_config,  # TODO: Coupling with ClientHelper
+        client_message_manager: ClientMessageManager,  # TODO: Coupling with ClientHelper
         interprocess_record: InterprocessRecord,
         gui_drawer: GuiDrawer,
         users: Users,
@@ -65,6 +69,12 @@ def __init__(
 
         self._client_loading: List[bool] = [False] * users.max_user_count
 
+        # TODO: Temporary coupling.
+        #       ClientHelper lifetime is directly coupled with RemoteClientState.
+        self._client_helper = ClientHelper(
+            hitl_config, self, client_message_manager, users
+        )
+
         # temp map VR button to key
         self._button_map = {
             0: GuiInput.KeyNS.ZERO,
diff --git a/habitat-hitl/test/test_example_apps.py b/habitat-hitl/test/test_example_apps.py
index 53d7301bbc..48024268a8 100644
--- a/habitat-hitl/test/test_example_apps.py
+++ b/habitat-hitl/test/test_example_apps.py
@@ -88,11 +88,12 @@ def test_hitl_example_rearrange(args):
     run_main_as_subprocess(args)
 
 
+@pytest.mark.skip(reason="Cannot currently be tested.")
 @pytest.mark.parametrize(
     "args",
     [
         (
-            "examples/hitl/rearrange_v2/rearrange_v2.py",
+            "examples/hitl/rearrange_v2/main.py",
             "--config-dir",
             "habitat-hitl/test/config",
             "+experiment=smoke_test",

From bb2167d50938f7d0aeb6c7750599bc622914b4e2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mika=C3=ABl=20Dallaire=20C=C3=B4t=C3=A9?=
 <110583667+0mdc@users.noreply.github.com>
Date: Mon, 20 May 2024 14:58:26 -0400
Subject: [PATCH 27/88] HITL - Rearrange session handling (#1965)

* Add session management.

* Formatting changes.

* Add clarifications to episode resolution.

* Document temporary hack to check for client-side loading status.

* Review pass - variable renaming and camera matrix caching.
---
 examples/hitl/rearrange_v2/app_state_base.py  |   6 +
 .../rearrange_v2/app_state_end_session.py     |  59 +++++++
 .../rearrange_v2/app_state_load_episode.py    | 125 +++++++++++++++
 examples/hitl/rearrange_v2/app_state_lobby.py |   4 +-
 .../rearrange_v2/app_state_start_screen.py    | 126 +++++++++++++++
 .../rearrange_v2/app_state_start_session.py   | 149 ++++++++++++++++++
 examples/hitl/rearrange_v2/app_states.py      |  46 +++++-
 examples/hitl/rearrange_v2/rearrange_v2.py    |  27 +++-
 examples/hitl/rearrange_v2/session.py         |  32 ++++
 examples/hitl/rearrange_v2/util.py            |  15 ++
 10 files changed, 579 insertions(+), 10 deletions(-)
 create mode 100644 examples/hitl/rearrange_v2/app_state_end_session.py
 create mode 100644 examples/hitl/rearrange_v2/app_state_load_episode.py
 create mode 100644 examples/hitl/rearrange_v2/app_state_start_screen.py
 create mode 100644 examples/hitl/rearrange_v2/app_state_start_session.py
 create mode 100644 examples/hitl/rearrange_v2/session.py

diff --git a/examples/hitl/rearrange_v2/app_state_base.py b/examples/hitl/rearrange_v2/app_state_base.py
index e8633d35c4..5a2238e748 100644
--- a/examples/hitl/rearrange_v2/app_state_base.py
+++ b/examples/hitl/rearrange_v2/app_state_base.py
@@ -63,3 +63,9 @@ def _status_message(self, message: str) -> None:
     def _kick_all_users(self) -> None:
         "Kick all users."
         self._app_service.remote_client_state.kick(Mask.ALL)
+
+    def _is_server_gui_enabled(self) -> bool:
+        "Returns true if the local server GUI is available."
+        return (
+            not self._app_service.hitl_config.experimental.headless.do_headless
+        )
diff --git a/examples/hitl/rearrange_v2/app_state_end_session.py b/examples/hitl/rearrange_v2/app_state_end_session.py
new file mode 100644
index 0000000000..c5691e14f3
--- /dev/null
+++ b/examples/hitl/rearrange_v2/app_state_end_session.py
@@ -0,0 +1,59 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Meta Platforms, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Optional
+
+from app_data import AppData
+from app_state_base import AppStateBase
+from app_states import create_app_state_reset
+from session import Session
+from util import get_top_down_view
+
+from habitat_hitl.app_states.app_service import AppService
+from habitat_hitl.core.user_mask import Mask
+
+# Duration of the end session message, before users are kicked.
+SESSION_END_DELAY = 5.0
+
+
+class AppStateEndSession(AppStateBase):
+    """
+    * Indicate users that the session is terminated.
+    * Upload collected data.
+    """
+
+    def __init__(
+        self, app_service: AppService, app_data: AppData, session: Session
+    ):
+        super().__init__(app_service, app_data)
+        self._session = session
+        self._elapsed_time = 0.0
+        self._save_keyframes = False
+
+        self._status = "Session ended."
+        if len(session.error) > 0:
+            self._status += f"\nError: {session.error}"
+
+    def get_next_state(self) -> Optional[AppStateBase]:
+        if self._elapsed_time > SESSION_END_DELAY:
+            self._end_session()
+            return create_app_state_reset(self._app_service, self._app_data)
+        return None
+
+    def sim_update(self, dt: float, post_sim_update_dict):
+        # Top-down view.
+        cam_matrix = get_top_down_view(self._app_service.sim)
+        post_sim_update_dict["cam_transform"] = cam_matrix
+        self._app_service._client_message_manager.update_camera_transform(
+            cam_matrix, destination_mask=Mask.ALL
+        )
+
+        self._status_message(self._status)
+        self._elapsed_time += dt
+
+    def _end_session(self):
+        # TODO: Data collection.
+        pass
diff --git a/examples/hitl/rearrange_v2/app_state_load_episode.py b/examples/hitl/rearrange_v2/app_state_load_episode.py
new file mode 100644
index 0000000000..6d54adcaf3
--- /dev/null
+++ b/examples/hitl/rearrange_v2/app_state_load_episode.py
@@ -0,0 +1,125 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Meta Platforms, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Optional
+
+from app_data import AppData
+from app_state_base import AppStateBase
+from app_states import (
+    create_app_state_cancel_session,
+    create_app_state_end_session,
+    create_app_state_start_screen,
+)
+from session import Session
+from util import get_top_down_view
+
+from habitat_hitl.app_states.app_service import AppService
+from habitat_hitl.core.user_mask import Mask
+
+
+class AppStateLoadEpisode(AppStateBase):
+    """
+    Load an episode.
+    A loading screen is shown while the content loads.
+    * If a next episode exists, load it, then show the start screen.
+    * If all episodes are done, end session.
+    * If any user disconnects, cancel the session.
+    """
+
+    def __init__(
+        self, app_service: AppService, app_data: AppData, session: Session
+    ):
+        super().__init__(app_service, app_data)
+        self._session = session
+        self._loading = True  # Cleared once no client reports that it is loading.
+        self._session_ended = False  # Set when the session has no episode left.
+        self._frame_number = 0
+        self._save_keyframes = False
+
+    def get_next_state(self) -> Optional[AppStateBase]:
+        if self._cancel:
+            return create_app_state_cancel_session(
+                self._app_service,
+                self._app_data,
+                self._session,
+                "User disconnected.",
+            )
+        if self._session_ended:
+            return create_app_state_end_session(
+                self._app_service, self._app_data, self._session
+            )
+        # When all clients finish loading, show the start screen.
+        if not self._loading:
+            return create_app_state_start_screen(
+                self._app_service, self._app_data, self._session
+            )
+        return None
+
+    def sim_update(self, dt: float, post_sim_update_dict):
+        self._status_message("Loading...")
+
+        # Skip a frame so that the status message reaches the client before the server loads the scene and blocks.
+        if self._frame_number == 1:
+            self._increment_episode()
+        # Once the scene is loaded, show a top-down view.
+        elif self._frame_number > 1:
+            cam_matrix = get_top_down_view(self._app_service.sim)
+            post_sim_update_dict["cam_transform"] = cam_matrix
+            self._app_service._client_message_manager.update_camera_transform(
+                cam_matrix, destination_mask=Mask.ALL
+            )
+            # Wait for clients to signal that content finished loading on their end.
+            # HACK: The server isn't immediately aware that clients are loading. For now, we simply skip some frames.
+            # TODO: Use the keyframe ID from 'ClientMessageManager.set_server_keyframe_id()' to find when the loading state is up-to-date.
+            if self._frame_number > 20:
+                any_client_loading = False
+                for user_index in range(self._app_data.max_user_count):
+                    if self._app_service.remote_client_state._client_loading[
+                        user_index
+                    ]:
+                        any_client_loading = True
+                        break
+                if not any_client_loading:
+                    self._loading = False
+
+        self._frame_number += 1
+
+    def _increment_episode(self):
+        session = self._session
+        assert session.episode_ids is not None
+        if session.current_episode_index < len(session.episode_ids):
+            self._set_episode(session.current_episode_index)
+            session.current_episode_index += 1
+        else:
+            self._session_ended = True
+
+    def _set_episode(self, episode_index: int):
+        session = self._session
+
+        # Set the ID of the next episode to play in lab.
+        next_episode_id = session.episode_ids[episode_index]
+        print(f"Next episode index: {next_episode_id}.")
+        try:
+            next_episode_index = int(next_episode_id)
+            self._app_service.episode_helper.set_next_episode_by_index(
+                next_episode_index
+            )
+        except Exception as e:
+            print(f"ERROR: Invalid episode index {next_episode_id}. {e}")
+            print("Loading episode index 0.")
+            self._app_service.episode_helper.set_next_episode_by_index(0)
+
+        # Once an episode ID has been set, lab needs to be reset to load the episode.
+        self._app_service.end_episode(do_reset=True)
+
+        # Signal the clients that the scene has changed.
+        client_message_manager = self._app_service.client_message_manager
+        if client_message_manager:
+            client_message_manager.signal_scene_change(Mask.ALL)
+
+        # Save a keyframe. This propagates the new content to the clients, initiating client-side loading.
+        # Beware that the client "loading" state won't immediately be visible to the server.
+        self._app_service.sim.gfx_replay_manager.save_keyframe()
diff --git a/examples/hitl/rearrange_v2/app_state_lobby.py b/examples/hitl/rearrange_v2/app_state_lobby.py
index b23ca2b369..083aaa000f 100644
--- a/examples/hitl/rearrange_v2/app_state_lobby.py
+++ b/examples/hitl/rearrange_v2/app_state_lobby.py
@@ -8,7 +8,7 @@
 
 from app_data import AppData
 from app_state_base import AppStateBase
-from app_states import create_app_state_rearrange
+from app_states import create_app_state_start_session
 
 from habitat_hitl.app_states.app_service import AppService
 
@@ -51,7 +51,7 @@ def get_next_state(self) -> Optional[AppStateBase]:
             == self._app_data.max_user_count
             and self._time_since_last_connection > START_SESSION_DELAY
         ):
-            return create_app_state_rearrange(
+            return create_app_state_start_session(
                 self._app_service, self._app_data
             )
         return None
diff --git a/examples/hitl/rearrange_v2/app_state_start_screen.py b/examples/hitl/rearrange_v2/app_state_start_screen.py
new file mode 100644
index 0000000000..3773525358
--- /dev/null
+++ b/examples/hitl/rearrange_v2/app_state_start_screen.py
@@ -0,0 +1,126 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Meta Platforms, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import List, Optional
+
+from app_data import AppData
+from app_state_base import AppStateBase
+from app_states import (
+    create_app_state_cancel_session,
+    create_app_state_rearrange,
+)
+from session import Session
+from util import get_top_down_view
+
+from habitat_hitl.app_states.app_service import AppService
+from habitat_hitl.core.client_message_manager import UIButton
+from habitat_hitl.core.key_mapping import KeyCode
+from habitat_hitl.core.text_drawer import TextOnScreenAlignment
+from habitat_hitl.core.user_mask import Mask
+
+START_BUTTON_ID = "start"
+START_SCREEN_TIMEOUT = 180.0
+SKIP_START_SCREEN = False
+
+
+class AppStateStartScreen(AppStateBase):
+    """
+    Start screen with a "Start" button that all users must press before starting the session.
+    Cancellable: expires after START_SCREEN_TIMEOUT, or when '_cancel' is set elsewhere (reported as a user disconnection).
+    """
+
+    def __init__(
+        self, app_service: AppService, app_data: AppData, session: Session
+    ):
+        super().__init__(app_service, app_data)
+        self._session = session
+        self._has_user_pressed_start_button: List[bool] = [
+            False
+        ] * self._app_data.max_user_count
+        self._elapsed_time: float = 0.0
+        self._timeout = False  # TODO: Error management
+        self._save_keyframes = True
+        self._cam_matrix = get_top_down_view(self._app_service.sim)
+
+    def get_next_state(self) -> Optional[AppStateBase]:
+        if self._cancel:
+            error = "Timeout" if self._timeout else "User disconnected"
+            return create_app_state_cancel_session(
+                self._app_service, self._app_data, self._session, error
+            )
+
+        # If all users pressed the "Start" button, begin the session.
+        ready_to_start = True
+        for user_ready in self._has_user_pressed_start_button:
+            ready_to_start &= user_ready
+        if ready_to_start or SKIP_START_SCREEN:
+            return create_app_state_rearrange(
+                self._app_service, self._app_data, self._session
+            )
+
+        return None
+
+    def sim_update(self, dt: float, post_sim_update_dict):
+        # Top-down view, computed once when the state was created.
+        cam_matrix = self._cam_matrix
+        post_sim_update_dict["cam_transform"] = cam_matrix
+        self._app_service._client_message_manager.update_camera_transform(
+            cam_matrix, destination_mask=Mask.ALL
+        )
+
+        # Time limit to start the experiment. Expiry is reported as a cancellation with the timeout flag set.
+        self._elapsed_time += dt
+        remaining_time = START_SCREEN_TIMEOUT - self._elapsed_time
+        if remaining_time <= 0:
+            self._cancel = True
+            self._timeout = True
+            return
+        remaining_time_int = int(remaining_time)
+        title = f"New Session (Expires in: {remaining_time_int}s)"
+
+        # Show dialogue box with "Start" button. Each user gets their own dialogue, based on whether they pressed it.
+        for user_index in range(self._app_data.max_user_count):
+            button_pressed = (
+                self._app_service.remote_client_state.ui_button_pressed(
+                    user_index, START_BUTTON_ID
+                )
+            )
+            self._has_user_pressed_start_button[user_index] |= button_pressed
+
+            if not self._has_user_pressed_start_button[user_index]:
+                self._app_service.client_message_manager.show_modal_dialogue_box(
+                    title,
+                    "Press 'Start' to begin the experiment.",
+                    [UIButton(START_BUTTON_ID, "Start", True)],
+                    destination_mask=Mask.from_index(user_index),
+                )
+            else:
+                self._app_service.client_message_manager.show_modal_dialogue_box(
+                    title,
+                    "Waiting for other participants...",
+                    [UIButton(START_BUTTON_ID, "Start", False)],
+                    destination_mask=Mask.from_index(user_index),
+                )
+
+        # Server-only: Press numeric keys to start episode on behalf of users.
+        if self._is_server_gui_enabled():
+            server_message = "Press numeric keys to start on behalf of users."
+            first_key = int(KeyCode.ONE)
+            for user_index in range(len(self._has_user_pressed_start_button)):
+                if self._app_service.gui_input.get_key_down(
+                    KeyCode(first_key + user_index)
+                ):
+                    self._has_user_pressed_start_button[user_index] = True
+                user_ready = self._has_user_pressed_start_button[user_index]
+                server_message += f"\n[{user_index + 1}]: User {user_index}: {'Ready' if user_ready else 'Not ready'}."
+
+            self._app_service.text_drawer.add_text(
+                server_message,
+                TextOnScreenAlignment.TOP_LEFT,
+                text_delta_x=0,
+                text_delta_y=-50,
+                destination_mask=Mask.NONE,
+            )
diff --git a/examples/hitl/rearrange_v2/app_state_start_session.py b/examples/hitl/rearrange_v2/app_state_start_session.py
new file mode 100644
index 0000000000..3f781236f0
--- /dev/null
+++ b/examples/hitl/rearrange_v2/app_state_start_session.py
@@ -0,0 +1,149 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Meta Platforms, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import List, Optional
+
+from app_data import AppData
+from app_state_base import AppStateBase
+from app_states import (
+    create_app_state_cancel_session,
+    create_app_state_load_episode,
+)
+from session import Session
+
+from habitat_hitl.app_states.app_service import AppService
+
+
+class AppStateStartSession(AppStateBase):
+    """
+    Resolve the episodes requested by the users and create the session.
+    * If the episodes are valid, move on to episode loading.
+    * If they are invalid, or the state was cancelled, cancel the session.
+    """
+
+    def __init__(self, app_service: AppService, app_data: AppData):
+        super().__init__(app_service, app_data)
+        self._new_session: Optional[Session] = None
+        self._save_keyframes = False
+
+    def get_next_state(self) -> Optional[AppStateBase]:
+        """Create the session, then load an episode or cancel."""
+        episode_ids = self._try_get_episode_ids()
+        if episode_ids is not None:
+            # Start the session.
+            self._new_session = Session(
+                self._app_service.config,
+                list(episode_ids),
+                dict(self._app_data.connected_users),
+            )
+
+            if self._cancel:
+                return create_app_state_cancel_session(
+                    self._app_service,
+                    self._app_data,
+                    self._new_session,
+                    "User disconnected",
+                )
+            else:
+                return create_app_state_load_episode(
+                    self._app_service, self._app_data, self._new_session
+                )
+        else:
+            # Create partial session record for data collection.
+            self._new_session = Session(
+                self._app_service.config,
+                [],
+                dict(self._app_data.connected_users),
+            )
+            return create_app_state_cancel_session(
+                self._app_service,
+                self._app_data,
+                self._new_session,
+                "Invalid session",
+            )
+
+    def _try_get_episode_ids(self) -> Optional[List[str]]:
+        """
+        Attempt to get episodes from client connection parameters.
+        Episode IDs are indices within the episode sets.
+
+        Format: {lower_bound_inclusive}-{upper_bound_exclusive} (e.g. "100-110").
+
+        Returns None if the episode set cannot be resolved. This can happen in multiple cases:
+        * 'episodes' field is missing from connection parameters.
+        * Users are requesting different episodes in a multiplayer session, indicating a matching issue.
+        * Invalid 'episodes' format.
+        * Episode indices out of bounds.
+        """
+        data = self._app_data
+
+        # Sanity checking.
+        if len(data.connected_users) == 0:
+            print("No user connected. Cancelling session.")
+            return None
+        connection_record = list(data.connected_users.values())[0]
+
+        # Validate that episodes are selected.
+        if "episodes" not in connection_record:
+            print("Users did not request episodes. Cancelling session.")
+            return None
+        episodes_str = connection_record["episodes"]
+
+        # Validate that all users are requesting the same episodes.
+        # A record without 'episodes' also counts as a mismatch.
+        for connection_record in data.connected_users.values():
+            if (
+                "episodes" not in connection_record
+                or connection_record["episodes"] != episodes_str
+            ):
+                print(
+                    "Users are requesting different episodes! Cancelling session."
+                )
+                return None
+
+        # Validate that the episode set is not empty.
+        if episodes_str is None or len(episodes_str) == 0:
+            print("Users did not request episodes. Cancelling session.")
+            return None
+
+        # Format: {lower_bound}-{upper_bound} E.g. 100-110
+        # Upper bound is exclusive.
+        episode_range_str = episodes_str.split("-")
+        if len(episode_range_str) != 2:
+            print("Invalid episode range. Cancelling session.")
+            return None
+
+        # Validate that both bounds are numeric before converting them.
+        # isdecimal() rejects signs, so the bounds cannot be negative.
+        if not all(bound.isdecimal() for bound in episode_range_str):
+            print("Invalid episode names. Cancelling session.")
+            return None
+        start_episode_id = int(episode_range_str[0])
+        last_episode_id = int(episode_range_str[1])
+
+        total_episode_count = len(
+            self._app_service.episode_helper._episode_iterator.episodes
+        )
+
+        # Validate episode range.
+        if start_episode_id >= total_episode_count:
+            print("Invalid episode names. Cancelling session.")
+            return None
+
+        if last_episode_id >= total_episode_count:
+            last_episode_id = total_episode_count
+
+        # If in decreasing order, swap.
+        if start_episode_id > last_episode_id:
+            temp = last_episode_id
+            last_episode_id = start_episode_id
+            start_episode_id = temp
+
+        episode_ids: List[str] = []
+        for episode_id in range(start_episode_id, last_episode_id):
+            episode_ids.append(str(episode_id))
+
+        return episode_ids
diff --git a/examples/hitl/rearrange_v2/app_states.py b/examples/hitl/rearrange_v2/app_states.py
index 9729321ce8..7df1a93b3b 100644
--- a/examples/hitl/rearrange_v2/app_states.py
+++ b/examples/hitl/rearrange_v2/app_states.py
@@ -10,6 +10,7 @@
 
 from app_data import AppData
 from app_state_base import AppStateBase
+from session import Session
 
 from habitat_hitl.app_states.app_service import AppService
 
@@ -30,9 +31,50 @@ def create_app_state_lobby(
     return AppStateLobby(app_service, app_data)
 
 
-def create_app_state_rearrange(
+def create_app_state_start_session(
     app_service: AppService, app_data: AppData
+) -> AppStateBase:
+    from app_state_start_session import AppStateStartSession
+
+    return AppStateStartSession(app_service, app_data)
+
+
+def create_app_state_load_episode(
+    app_service: AppService, app_data: AppData, session: Session
+) -> AppStateBase:
+    from app_state_load_episode import AppStateLoadEpisode  # Local import: that module imports app_states.
+
+    return AppStateLoadEpisode(app_service, app_data, session)
+
+
+def create_app_state_start_screen(
+    app_service: AppService, app_data: AppData, session: Session
+) -> AppStateBase:
+    from app_state_start_screen import AppStateStartScreen  # Local import: that module imports app_states.
+
+    return AppStateStartScreen(app_service, app_data, session)
+
+
+def create_app_state_rearrange(
+    app_service: AppService, app_data: AppData, session: Session
 ) -> AppStateBase:
     from rearrange_v2 import AppStateRearrangeV2
 
-    return AppStateRearrangeV2(app_service, app_data)
+    return AppStateRearrangeV2(app_service, app_data, session)
+
+
+def create_app_state_end_session(
+    app_service: AppService, app_data: AppData, session: Session
+) -> AppStateBase:
+    from app_state_end_session import AppStateEndSession  # Local import: that module imports app_states.
+
+    return AppStateEndSession(app_service, app_data, session)
+
+
+def create_app_state_cancel_session(
+    app_service: AppService, app_data: AppData, session: Session, error: str
+) -> AppStateBase:
+    from app_state_end_session import AppStateEndSession  # Local import: that module imports app_states.
+
+    session.error = error  # AppStateEndSession appends a non-empty error to its status message.
+    return AppStateEndSession(app_service, app_data, session)
diff --git a/examples/hitl/rearrange_v2/rearrange_v2.py b/examples/hitl/rearrange_v2/rearrange_v2.py
index 51593a1254..6912e3dff2 100644
--- a/examples/hitl/rearrange_v2/rearrange_v2.py
+++ b/examples/hitl/rearrange_v2/rearrange_v2.py
@@ -13,7 +13,11 @@
 import numpy as np
 from app_data import AppData
 from app_state_base import AppStateBase
-from app_states import create_app_state_reset
+from app_states import (
+    create_app_state_cancel_session,
+    create_app_state_load_episode,
+)
+from session import Session
 from ui import UI
 from util import UP
 from world import World
@@ -257,10 +261,13 @@ class AppStateRearrangeV2(AppStateBase):
     Multiplayer rearrangement HITL application.
     """
 
-    def __init__(self, app_service: AppService, app_data: AppData):
+    def __init__(
+        self, app_service: AppService, app_data: AppData, session: Session
+    ):
         super().__init__(app_service, app_data)
         self._save_keyframes = False  # Done on env step (rearrange_sim).
         self._app_service = app_service
+        self._session = session
         self._gui_agent_controllers = self._app_service.gui_agent_controllers
         self._num_agents = len(self._gui_agent_controllers)
         self._users = self._app_service.users
@@ -292,9 +299,17 @@ def __init__(self, app_service: AppService, app_data: AppData):
         self.on_environment_reset(None)
 
     def get_next_state(self) -> Optional[AppStateBase]:
-        # If cancelled, skip upload and clean-up.
-        if self._cancel or self._is_episode_finished():
-            return create_app_state_reset(self._app_service, self._app_data)
+        if self._cancel:
+            return create_app_state_cancel_session(
+                self._app_service,
+                self._app_data,
+                self._session,
+                "User disconnected",
+            )
+        elif self._is_episode_finished():
+            return create_app_state_load_episode(
+                self._app_service, self._app_data, self._session
+            )
         else:
             return None
 
@@ -428,7 +443,7 @@ def _update_help_text(self, user_index: int):
             )
 
     def sim_update(self, dt: float, post_sim_update_dict):
-        if not self._app_service.hitl_config.experimental.headless.do_headless:
+        if self._is_server_gui_enabled():
             # Server GUI exit.
             if (
                 not self._app_service.hitl_config.networking.enable
diff --git a/examples/hitl/rearrange_v2/session.py b/examples/hitl/rearrange_v2/session.py
new file mode 100644
index 0000000000..493dfcc5c5
--- /dev/null
+++ b/examples/hitl/rearrange_v2/session.py
@@ -0,0 +1,32 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Meta Platforms, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Any, Dict, List
+
+from habitat_hitl.core.types import ConnectionRecord
+
+
+class Session:
+    """
+    Data for a single RearrangeV2 session, passed from one app state to the next.
+    A session is defined as a sequence of episodes done by a fixed set of users.
+    """
+
+    def __init__(
+        self,
+        config: Any,
+        episode_ids: List[str],
+        connection_records: Dict[int, ConnectionRecord],
+    ):
+        self.success = False
+        self.episode_ids = episode_ids
+        self.current_episode_index = 0
+        self.connection_records = connection_records
+        self.error = ""  # Use this to display error that causes termination
+
+        # Use the port as a discriminator for when there are multiple concurrent servers.
+        output_folder_suffix = str(config.habitat_hitl.networking.port)
+        self.output_folder = f"output_{output_folder_suffix}"
diff --git a/examples/hitl/rearrange_v2/util.py b/examples/hitl/rearrange_v2/util.py
index 4d3b14d0d7..f60e645227 100644
--- a/examples/hitl/rearrange_v2/util.py
+++ b/examples/hitl/rearrange_v2/util.py
@@ -8,6 +8,11 @@
 
 import magnum as mn
 
+# TODO: Move outside of tutorial.
+from habitat_hitl.environment.hitl_tutorial import (
+    _lookat_bounding_box_top_down,
+)
+
 UP = mn.Vector3(0, 1, 0)
 FWD = mn.Vector3(0, 0, 1)
 
@@ -17,6 +22,16 @@ def timestamp() -> str:
     return str(int(time()))
 
 
+def get_top_down_view(sim) -> mn.Matrix4:
+    """
+    Get a top-down look-at transform framing the cumulative bounding box of the active scene graph's root node.
+    """
+    scene_root_node = sim.get_active_scene_graph().get_root_node()
+    scene_target_bb: mn.Range3D = scene_root_node.cumulative_bb
+    look_at = _lookat_bounding_box_top_down(200, scene_target_bb, FWD)
+    return mn.Matrix4.look_at(look_at[0], look_at[1], UP)
+
+
 def get_empty_view(sim) -> mn.Matrix4:
     """
     Get a view looking into the void.

From a9a29120de370d8b8034760800978ada5388e4f9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mika=C3=ABl=20Dallaire=20C=C3=B4t=C3=A9?=
 <110583667+0mdc@users.noreply.github.com>
Date: Mon, 20 May 2024 19:04:31 -0400
Subject: [PATCH 28/88] HITL - Data collection (#1967)

* Add session management.

* Formatting changes.

* Add clarifications to episode resolution.

* Document temporary hack to check for client-side loading status.

* Add session recorder, ui events and data upload.

* Change path handling in session upload code.
---
 .../rearrange_v2/app_state_end_session.py     |  63 +++++++++-
 .../config/language_rearrange.yaml            |   5 +
 examples/hitl/rearrange_v2/rearrange_v2.py    | 119 +++++++++++++++---
 examples/hitl/rearrange_v2/session.py         |  11 +-
 .../hitl/rearrange_v2/session_recorder.py     |  85 +++++++++++++
 examples/hitl/rearrange_v2/ui.py              |  72 +++++++++++
 6 files changed, 335 insertions(+), 20 deletions(-)
 create mode 100644 examples/hitl/rearrange_v2/session_recorder.py

diff --git a/examples/hitl/rearrange_v2/app_state_end_session.py b/examples/hitl/rearrange_v2/app_state_end_session.py
index c5691e14f3..bc5bb5e54c 100644
--- a/examples/hitl/rearrange_v2/app_state_end_session.py
+++ b/examples/hitl/rearrange_v2/app_state_end_session.py
@@ -4,15 +4,23 @@
 # This source code is licensed under the MIT license found in the
 # LICENSE file in the root directory of this source tree.
 
+import os
+import shutil
 from typing import Optional
 
 from app_data import AppData
 from app_state_base import AppStateBase
 from app_states import create_app_state_reset
+from s3_upload import (
+    generate_unique_session_id,
+    make_s3_filename,
+    upload_file_to_s3,
+)
 from session import Session
 from util import get_top_down_view
 
 from habitat_hitl.app_states.app_service import AppService
+from habitat_hitl.core.serialize_utils import save_as_json_gzip
 from habitat_hitl.core.user_mask import Mask
 
 # Duration of the end session message, before users are kicked.
@@ -55,5 +63,56 @@ def sim_update(self, dt: float, post_sim_update_dict):
         self._elapsed_time += dt
 
     def _end_session(self):
-        # TODO: Data collection.
-        pass
+        session = self._session
+        if session is None:
+            print("Null session. Skipping S3 upload.")
+            return
+
+        # Finalize session.
+        if self._session.error == "":
+            session.success = True
+        session.session_recorder.end_session(self._session.error)
+
+        # Get data collection parameters.
+        try:
+            config = self._app_service.config
+            data_collection_config = config.rearrange_v2.data_collection
+            s3_path = data_collection_config.s3_path
+            s3_subdir = "complete" if session.success else "incomplete"
+            s3_path = os.path.join(s3_path, s3_subdir)
+
+            # Use the port as a discriminator for when there are multiple concurrent servers.
+            output_folder_suffix = str(config.habitat_hitl.networking.port)
+            output_folder = f"output_{output_folder_suffix}"
+
+            output_file_name = data_collection_config.output_file_name
+            output_file = f"{output_file_name}.json.gz"
+
+        except Exception as e:
+            print(f"Invalid data collection config. Skipping S3 upload. {e}")
+            return
+
+        # Delete previous output directory
+        if os.path.exists(output_folder):
+            shutil.rmtree(output_folder)
+
+        # Create new output directory
+        os.makedirs(output_folder)
+        json_path = os.path.join(output_folder, output_file)
+        save_as_json_gzip(session.session_recorder, json_path)
+
+        # Generate unique session ID
+        session_id = generate_unique_session_id(
+            session.episode_ids, session.connection_records
+        )
+
+        # Upload output directory
+        orig_file_names = [
+            f
+            for f in os.listdir(output_folder)
+            if os.path.isfile(os.path.join(output_folder, f))
+        ]
+        for orig_file_name in orig_file_names:
+            local_file_path = os.path.join(output_folder, orig_file_name)
+            s3_file_name = make_s3_filename(session_id, orig_file_name)
+            upload_file_to_s3(local_file_path, s3_file_name, s3_path)
diff --git a/examples/hitl/rearrange_v2/config/language_rearrange.yaml b/examples/hitl/rearrange_v2/config/language_rearrange.yaml
index 9c47487dac..2d30872ce9 100644
--- a/examples/hitl/rearrange_v2/config/language_rearrange.yaml
+++ b/examples/hitl/rearrange_v2/config/language_rearrange.yaml
@@ -79,3 +79,8 @@ habitat:
   dataset:
     type: "CollaborationDataset-v0"
     data_path: data/datasets/hssd/llm_rearrange/v2/60scenes_dataset_776eps_with_eval.json.gz
+
+rearrange_v2:
+  data_collection:
+    s3_path: "Placeholder/"
+    output_file_name: "session"
diff --git a/examples/hitl/rearrange_v2/rearrange_v2.py b/examples/hitl/rearrange_v2/rearrange_v2.py
index 6912e3dff2..ba13986849 100644
--- a/examples/hitl/rearrange_v2/rearrange_v2.py
+++ b/examples/hitl/rearrange_v2/rearrange_v2.py
@@ -7,7 +7,7 @@
 
 from __future__ import annotations
 
-from typing import Dict, List, Optional
+from typing import Any, Dict, List, Optional
 
 import magnum as mn
 import numpy as np
@@ -41,10 +41,14 @@
 PIP_VIEWPORT_ID = 0  # ID of the picture-in-picture viewport that shows other agent's perspective.
 
 
-class DataLogger:
-    def __init__(self, app_service: AppService):
+class FrameRecorder:
+    def __init__(
+        self, app_service: AppService, app_data: AppData, world: World
+    ):
         self._app_service = app_service
+        self._app_data = app_data
         self._sim = app_service.sim
+        self._world = world
 
     def get_num_agents(self):
         return len(self._sim.agents_mgr._all_agent_data)
@@ -87,15 +91,29 @@ def get_objects_state(self):
             )
         return object_states
 
-    def record_state(self, task_completed: bool = False):
-        agent_states = self.get_agents_state()
-        object_states = self.get_objects_state()
-
-        self._app_service.step_recorder.record("agent_states", agent_states)
-        self._app_service.step_recorder.record("object_states", object_states)
-        self._app_service.step_recorder.record(
-            "task_completed", task_completed
-        )
+    def record_state(
+        self, elapsed_time: float, user_data: List[UserData]
+    ) -> Dict[str, Any]:
+        data: Dict[str, Any] = {
+            "t": elapsed_time,
+            "users": [],  # Filled below with one entry per item of 'user_data', in order.
+            "object_states": self.get_objects_state(),
+            "agent_states": self.get_agents_state(),
+        }
+
+        for user_index in range(len(user_data)):
+            u = user_data[user_index]
+            user_data_dict = {
+                "task_completed": u.episode_finished,
+                "task_succeeded": u.episode_success,
+                "camera_transform": u.cam_transform,
+                "held_object": u.ui._held_object_id,
+                "hovered_object": u.ui._hover_selection.object_id,
+                "events": u.pop_ui_events(),  # Side effect: clears the user's pending UI events.
+            }
+            data["users"].append(user_data_dict)
+
+        return data
 
 
 class UserData:
@@ -124,6 +142,9 @@ def __init__(
         self.task_instruction = ""
         self.pip_initialized = False
 
+        # Events for data collection.
+        self.ui_events: List[Dict[str, Any]] = []
+
         # If in remote mode, get the remote input. Else get the server (local) input.
         self.gui_input = (
             app_service.remote_client_state.get_gui_input(user_index)
@@ -149,6 +170,12 @@ def __init__(
             camera_helper=self.camera_helper,
         )
 
+        # Register UI callbacks
+        self.ui.on_pick.registerCallback(self._on_pick)
+        self.ui.on_place.registerCallback(self._on_place)
+        self.ui.on_open.registerCallback(self._on_open)
+        self.ui.on_close.registerCallback(self._on_close)
+
         # HACK: Work around GuiController input.
         # TODO: Communicate to the controller via action hints.
         gui_agent_controller._gui_input = self.gui_input
@@ -243,6 +270,11 @@ def draw_pip_viewport(self, pip_user_data: UserData):
             destination_mask=Mask.from_index(self.user_index),
         )
 
+    def pop_ui_events(self) -> List[Dict[str, Any]]:
+        events = list(self.ui_events)
+        self.ui_events.clear()
+        return events
+
     def _get_camera_lookat_pos(self) -> mn.Vector3:
         agent_root = get_agent_art_obj_transform(
             self.app_service.sim,
@@ -255,6 +287,43 @@ def _get_camera_lookat_pos(self) -> mn.Vector3:
     def _is_user_idle_this_frame(self) -> bool:
         return not self.gui_input.get_any_input()
 
+    def _on_pick(self, e: UI.PickEventData):
+        self.ui_events.append(
+            {
+                "type": "pick",
+                "obj_handle": e.object_handle,
+                "obj_id": e.object_id,
+            }
+        )
+
+    def _on_place(self, e: UI.PlaceEventData):
+        self.ui_events.append(
+            {
+                "type": "place",
+                "obj_handle": e.object_handle,
+                "obj_id": e.object_id,
+                "receptacle_id": e.receptacle_id,
+            }
+        )
+
+    def _on_open(self, e: UI.OpenEventData):
+        self.ui_events.append(
+            {
+                "type": "open",
+                "obj_handle": e.object_handle,
+                "obj_id": e.object_id,
+            }
+        )
+
+    def _on_close(self, e: UI.CloseEventData):
+        self.ui_events.append(
+            {
+                "type": "close",
+                "obj_handle": e.object_handle,
+                "obj_id": e.object_id,
+            }
+        )
+
 
 class AppStateRearrangeV2(AppStateBase):
     """
@@ -295,6 +364,10 @@ def __init__(
                 )
             )
 
+        self._frame_recorder = FrameRecorder(
+            app_service, app_data, self._world
+        )
+
         # Reset the environment immediately.
         self.on_environment_reset(None)
 
@@ -322,9 +395,24 @@ def on_enter(self):
                 user_index
             ].gui_agent_controller._agent_idx
 
+        episode = self._app_service.episode_helper.current_episode
+        self._session.session_recorder.start_episode(
+            episode.episode_id,
+            episode.scene_id,
+            episode.scene_dataset_config,
+            user_index_to_agent_index_map,
+        )
+
     def on_exit(self):
         super().on_exit()
 
+        episode_success = all(
+            self._user_data[user_index].episode_success
+            for user_index in range(self._app_data.max_user_count)
+        )
+
+        self._session.session_recorder.end_episode(episode_success)
+
     def _is_episode_finished(self) -> bool:
         """
         Determines whether all users have finished their tasks.
@@ -501,9 +589,12 @@ def sim_update(self, dt: float, post_sim_update_dict):
 
         #  Collect data.
         self._elapsed_time += dt
+        # TODO: Always record with non-human agent.
         if self._is_any_user_active():
-            # TODO: Add data collection.
-            pass
+            frame_data = self._frame_recorder.record_state(
+                self._elapsed_time, self._user_data
+            )
+            self._session.session_recorder.record_frame(frame_data)
 
     def _is_any_user_active(self) -> bool:
         return any(
diff --git a/examples/hitl/rearrange_v2/session.py b/examples/hitl/rearrange_v2/session.py
index 493dfcc5c5..4b03d5867f 100644
--- a/examples/hitl/rearrange_v2/session.py
+++ b/examples/hitl/rearrange_v2/session.py
@@ -6,6 +6,8 @@
 
 from typing import Any, Dict, List
 
+from session_recorder import SessionRecorder
+
 from habitat_hitl.core.types import ConnectionRecord
 
 
@@ -25,8 +27,9 @@ def __init__(
         self.episode_ids = episode_ids
         self.current_episode_index = 0
         self.connection_records = connection_records
-        self.error = ""  # Use this to display error that causes termination
 
-        # Use the port as a discriminator for when there are multiple concurrent servers.
-        output_folder_suffix = str(config.habitat_hitl.networking.port)
-        self.output_folder = f"output_{output_folder_suffix}"
+        self.session_recorder = SessionRecorder(
+            config, connection_records, episode_ids
+        )
+
+        self.error = ""  # Use this to display error that causes termination
diff --git a/examples/hitl/rearrange_v2/session_recorder.py b/examples/hitl/rearrange_v2/session_recorder.py
new file mode 100644
index 0000000000..db7f2344a7
--- /dev/null
+++ b/examples/hitl/rearrange_v2/session_recorder.py
@@ -0,0 +1,85 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Meta Platforms, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Any, Dict, List
+
+from util import timestamp
+
+from habitat_hitl.core.types import ConnectionRecord
+
+
+class SessionRecorder:
+    def __init__(
+        self,
+        config: Dict[str, Any],
+        connection_records: Dict[int, ConnectionRecord],
+        episode_ids: List[str],
+    ):
+        self.data = {
+            "episode_ids": episode_ids,
+            "completed": False,
+            "error": "",
+            "start_timestamp": timestamp(),
+            "end_timestamp": timestamp(),
+            "config": config,
+            "frame_count": 0,
+            "users": [],
+            "episodes": [],
+        }
+
+        for user_index in range(len(connection_records)):
+            self.data["users"].append(
+                {
+                    "user_index": user_index,
+                    "connection_record": connection_records[user_index],
+                }
+            )
+
+    def end_session(self, error: str):
+        self.data["end_timestamp"] = timestamp()
+        self.data["completed"] = True
+        self.data["error"] = error
+
+    def start_episode(
+        self,
+        episode_id: str,
+        scene_id: str,
+        dataset: str,
+        user_index_to_agent_index_map: Dict[int, int],
+    ):
+        self.data["episodes"].append(
+            {
+                "episode_id": episode_id,
+                "scene_id": scene_id,
+                "start_timestamp": timestamp(),
+                "end_timestamp": timestamp(),
+                "completed": False,
+                "success": False,
+                "frame_count": 0,
+                "dataset": dataset,
+                "user_index_to_agent_index_map": user_index_to_agent_index_map,
+                "frames": [],
+            }
+        )
+
+    def end_episode(
+        self,
+        success: bool,
+    ):
+        self.data["episodes"][-1]["end_timestamp"] = timestamp()
+        self.data["episodes"][-1]["success"] = success
+        self.data["episodes"][-1]["completed"] = True
+
+    def record_frame(
+        self,
+        frame_data: Dict[str, Any],
+    ):
+        self.data["end_timestamp"] = timestamp()
+        self.data["frame_count"] += 1
+
+        self.data["episodes"][-1]["end_timestamp"] = timestamp()
+        self.data["episodes"][-1]["frame_count"] += 1
+        self.data["episodes"][-1]["frames"].append(frame_data)
diff --git a/examples/hitl/rearrange_v2/ui.py b/examples/hitl/rearrange_v2/ui.py
index 7991fb874c..7117167de0 100644
--- a/examples/hitl/rearrange_v2/ui.py
+++ b/examples/hitl/rearrange_v2/ui.py
@@ -4,6 +4,9 @@
 # This source code is licensed under the MIT license found in the
 # LICENSE file in the root directory of this source tree.
 
+from __future__ import annotations
+
+from dataclasses import dataclass
 from datetime import datetime, timedelta
 from typing import List, Optional, Tuple
 
@@ -12,6 +15,7 @@
 
 from habitat.sims.habitat_simulator import sim_utilities
 from habitat.tasks.rearrange.rearrange_sim import RearrangeSim
+from habitat_hitl.core.event import Event
 from habitat_hitl.core.gui_drawer import GuiDrawer
 from habitat_hitl.core.gui_input import GuiInput
 from habitat_hitl.core.key_mapping import KeyCode, MouseButton
@@ -113,6 +117,49 @@ def place_selection_fn(gui_input: GuiInput) -> bool:
         )
         self._selections.append(self._place_selection)
 
+        # Set up user events
+        self._on_pick = Event()
+        self._on_place = Event()
+        self._on_open = Event()
+        self._on_close = Event()
+
+    @dataclass
+    class PickEventData:
+        object_id: int
+        object_handle: str
+
+    @property
+    def on_pick(self) -> Event:
+        return self._on_pick
+
+    @dataclass
+    class PlaceEventData:
+        object_id: int
+        object_handle: str
+        receptacle_id: int
+
+    @property
+    def on_place(self) -> Event:
+        return self._on_place
+
+    @dataclass
+    class OpenEventData:
+        object_id: int
+        object_handle: str
+
+    @property
+    def on_open(self) -> Event:
+        return self._on_open
+
+    @dataclass
+    class CloseEventData:
+        object_id: int
+        object_handle: str
+
+    @property
+    def on_close(self) -> Event:
+        return self._on_close
+
     def selection_discriminator_ignore_agents(self, object_id: int) -> bool:
         """Allow selection through agents."""
         return object_id not in self._world._agent_object_ids
@@ -186,6 +233,12 @@ def _pick_object(self, object_id: int) -> None:
                     self._held_object_id = object_id
                     self._place_selection.deselect()
                     self._world._all_held_object_ids.add(object_id)
+                    self._on_pick.invoke(
+                        UI.PickEventData(
+                            object_id=object_id,
+                            object_handle=rigid_object.handle,
+                        )
+                    )
 
     def _update_held_object_placement(self) -> None:
         """Update the location of the held object."""
@@ -228,6 +281,13 @@ def _place_object(self) -> None:
             self._held_object_id = None
             self._place_selection.deselect()
             self._world._all_held_object_ids.remove(object_id)
+            self._on_place.invoke(
+                UI.PlaceEventData(
+                    object_id=object_id,
+                    object_handle=rigid_object.handle,
+                    receptacle_id=self._place_selection.object_id,
+                )
+            )
 
     def _interact_with_object(self, object_id: int) -> None:
         """Open/close the selected object. Must be interactable."""
@@ -245,9 +305,21 @@ def _interact_with_object(self, object_id: int) -> None:
                     if link_id in self._world._opened_link_set:
                         sim_utilities.close_link(ao, link_index)
                         self._world._opened_link_set.remove(link_id)
+                        self._on_close.invoke(
+                            UI.CloseEventData(
+                                object_id=object_id,
+                                object_handle=ao.handle,
+                            )
+                        )
                     else:
                         sim_utilities.open_link(ao, link_index)
                         self._world._opened_link_set.add(link_id)
+                        self._on_open.invoke(
+                            UI.OpenEventData(
+                                object_id=object_id,
+                                object_handle=ao.handle,
+                            )
+                        )
 
     def _user_pos(self) -> mn.Vector3:
         """Get the translation of the agent controlled by the user."""

From 91481fdfd32d04ed1e551abcb94b85ae15a8921d Mon Sep 17 00:00:00 2001
From: Alexander Clegg <alexanderwclegg@gmail.com>
Date: Tue, 21 May 2024 08:19:27 -0700
Subject: [PATCH 29/88] [BE/CI] - remove auto downloads (#1962)

* Disable new clone protection for git lfs

* remove auto asset downloader from the baseline tests

* add ycb to test asset download

* add rearrange dataset to asset download step

* turn off clone protection in hitl test
---
 .circleci/config.yml           |  3 ++-
 test/test_baseline_trainers.py |  9 ---------
 test/test_baseline_training.py | 14 --------------
 3 files changed, 2 insertions(+), 24 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 7552ea33c7..674987b5e6 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -211,7 +211,7 @@ jobs:
               . activate habitat
               git lfs install
               conda install -y gitpython git-lfs
-              python -m habitat_sim.utils.datasets_download --uids ci_test_assets franka_panda hab_spot_arm hab3_bench_assets --data-path habitat-sim/data/ --no-replace --no-prune
+              python -m habitat_sim.utils.datasets_download --uids ci_test_assets franka_panda hab_spot_arm hab3_bench_assets ycb rearrange_dataset_v2 --data-path habitat-sim/data/ --no-replace --no-prune
       - run:
           name: Run sim benchmark
           command: |
@@ -284,6 +284,7 @@ jobs:
               . activate habitat; cd habitat-lab
               export PYTHONPATH=.:$PYTHONPATH
               export MULTI_PROC_OFFSET=0 && export MAGNUM_LOG=quiet && export HABITAT_SIM_LOG=quiet
+              export GIT_CLONE_PROTECTION_ACTIVE=false
               python -m habitat_sim.utils.datasets_download --uids hab3-episodes hab3_bench_assets habitat_humanoids hab_spot_arm ycb --data-path data/ --no-replace --no-prune
               python -m pytest habitat-hitl/test
       - run:
diff --git a/test/test_baseline_trainers.py b/test/test_baseline_trainers.py
index 0dfb9d5ec7..56eb64af59 100644
--- a/test/test_baseline_trainers.py
+++ b/test/test_baseline_trainers.py
@@ -20,7 +20,6 @@
     import torch
     import torch.distributed
 
-    import habitat_sim.utils.datasets_download as data_downloader
     from habitat_baselines.common.base_trainer import BaseRLTrainer
     from habitat_baselines.common.baseline_registry import baseline_registry
     from habitat_baselines.config.default import get_config
@@ -46,14 +45,6 @@
 from habitat_baselines.rl.ppo.evaluator import pause_envs
 
 
-@pytest.fixture(scope="module", autouse=True)
-def download_data():
-    # Download the needed datasets
-    data_downloader.main(
-        ["--uids", "rearrange_task_assets", "--no-replace", "--no-prune"]
-    )
-
-
 @pytest.mark.skipif(
     not baseline_installed, reason="baseline sub-module not installed"
 )
diff --git a/test/test_baseline_training.py b/test/test_baseline_training.py
index c1813b7e76..7cc38c7576 100644
--- a/test/test_baseline_training.py
+++ b/test/test_baseline_training.py
@@ -21,7 +21,6 @@
     import torch
     import torch.distributed
 
-    import habitat_sim.utils.datasets_download as data_downloader
     from habitat_baselines.common.baseline_registry import baseline_registry
     from habitat_baselines.config.default import get_config
 
@@ -37,19 +36,6 @@
     pygame_installed = False
 
 
-def setup_function(test_trainers):
-    # Download the needed datasets
-    data_downloader.main(
-        [
-            "--uids",
-            "rearrange_task_assets",
-            "hab3_bench_assets",
-            "--no-replace",
-            "--no-prune",
-        ]
-    )
-
-
 @pytest.mark.skipif(
     int(os.environ.get("TEST_BASELINE_SMALL", 0)) == 0,
     reason="Full training tests did not run. Need `export TEST_BASELINE_SMALL=1",

From c3963a618a8550fab87cb1817c519b13664d88c4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mika=C3=ABl=20Dallaire=20C=C3=B4t=C3=A9?=
 <110583667+0mdc@users.noreply.github.com>
Date: Tue, 21 May 2024 11:58:44 -0400
Subject: [PATCH 30/88] HITL - Add end episode form and error reporting (#1968)

* Add session management.

* Formatting changes.

* Add clarifications to episode resolution.

* Document temporary hack to check for client-side loading status.

* Add session recorder, ui events and data upload.

* Add end episode form with error reporting.
---
 .../hitl/rearrange_v2/end_episode_form.py     | 269 ++++++++++++++++++
 examples/hitl/rearrange_v2/rearrange_v2.py    | 133 +++++++--
 2 files changed, 372 insertions(+), 30 deletions(-)
 create mode 100644 examples/hitl/rearrange_v2/end_episode_form.py

diff --git a/examples/hitl/rearrange_v2/end_episode_form.py b/examples/hitl/rearrange_v2/end_episode_form.py
new file mode 100644
index 0000000000..dfea6c44d8
--- /dev/null
+++ b/examples/hitl/rearrange_v2/end_episode_form.py
@@ -0,0 +1,269 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Meta Platforms, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import List, Optional
+
+from habitat_hitl.app_states.app_service import AppService
+from habitat_hitl.core.client_message_manager import UIButton, UITextbox
+from habitat_hitl.core.event import Event
+from habitat_hitl.core.user_mask import Mask
+
+
+@dataclass
+class ErrorReport:
+    """
+    Error reported by a user.
+    """
+
+    user_message: str
+
+
+@dataclass
+class FormData:
+    """
+    Data contained within the end episode form.
+    """
+
+    app_service: AppService
+    user_index: int
+    current_state: Optional[BaseFormState]
+
+    error_report_text: str
+
+    on_cancel: Event
+    on_episode_success: Event
+    on_error_reported: Event
+
+
+class BaseFormState:
+    """
+    Base state for a GUI form.
+    """
+
+    def __init__(self, data: FormData):
+        self._data = data
+
+    def step(self):
+        pass
+
+    def change_state(self, next_state: Optional[BaseFormState]):
+        self._data.current_state = next_state
+
+    def cancel(self):
+        self._data.on_cancel.invoke(None)
+        self.change_state(None)
+
+
+class EndEpisodeFormState(BaseFormState):
+    """
+    End episode form.
+    User is presented with the following options:
+    * End episode
+    * Report error
+    * Cancel
+    """
+
+    def __init__(self, data: FormData):
+        self._data = data
+
+    def step(self):
+        app_service = self._data.app_service
+        user_index = self._data.user_index
+
+        id_cancel = "cancel"
+        id_success = "success"
+        id_failure = "failure"
+        buttons: List[UIButton] = [
+            UIButton(id_cancel, "Cancel", enabled=True),
+            UIButton(id_success, "Yes", enabled=True),
+            UIButton(id_failure, "Report Error", enabled=True),
+        ]
+        app_service.client_message_manager.show_modal_dialogue_box(
+            "End Task",
+            "Was the task completed successfully?",
+            buttons,
+            destination_mask=Mask.from_index(user_index),
+        )
+        client_state = app_service.remote_client_state
+
+        # If cancel button is clicked.
+        if client_state.ui_button_pressed(user_index, id_cancel):
+            self.cancel()
+
+        # If episode finished button is clicked.
+        if client_state.ui_button_pressed(user_index, id_success):
+            self._data.on_episode_success.invoke(None)
+            self.change_state(EpisodeSuccessFormState(self._data))
+
+        # If report error button is clicked.
+        if client_state.ui_button_pressed(user_index, id_failure):
+            self.change_state(ErrorReportFormState(self._data))
+
+
+class EpisodeSuccessFormState(BaseFormState):
+    """
+    Episode success form.
+    User can cancel at any time.
+    """
+
+    def __init__(self, data: FormData):
+        self._data = data
+
+    def step(self):
+        app_service = self._data.app_service
+        user_index = self._data.user_index
+
+        id_cancel = "cancel"
+        buttons: List[UIButton] = [
+            UIButton(id_cancel, "Cancel", enabled=True),
+        ]
+        app_service.client_message_manager.show_modal_dialogue_box(
+            "Task Done",
+            "Waiting for the other participant to finish...",
+            buttons,
+            destination_mask=Mask.from_index(user_index),
+        )
+        client_state = app_service.remote_client_state
+
+        # If cancel button is clicked.
+        if client_state.ui_button_pressed(user_index, id_cancel):
+            self.cancel()
+
+
+class ErrorReportFormState(BaseFormState):
+    """
+    Error report form. User describes the problem in a textbox and submits it.
+    User can cancel at any time.
+    """
+
+    def __init__(self, data: FormData):
+        self._data = data
+
+    def step(self):
+        app_service = self._data.app_service
+        user_index = self._data.user_index
+
+        id_cancel = "cancel"
+        id_report_error = "report_error"
+        id_textbox = "report_text"
+        buttons: List[UIButton] = [
+            UIButton(id_cancel, "Cancel", enabled=True),
+            UIButton(id_report_error, "Report Error", enabled=True),
+        ]
+        textbox = UITextbox(
+            id_textbox, self._data.error_report_text, enabled=True
+        )
+        app_service.client_message_manager.show_modal_dialogue_box(
+            "Report Error",
+            "Write a short description of the problem.\nFor example: 'Could not find the phone'.",
+            buttons,
+            textbox=textbox,
+            destination_mask=Mask.from_index(user_index),
+        )
+        client_state = app_service.remote_client_state
+
+        # Read textbox content.
+        self._data.error_report_text = client_state.get_textbox_content(
+            user_index, id_textbox
+        )
+
+        # If cancel button is clicked.
+        if client_state.ui_button_pressed(user_index, id_cancel):
+            self.cancel()
+
+        # If report error button is clicked.
+        if client_state.ui_button_pressed(user_index, id_report_error):
+            self._data.on_error_reported.invoke(
+                ErrorReport(user_message=self._data.error_report_text)
+            )
+            self.change_state(ErrorReportedFormState(self._data))
+
+
+class ErrorReportedFormState(BaseFormState):
+    """
+    Error reported form. Shows the submitted report text in a disabled textbox.
+    User can cancel at any time.
+    """
+
+    def __init__(self, data: FormData):
+        self._data = data
+
+    def step(self):
+        app_service = self._data.app_service
+        user_index = self._data.user_index
+
+        id_cancel = "cancel"
+        buttons: List[UIButton] = [
+            UIButton(id_cancel, "Cancel", enabled=True),
+        ]
+        id_textbox = "report_text_confirmation"
+        textbox_report_confirmation = UITextbox(
+            id_textbox, self._data.error_report_text, enabled=False
+        )
+        app_service.client_message_manager.show_modal_dialogue_box(
+            "Error Reported",
+            "Waiting for the other participant to finish...",
+            buttons,
+            textbox=textbox_report_confirmation,
+            destination_mask=Mask.from_index(user_index),
+        )
+
+        # If cancel button is clicked.
+        client_state = app_service.remote_client_state
+        if client_state.ui_button_pressed(user_index, id_cancel):
+            self.cancel()
+
+
+class EndEpisodeForm:
+    """
+    Modal dialog box containing a form to signal episode completion.
+    User can either signal the episode as completed, report an error or cancel to return to previous activity.
+    The form is a state machine, each page being a state.
+    """
+
+    def __init__(
+        self,
+        user_index: int,
+        app_service: AppService,
+    ):
+        self._data = FormData(
+            app_service=app_service,
+            user_index=user_index,
+            current_state=None,
+            error_report_text="",
+            on_cancel=Event(),
+            on_episode_success=Event(),
+            on_error_reported=Event(),
+        )
+
+    def show(self):
+        self._data.current_state = EndEpisodeFormState(self._data)
+
+    def hide(self):
+        self._data.current_state = None
+
+    def is_form_shown(self) -> bool:
+        return self._data.current_state is not None  # A state is only set while the form is open.
+
+    def step(self):
+        if self._data.current_state is None:
+            return
+        self._data.current_state.step()
+
+    @property
+    def on_cancel(self) -> Event:
+        return self._data.on_cancel
+
+    @property
+    def on_episode_success(self) -> Event:
+        return self._data.on_episode_success
+
+    @property
+    def on_error_reported(self) -> Event:
+        return self._data.on_error_reported
diff --git a/examples/hitl/rearrange_v2/rearrange_v2.py b/examples/hitl/rearrange_v2/rearrange_v2.py
index ba13986849..28d7fcd92b 100644
--- a/examples/hitl/rearrange_v2/rearrange_v2.py
+++ b/examples/hitl/rearrange_v2/rearrange_v2.py
@@ -7,6 +7,7 @@
 
 from __future__ import annotations
 
+from enum import Enum
 from typing import Any, Dict, List, Optional
 
 import magnum as mn
@@ -17,6 +18,7 @@
     create_app_state_cancel_session,
     create_app_state_load_episode,
 )
+from end_episode_form import EndEpisodeForm, ErrorReport
 from session import Session
 from ui import UI
 from util import UP
@@ -41,6 +43,12 @@
 PIP_VIEWPORT_ID = 0  # ID of the picture-in-picture viewport that shows other agent's perspective.
 
 
+class EpisodeCompletionStatus(Enum):
+    PENDING = 0  # User has not signaled the end of the episode yet.
+    SUCCESS = 1  # User signaled that the episode was completed.
+    FAILURE = 2  # User reported a problem with the episode.
+
+
 class FrameRecorder:
     def __init__(
         self, app_service: AppService, app_data: AppData, world: World
@@ -104,8 +112,10 @@ def record_state(
         for user_index in range(len(user_data)):
             u = user_data[user_index]
             user_data_dict = {
-                "task_completed": u.episode_finished,
-                "task_succeeded": u.episode_success,
+                "task_completed": u.episode_completion_status
+                != EpisodeCompletionStatus.PENDING,
+                "task_succeeded": u.episode_completion_status
+                == EpisodeCompletionStatus.SUCCESS,
                 "camera_transform": u.cam_transform,
                 "held_object": u.ui._held_object_id,
                 "hovered_object": u.ui._hover_selection.object_id,
@@ -151,8 +161,6 @@ def __init__(
             if app_service.remote_client_state is not None
             else self.app_service.gui_input
         )
-        self.episode_finished = False
-        self.episode_success = False
 
         self.camera_helper = CameraHelper(
             app_service.hitl_config,
@@ -170,12 +178,25 @@ def __init__(
             camera_helper=self.camera_helper,
         )
 
+        self.end_episode_form = EndEpisodeForm(user_index, app_service)
+        self.episode_completion_status = EpisodeCompletionStatus.PENDING
+
         # Register UI callbacks
         self.ui.on_pick.registerCallback(self._on_pick)
         self.ui.on_place.registerCallback(self._on_place)
         self.ui.on_open.registerCallback(self._on_open)
         self.ui.on_close.registerCallback(self._on_close)
 
+        self.end_episode_form.on_cancel.registerCallback(
+            self._on_episode_form_cancelled
+        )
+        self.end_episode_form.on_episode_success.registerCallback(
+            self._on_episode_success
+        )
+        self.end_episode_form.on_error_reported.registerCallback(
+            self._on_error_reported
+        )
+
         # HACK: Work around GuiController input.
         # TODO: Communicate to the controller via action hints.
         gui_agent_controller._gui_input = self.gui_input
@@ -205,12 +226,15 @@ def reset(self):
             )
 
     def update(self, dt: float):
+        if self.end_episode_form.is_form_shown():
+            self.end_episode_form.step()
+            return
+
         if self.gui_input.get_key_down(GuiInput.KeyNS.H):
             self.show_gui_text = not self.show_gui_text
 
         if self.gui_input.get_key_down(GuiInput.KeyNS.ZERO):
-            self.episode_finished = True
-            self.episode_success = True
+            self.end_episode_form.show()
 
         if self.client_helper:
             self.client_helper.update(
@@ -324,6 +348,35 @@ def _on_close(self, e: UI.CloseEventData):
             }
         )
 
+    def _on_episode_form_cancelled(self, _e: Any = None):
+        self.ui_events.append(
+            {
+                "type": "end_episode_form_cancelled",
+            }
+        )
+        self.episode_completion_status = EpisodeCompletionStatus.PENDING
+
+    def _on_episode_success(self, _e: Any = None):
+        self.ui_events.append(
+            {
+                "type": "episode_success",
+            }
+        )
+        self.episode_completion_status = EpisodeCompletionStatus.SUCCESS
+        print(f"User {self.user_index} has signaled the episode as completed.")
+
+    def _on_error_reported(self, error_report: ErrorReport):
+        self.ui_events.append(
+            {
+                "type": "episode_failure",
+                "error_report": error_report.user_message,
+            }
+        )
+        self.episode_completion_status = EpisodeCompletionStatus.FAILURE
+        print(
+            f"User {self.user_index} has signaled a problem with the episode: '{error_report.user_message}'."
+        )
+
 
 class AppStateRearrangeV2(AppStateBase):
     """
@@ -406,28 +459,14 @@ def on_enter(self):
     def on_exit(self):
         super().on_exit()
 
-        episode_success = all(
-            self._user_data[user_index].episode_success
-            for user_index in range(self._app_data.max_user_count)
-        )
-
+        episode_success = self._is_episode_successful()
         self._session.session_recorder.end_episode(episode_success)
 
-    def _is_episode_finished(self) -> bool:
-        """
-        Determines whether all users have finished their tasks.
-        """
-        return all(
-            self._user_data[user_index].episode_finished
-            for user_index in self._users.indices(Mask.ALL)
-        )
-
     def on_environment_reset(self, episode_recorder_dict):
         self._world.reset()
 
         # Reset AFK timers.
         # TODO: Move to idle_kick_timer class. Make it per-user. Couple it with "user_data" class
-        # TODO
         self._app_service.remote_client_state._client_helper.activate_users()
 
         # Set the task instruction
@@ -495,12 +534,13 @@ def _get_status_text(self, user_index: int):
 
         if (
             self._users.max_user_count > 1
-            and not self._user_data[user_index].episode_finished
+            and self._user_data[user_index].episode_completion_status
+            == EpisodeCompletionStatus.PENDING
         ):
             if self._has_any_user_finished_success():
-                status_str += "\n\nThe other participant has signaled that the task is completed.\nPress '0' when you are done."
+                status_str += "\n\nThe other participant signaled that the task is completed.\nPress '0' when you are done."
             elif self._has_any_user_finished_failure():
-                status_str += "\n\nThe other participant has signaled a problem with the task.\nPress '0' to continue."
+                status_str += "\n\nThe other participant signaled a problem with the task.\nPress '0' to continue."
 
         client_helper = self._app_service.remote_client_state._client_helper
         if client_helper.do_show_idle_kick_warning(user_index):
@@ -544,8 +584,11 @@ def sim_update(self, dt: float, post_sim_update_dict):
             # Skip the form when changing the episode from the server.
             if self._server_gui_input.get_key_down(GuiInput.KeyNS.ZERO):
                 server_user = self._user_data[self._server_user_index]
-                server_user.episode_finished = True
-                server_user.episode_success = True
+                if (
+                    server_user.episode_completion_status
+                    == EpisodeCompletionStatus.PENDING
+                ):
+                    server_user._on_episode_success()
 
             # Switch the server-controlled user.
             if self._num_agents > 0 and self._server_gui_input.get_key_down(
@@ -597,21 +640,51 @@ def sim_update(self, dt: float, post_sim_update_dict):
             self._session.session_recorder.record_frame(frame_data)
 
     def _is_any_user_active(self) -> bool:
+        """
+        Returns true if any user is active during the frame.
+        """
         return any(
             self._user_data[user_index].gui_input.get_any_input()
+            or len(self._user_data[user_index].ui_events) > 0
             for user_index in range(self._app_data.max_user_count)
         )
 
     def _has_any_user_finished_success(self) -> bool:
+        """
+        Returns true if any user completed the episode successfully.
+        """
         return any(
-            self._user_data[user_index].episode_finished
-            and self._user_data[user_index].episode_success
+            self._user_data[user_index].episode_completion_status
+            == EpisodeCompletionStatus.SUCCESS
             for user_index in range(self._app_data.max_user_count)
         )
 
     def _has_any_user_finished_failure(self) -> bool:
+        """
+        Returns true if any user completed the episode unsuccessfully.
+        """
         return any(
-            self._user_data[user_index].episode_finished
-            and not self._user_data[user_index].episode_success
+            self._user_data[user_index].episode_completion_status
+            == EpisodeCompletionStatus.FAILURE
+            for user_index in range(self._app_data.max_user_count)
+        )
+
+    def _is_episode_finished(self) -> bool:
+        """
+        Returns true if all users finished the episode, regardless of success.
+        """
+        return all(
+            self._user_data[user_index].episode_completion_status
+            != EpisodeCompletionStatus.PENDING
+            for user_index in range(self._app_data.max_user_count)
+        )
+
+    def _is_episode_successful(self) -> bool:
+        """
+        Returns true if all users finished the episode successfully.
+        """
+        return all(
+            self._user_data[user_index].episode_completion_status
+            == EpisodeCompletionStatus.SUCCESS
             for user_index in range(self._app_data.max_user_count)
         )

From 1031c8df4d22e3b54a408643a5696a6bfe822b4b Mon Sep 17 00:00:00 2001
From: Alexander Clegg <alexanderwclegg@gmail.com>
Date: Tue, 21 May 2024 14:19:24 -0700
Subject: [PATCH 31/88] Embodied Unoccluded Navmesh Snap util (#1949)

* add new method embodied_unoccluded_navmesh_snap and test

* update APIs with ignore_object_ids and fix some doc typos

* refactor to unify MobileManipulatorParams and SpotParams, add navmesh_offsets to params

* remove navmesh_offset from config in favor of params and refactor the BaseVelNonCylinderAction
---
 .../articulated_agents/mobile_manipulator.py  |  43 ++-
 .../articulated_agents/robots/spot_robot.py   |  86 +-----
 .../hssd_spot_human_social_nav.yaml           |   2 -
 .../config/default_structured_configs.py      |   2 -
 .../actions/spot_base_arm_empty.yaml          |   2 -
 .../datasets/rearrange/navmesh_utils.py       | 248 ++++++++++++++++--
 .../tasks/rearrange/actions/actions.py        |  14 +-
 habitat-lab/habitat/tasks/rearrange/utils.py  |  41 ++-
 test/test_navmesh_utils.py                    | 114 ++++++++
 9 files changed, 430 insertions(+), 122 deletions(-)
 create mode 100644 test/test_navmesh_utils.py

diff --git a/habitat-lab/habitat/articulated_agents/mobile_manipulator.py b/habitat-lab/habitat/articulated_agents/mobile_manipulator.py
index d5c9f1d1ab..acdfa7c947 100644
--- a/habitat-lab/habitat/articulated_agents/mobile_manipulator.py
+++ b/habitat-lab/habitat/articulated_agents/mobile_manipulator.py
@@ -39,9 +39,6 @@ class MobileManipulatorParams:
 
     :property arm_joints: The joint ids of the arm joints.
     :property gripper_joints: The habitat sim joint ids of any grippers.
-    :property wheel_joints: The joint ids of the wheels. If the wheels are not controlled, then this should be None
-    :property arm_init_params: The starting joint angles of the arm. If None, resets to 0.
-    :property gripper_init_params: The starting joint positions of the gripper. If None, resets to 0.
     :property ee_offset: The 3D offset from the end-effector link to the true end-effector position.
     :property ee_links: A list with the Habitat Sim link ID of the end-effector.
     :property ee_constraint: A (ee_count, 2, N) shaped array specifying the upper and lower limits for each end-effector joint where N is the arm DOF.
@@ -52,19 +49,29 @@ class MobileManipulatorParams:
     :property arm_mtr_pos_gain: The position gain of the arm motor.
     :property arm_mtr_vel_gain: The velocity gain of the arm motor.
     :property arm_mtr_max_impulse: The maximum impulse of the arm motor.
+    :property base_offset: The offset of the root transform from the center ground point for navmesh kinematic control.
+    :property base_link_names: The names of all links which should be treated as the frozen base of the robot/agent.
+    :property arm_init_params: The starting joint angles of the arm. If None, resets to 0.
+    :property gripper_init_params: The starting joint positions of the gripper. If None, resets to 0.
+    :property wheel_joints: The joint ids of the wheels. If the wheels are not controlled, then this should be None
     :property wheel_mtr_pos_gain: The position gain of the wheeled motor (if there are wheels).
     :property wheel_mtr_vel_gain: The velocity gain of the wheel motor (if there are wheels).
     :property wheel_mtr_max_impulse: The maximum impulse of the wheel motor (if there are wheels).
-    :property base_offset: The offset of the root transform from the center ground point for navmesh kinematic control.
+    :property leg_joints: The joint ids of the legs if applicable. If the legs are not controlled, then this should be None
+    :property leg_init_params: The starting joint positions of the leg joints. If None,
+        resets to 0.
+    :property leg_mtr_pos_gain: The position gain of the leg motor (if
+        there are legs).
+    :property leg_mtr_vel_gain: The velocity gain of the leg motor (if
+        there are legs).
+    :property leg_mtr_max_impulse: The maximum impulse of the leg motor (if
+        there are legs).
     :property ee_count: how many end effectors
+    :property navmesh_offsets: Optional list of 2D offsets from the robot's base_pos (x-forward) defining the centers of a set of cylinders forming a navmesh approximation of the robot for fast collision checking with PathFinder API
     """
 
     arm_joints: List[int]
     gripper_joints: List[int]
-    wheel_joints: Optional[List[int]]
-
-    arm_init_params: Optional[np.ndarray]
-    gripper_init_params: Optional[np.ndarray]
 
     ee_offset: List[mn.Vector3]
     ee_links: List[int]
@@ -80,15 +87,27 @@ class MobileManipulatorParams:
     arm_mtr_vel_gain: float
     arm_mtr_max_impulse: float
 
-    wheel_mtr_pos_gain: Optional[float]
-    wheel_mtr_vel_gain: Optional[float]
-    wheel_mtr_max_impulse: Optional[float]
-
     base_offset: mn.Vector3
     base_link_names: Set[str]
 
+    arm_init_params: Optional[np.ndarray] = None
+    gripper_init_params: Optional[np.ndarray] = None
+
+    wheel_joints: Optional[List[int]] = None
+    wheel_mtr_pos_gain: Optional[float] = None
+    wheel_mtr_vel_gain: Optional[float] = None
+    wheel_mtr_max_impulse: Optional[float] = None
+
+    leg_joints: Optional[List[int]] = None
+    leg_init_params: Optional[List[float]] = None
+    leg_mtr_pos_gain: Optional[float] = None
+    leg_mtr_vel_gain: Optional[float] = None
+    leg_mtr_max_impulse: Optional[float] = None
+
     ee_count: Optional[int] = 1
 
+    navmesh_offsets: Optional[List[mn.Vector2]] = None
+
 
 class MobileManipulator(Manipulator, ArticulatedAgentBase):
     """Robot with a controllable base and arm."""
diff --git a/habitat-lab/habitat/articulated_agents/robots/spot_robot.py b/habitat-lab/habitat/articulated_agents/robots/spot_robot.py
index 15301cdd8b..4a34a2c493 100644
--- a/habitat-lab/habitat/articulated_agents/robots/spot_robot.py
+++ b/habitat-lab/habitat/articulated_agents/robots/spot_robot.py
@@ -2,98 +2,25 @@
 # This source code is licensed under the MIT license found in the
 # LICENSE file in the root directory of this source tree.
 
-from typing import Dict, List, Optional, Set
 
-import attr
 import magnum as mn
 import numpy as np
 
 from habitat.articulated_agents.mobile_manipulator import (
     ArticulatedAgentCameraParams,
     MobileManipulator,
+    MobileManipulatorParams,
 )
 
 
-@attr.s(auto_attribs=True, slots=True)
-class SpotParams:
-    """Data to configure a mobile manipulator.
-
-    :property arm_joints: The joint ids of the arm joints.
-    :property gripper_joints: The habitat sim joint ids of any grippers.
-    :property arm_init_params: The starting joint angles of the arm. If None,
-        resets to 0.
-    :property gripper_init_params: The starting joint positions of the gripper. If None,
-        resets to 0.
-    :property ee_offset: The 3D offset from the end-effector link to the true
-        end-effector position.
-    :property ee_link: The Habitat Sim link ID of the end-effector.
-    :property ee_constraint: A (2, 3) shaped array specifying the upper and
-        lower limits for the 3D end-effector position.
-    :property cameras: The cameras and where they should go. The key is the
-        prefix to match in the sensor names. For example, a key of `"head"`
-        will match sensors `"head_rgb"` and `"head_depth"`
-    :property gripper_closed_state: All gripper joints must achieve this
-        state for the gripper to be considered closed.
-    :property gripper_open_state: All gripper joints must achieve this
-        state for the gripper to be considered open.
-    :property gripper_state_eps: Error margin for detecting whether gripper is closed.
-    :property arm_mtr_pos_gain: The position gain of the arm motor.
-    :property arm_mtr_vel_gain: The velocity gain of the arm motor.
-    :property arm_mtr_max_impulse: The maximum impulse of the arm motor.
-    :property base_offset: The offset of the root transform from the center ground point for navmesh kinematic control.
-    :property base_link_names: The name of the links
-    :property leg_joints: The joint ids of the legs if applicable. If the legs are not controlled, then this should be None
-    :property leg_init_params: The starting joint positions of the leg joints. If None,
-        resets to 0.
-    :property leg_mtr_pos_gain: The position gain of the leg motor (if
-        there are legs).
-    :property leg_mtr_vel_gain: The velocity gain of the leg motor (if
-        there are legs).
-    :property leg_mtr_max_impulse: The maximum impulse of the leg motor (if
-        there are legs).
-    :property ee_count: how many end effectors
-    """
-
-    arm_joints: List[int]
-    gripper_joints: List[int]
-
-    arm_init_params: Optional[List[float]]
-    gripper_init_params: Optional[List[float]]
-
-    ee_offset: List[mn.Vector3]
-    ee_links: List[int]
-    ee_constraint: np.ndarray
-
-    cameras: Dict[str, ArticulatedAgentCameraParams]
-
-    gripper_closed_state: List[float]
-    gripper_open_state: List[float]
-    gripper_state_eps: float
-
-    arm_mtr_pos_gain: float
-    arm_mtr_vel_gain: float
-    arm_mtr_max_impulse: float
-
-    base_offset: mn.Vector3
-    base_link_names: Set[str]
-
-    leg_joints: Optional[List[int]] = None
-    leg_init_params: Optional[List[float]] = None
-    leg_mtr_pos_gain: Optional[float] = None
-    leg_mtr_vel_gain: Optional[float] = None
-    leg_mtr_max_impulse: Optional[float] = None
-
-    ee_count: Optional[int] = 1
-
-
 class SpotRobot(MobileManipulator):
     def _get_spot_params(self):
-        return SpotParams(
+        return MobileManipulatorParams(
             arm_joints=list(range(0, 7)),
             gripper_joints=[7],
             leg_joints=list(range(8, 20)),
-            arm_init_params=[0.0, -3.14, 0.0, 3.0, 0.0, 0.0, 0.0],
-            gripper_init_params=[-1.56],
+            arm_init_params=np.array([0.0, -3.14, 0.0, 3.0, 0.0, 0.0, 0.0]),
+            gripper_init_params=np.array([-1.56]),
             leg_init_params=[
                 0.0,
                 0.7,
@@ -172,8 +99,8 @@ def _get_spot_params(self):
                     relative_transform=mn.Matrix4.rotation_z(mn.Deg(-90)),
                 ),
             },
-            gripper_closed_state=[0.0],
-            gripper_open_state=[-1.56],
+            gripper_closed_state=np.array([0.0], dtype=np.float32),
+            gripper_open_state=np.array([-1.56], dtype=np.float32),
             gripper_state_eps=0.01,
             arm_mtr_pos_gain=0.3,
             arm_mtr_vel_gain=0.3,
@@ -185,6 +112,7 @@ def _get_spot_params(self):
             base_link_names={
                 "base",
             },
+            navmesh_offsets=[[0.0, 0.0], [0.25, 0.0], [-0.25, 0.0]],
         )
 
     @property
diff --git a/habitat-lab/habitat/config/benchmark/multi_agent/hssd_spot_human_social_nav.yaml b/habitat-lab/habitat/config/benchmark/multi_agent/hssd_spot_human_social_nav.yaml
index 24bd2848ab..f767262915 100644
--- a/habitat-lab/habitat/config/benchmark/multi_agent/hssd_spot_human_social_nav.yaml
+++ b/habitat-lab/habitat/config/benchmark/multi_agent/hssd_spot_human_social_nav.yaml
@@ -110,8 +110,6 @@ habitat:
         # There is a collision if the difference between the clamped NavMesh position and target position
         # is more than than collision_threshold for any point
         collision_threshold: 1e-5
-        # The x and y locations of the clamped NavMesh position
-        navmesh_offset: [[0.0, 0.0], [0.225, 0.0]]
         # If we allow the robot to move laterally
         enable_lateral_move: False
         # speed parameters
diff --git a/habitat-lab/habitat/config/default_structured_configs.py b/habitat-lab/habitat/config/default_structured_configs.py
index 1796cd1fe9..259a74c0e7 100644
--- a/habitat-lab/habitat/config/default_structured_configs.py
+++ b/habitat-lab/habitat/config/default_structured_configs.py
@@ -288,8 +288,6 @@ class BaseVelocityNonCylinderActionConfig(ActionConfig):
     # There is a collision if the difference between the clamped NavMesh position and target position
     # is more than collision_threshold for any point.
     collision_threshold: float = 1e-5
-    # The x and y locations of the clamped NavMesh position
-    navmesh_offset: Optional[List[float]] = None
     # If we allow the robot to move laterally.
     enable_lateral_move: bool = False
     # If the condition of sliding includes the checking of rotation
diff --git a/habitat-lab/habitat/config/habitat/task/rearrange/actions/spot_base_arm_empty.yaml b/habitat-lab/habitat/config/habitat/task/rearrange/actions/spot_base_arm_empty.yaml
index b75f5afb40..e9f378155d 100644
--- a/habitat-lab/habitat/config/habitat/task/rearrange/actions/spot_base_arm_empty.yaml
+++ b/habitat-lab/habitat/config/habitat/task/rearrange/actions/spot_base_arm_empty.yaml
@@ -22,7 +22,5 @@ base_velocity_non_cylinder:
   # There is a collision if the difference between the clamped NavMesh position and target position
   # is more than than collision_threshold for any point
   collision_threshold: 1e-5
-  # The x and y locations of the clamped NavMesh position
-  navmesh_offset: [[0.0, 0.0], [0.25, 0.0], [-0.25, 0.0]]
   # If we allow the robot to move laterally
   enable_lateral_move: False
diff --git a/habitat-lab/habitat/datasets/rearrange/navmesh_utils.py b/habitat-lab/habitat/datasets/rearrange/navmesh_utils.py
index 7c8d5bb5db..87787321ad 100644
--- a/habitat-lab/habitat/datasets/rearrange/navmesh_utils.py
+++ b/habitat-lab/habitat/datasets/rearrange/navmesh_utils.py
@@ -1,11 +1,17 @@
-from typing import Any, List, Optional, Tuple
+from typing import Any, Dict, List, Optional, Tuple
 
 import magnum as mn
 import numpy as np
 
 import habitat_sim
+from habitat.articulated_agents.mobile_manipulator import MobileManipulator
 from habitat.core.logging import logger
 from habitat.sims.habitat_simulator.debug_visualizer import DebugVisualizer
+from habitat.tasks.rearrange.utils import (
+    general_sim_collision,
+    get_angle_to_pos,
+    rearrange_collision,
+)
 from habitat.tasks.utils import get_angle
 from habitat_sim.physics import VelocityControl
 
@@ -22,13 +28,13 @@ def snap_point_is_occluded(
     """
     Uses raycasting to check whether a target is occluded given a navmesh snap point.
 
-    :property target: The 3D position which should be unoccluded from the snap point.
-    :property snap_point: The navmesh snap point under consideration.
-    :property height: The height of the agent above the navmesh. Assumes the navmesh snap point is on the ground. Should be the maximum relative distance from navmesh ground to which a visibility check should indicate non-occlusion. The first check starts from this height. (E.g. agent_eyes_y - agent_base_y)
-    :property sim: The Simulator instance.
-    :property granularity: The distance between raycast samples. Finer granularity is more accurate, but more expensive.
-    :property target_object_ids: An optional set of object ids which indicate the target. If one of these objects is hit before any non-ignored object, the test is successful.
-    :property ignore_object_ids: An optional set of object ids which should be ignored in occlusion check.
+    :param target: The 3D position which should be unoccluded from the snap point.
+    :param snap_point: The navmesh snap point under consideration.
+    :param height: The height of the agent above the navmesh. Assumes the navmesh snap point is on the ground. Should be the maximum relative distance from navmesh ground to which a visibility check should indicate non-occlusion. The first check starts from this height. (E.g. agent_eyes_y - agent_base_y)
+    :param sim: The Simulator instance.
+    :param granularity: The distance between raycast samples. Finer granularity is more accurate, but more expensive.
+    :param target_object_ids: An optional set of object ids which indicate the target. If one of these objects is hit before any non-ignored object, the test is successful.
+    :param ignore_object_ids: An optional set of object ids which should be ignored in occlusion check.
 
     NOTE: If agent's eye height is known and only that height should be considered, provide eye height and granularity > height for fastest check.
 
@@ -91,17 +97,17 @@ def unoccluded_navmesh_snap(
     """
     Snap a point to the navmesh considering point visibility via raycasting.
 
-    :property pos: The 3D position to snap.
-    :property height: The height of the agent above the navmesh. Assumes the navmesh snap point is on the ground. Should be the maximum relative distance from navmesh ground to which a visibility check should indicate non-occlusion. The first check starts from this height. (E.g. agent_eyes_y - agent_base_y)
-    :property pathfinder: The PathFinder defining the NavMesh to use.
-    :property sim: The Simulator instance.
-    :property target_object_ids: An optional set of object ids which indicate the target. If one of these objects is hit before any non-ignored object, the test is successful. For example, when pos is an object's COM, that object should not occlude the point.
-    :property ignore_object_ids: An optional set of object ids which should be ignored in occlusion check. These objects should not stop the check. For example, the body and links of a robot.
-    :property island_id: Optionally restrict the search to a single navmesh island. Default -1 is the full navmesh.
-    :property search_offset: The additional radius to search for navmesh points around the target position. Added to the minimum distance from pos to navmesh.
-    :property test_batch_size: The number of sample navmesh points to consider when testing for occlusion.
-    :property max_samples: The maximum number of attempts to sample navmesh points for the test batch.
-    :property min_sample_dist: The minimum allowed L2 distance between samples in the test batch.
+    :param pos: The 3D position to snap.
+    :param height: The height of the agent above the navmesh. Assumes the navmesh snap point is on the ground. Should be the maximum relative distance from navmesh ground to which a visibility check should indicate non-occlusion. The first check starts from this height. (E.g. agent_eyes_y - agent_base_y)
+    :param pathfinder: The PathFinder defining the NavMesh to use.
+    :param sim: The Simulator instance.
+    :param target_object_ids: An optional set of object ids which indicate the target. If one of these objects is hit before any non-ignored object, the test is successful. For example, when pos is an object's COM, that object should not occlude the point.
+    :param ignore_object_ids: An optional set of object ids which should be ignored in occlusion check. These objects should not stop the check. For example, the body and links of a robot.
+    :param island_id: Optionally restrict the search to a single navmesh island. Default -1 is the full navmesh.
+    :param search_offset: The additional radius to search for navmesh points around the target position. Added to the minimum distance from pos to navmesh.
+    :param test_batch_size: The number of sample navmesh points to consider when testing for occlusion.
+    :param max_samples: The maximum number of attempts to sample navmesh points for the test batch.
+    :param min_sample_dist: The minimum allowed L2 distance between samples in the test batch.
 
     NOTE: this function is based on sampling and does not guarantee the closest point.
 
@@ -171,6 +177,208 @@ def unoccluded_navmesh_snap(
     return snap_point
 
 
+def embodied_unoccluded_navmesh_snap(
+    target_position: mn.Vector3,
+    height: float,
+    sim: habitat_sim.Simulator,
+    pathfinder: habitat_sim.nav.PathFinder = None,
+    target_object_ids: Optional[List[int]] = None,
+    ignore_object_ids: Optional[List[int]] = None,
+    island_id: int = -1,
+    search_offset: float = 1.5,
+    test_batch_size: int = 20,
+    max_samples: int = 200,
+    min_sample_dist: float = 0.5,
+    embodiment_heuristic_offsets: Optional[List[mn.Vector2]] = None,
+    agent_embodiment: Optional[MobileManipulator] = None,
+    orientation_noise: float = 0,
+    max_orientation_samples: int = 5,
+    data_out: Dict[Any, Any] = None,
+) -> Tuple[mn.Vector3, float, bool]:
+    """
+    Snap a robot embodiment close to a target point considering embodied constraints via the navmesh and raycasting for point visibility.
+
+    :param target_position: The 3D target position to snap.
+    :param height: The height of the agent above the navmesh. Assumes the navmesh snap point is on the ground. Should be the maximum relative distance from navmesh ground to which a visibility check should indicate non-occlusion. The first check starts from this height. (E.g. agent_eyes_y - agent_base_y)
+    :param sim: The RearrangeSim or Simulator instance. This choice will dictate the collision detection routine.
+    :param pathfinder: The PathFinder defining the NavMesh to use.
+    :param target_object_ids: An optional set of object ids which indicate the target. If one of these objects is hit before any non-ignored object, the test is successful. For example, when target_position is an object's COM, that object should not occlude the point.
+    :param ignore_object_ids: An optional set of object ids which should be ignored in occlusion check. These objects should not stop the check. For example, the body and links of a robot.
+    :param island_id: Optionally restrict the search to a single navmesh island. Default -1 is the full navmesh.
+    :param search_offset: The additional radius to search for navmesh points around the target position. Added to the minimum distance from target_position to navmesh.
+    :param test_batch_size: The number of sample navmesh points to consider when testing for occlusion.
+    :param max_samples: The maximum number of attempts to sample navmesh points for the test batch.
+    :param min_sample_dist: The minimum allowed L2 distance between samples in the test batch.
+    :param embodiment_heuristic_offsets: A set of 2D offsets describing navmesh cylinder center points forming a proxy for agent embodiment. Assumes x-forward, y to the side and 3D height fixed to navmesh. If provided, this proxy embodiment will be used for collision checking. If provided with an agent_embodiment, will be used instead of the MobileManipulatorParams.navmesh_offsets
+    :param agent_embodiment: The MobileManipulator to be used for collision checking if provided.
+    :param orientation_noise: Standard deviation of the gaussian used to sample orientation noise. If 0, states always face the target point. Noise is applied as a delta to this "target facing" orientation.
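+    :param max_orientation_samples: The number of noisy orientation samples to try for each candidate point when orientation_noise > 0. The noise-free "target facing" orientation is always tried in addition, after the noisy samples.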
+    :param data_out: Optionally provide a dictionary which can be filled with arbitrary detail data for external debugging and visualization.
+
+    NOTE: this function is based on sampling and does not guarantee the closest point.
+
+    :return: A Tuple containing: 1) An approximation of the closest unoccluded snap point to target_position or None if an unoccluded point could not be found, 2) the sampled orientation if found or None, 3) a boolean success flag.
+    """
+
+    assert height > 0
+    assert search_offset > 0
+    assert test_batch_size > 0
+    assert max_samples > 0
+    assert orientation_noise >= 0
+
+    if pathfinder is None:
+        pathfinder = sim.pathfinder
+
+    assert pathfinder.is_loaded
+
+    # when an agent_embodiment is provided, use its navmesh_offsets unless overridden by input
+    if embodiment_heuristic_offsets is None and agent_embodiment is not None:
+        embodiment_heuristic_offsets = agent_embodiment.params.navmesh_offsets
+
+    # first try the closest snap point
+    snap_point = pathfinder.snap_point(target_position, island_id)
+
+    # distance to closest snap point is the absolute minimum
+    min_radius = (snap_point - target_position).length()
+    # expand the search radius
+    search_radius = min_radius + search_offset
+
+    # gather a test batch
+    test_batch: List[Tuple[mn.Vector3, float]] = []
+    sample_count = 0
+    while len(test_batch) < test_batch_size and sample_count < max_samples:
+        sample = pathfinder.get_random_navigable_point_near(
+            circle_center=target_position,
+            radius=search_radius,
+            island_index=island_id,
+        )
+        reject = False
+        for batch_sample in test_batch:
+            if np.linalg.norm(sample - batch_sample[0]) < min_sample_dist:
+                reject = True
+                break
+        if not reject:
+            test_batch.append(
+                (sample, float(np.linalg.norm(sample - target_position)))
+            )
+        sample_count += 1
+
+    # sort the test batch points by distance to the target
+    test_batch.sort(key=lambda s: s[1])
+
+    # find the closest unoccluded point in the test batch
+    for batch_sample in test_batch:
+        if not snap_point_is_occluded(
+            target_position,
+            batch_sample[0],
+            height,
+            sim,
+            target_object_ids=target_object_ids,
+            ignore_object_ids=ignore_object_ids,
+        ):
+            facing_target_angle = get_angle_to_pos(
+                np.array(target_position - batch_sample[0])
+            )
+
+            if (
+                embodiment_heuristic_offsets is None
+                and agent_embodiment is None
+            ):
+                # No embodiment for collision detection, so return closest unoccluded point
+                return batch_sample[0], facing_target_angle, True
+
+            # get orientation noise offset
+            orientation_noise_samples = []
+            if orientation_noise > 0 and max_orientation_samples > 0:
+                orientation_noise_samples = [
+                    np.random.normal(0.0, orientation_noise)
+                    for _ in range(max_orientation_samples)
+                ]
+            # last one is always no-noise to check forward-facing
+            orientation_noise_samples.append(0)
+
+            for orientation_noise_sample in orientation_noise_samples:
+                desired_angle = facing_target_angle + orientation_noise_sample
+                if embodiment_heuristic_offsets is not None:
+                    # local 2d point rotation
+                    rotation_2d = mn.Matrix3.rotation(-mn.Rad(desired_angle))
+                    transformed_offsets_2d = [
+                        rotation_2d.transform_vector(xz)
+                        for xz in embodiment_heuristic_offsets
+                    ]
+
+                    # translation to global 3D points at navmesh height
+                    offsets_3d = [
+                        np.array(
+                            [
+                                transformed_offset_2d[0],
+                                0,
+                                transformed_offset_2d[1],
+                            ]
+                        )
+                        + batch_sample[0]
+                        for transformed_offset_2d in transformed_offsets_2d
+                    ]
+
+                    if data_out is not None:
+                        data_out["offsets_3d"] = offsets_3d
+
+                    # check for offset navigability
+                    is_collision = False
+                    for offset_point in offsets_3d:
+                        if not (
+                            sim.pathfinder.is_navigable(offset_point)
+                            and (
+                                island_id == -1
+                                or sim.pathfinder.get_island(offset_point)
+                                == island_id
+                            )
+                        ):
+                            is_collision = True
+                            break
+
+                    # if this sample is invalid, try the next
+                    if is_collision:
+                        continue
+
+                if agent_embodiment is not None:
+                    # contact testing with collision shapes
+                    start_position = agent_embodiment.base_pos
+                    start_rotation = agent_embodiment.base_rot
+
+                    agent_embodiment.base_pos = batch_sample[0]
+                    agent_embodiment.base_rot = desired_angle
+
+                    details = None
+                    sim.perform_discrete_collision_detection()
+                    # Make sure the robot is not colliding with anything in this state.
+                    if sim.__class__.__name__ == "RearrangeSim":
+                        _, details = rearrange_collision(
+                            sim,
+                            False,
+                            ignore_base=False,
+                        )
+                    else:
+                        _, details = general_sim_collision(
+                            sim, agent_embodiment
+                        )
+
+                    # reset agent state
+                    agent_embodiment.base_pos = start_position
+                    agent_embodiment.base_rot = start_rotation
+
+                    # Only care about collisions between the robot and scene.
+                    is_feasible_state = details.robot_scene_colls == 0
+                    if not is_feasible_state:
+                        continue
+
+                # if we made it here, all tests passed and we found a valid placement state
+                return batch_sample[0], desired_angle, True
+
+    # unable to find a valid navmesh point within constraints
+    return None, None, False
+
+
 def is_collision(
     pathfinder: habitat_sim.nav.PathFinder,
     trans: mn.Matrix4,
@@ -549,7 +757,7 @@ def is_accessible(
 
     :param sim: Habitat Simulator instance.
     :param point: The query point.
-    :property height: The height of the agent. Given navmesh snap point is grounded, the maximum height from which a visibility check should indicate non-occlusion. First check starts from this height.
+    :param height: The height of the agent. Given navmesh snap point is grounded, the maximum height from which a visibility check should indicate non-occlusion. First check starts from this height.
     :param nav_to_min_distance: Minimum distance threshold. -1 opts out of the test and returns True (i.e. no minimum distance).
     :param nav_island: The NavMesh island on which to check accessibility. Default -1 is the full NavMesh.
     :param target_object_id: An optional set of object ids which should be ignored in occlusion check. For example, when checking accessibility of an object's COM, that object should not occlude.
diff --git a/habitat-lab/habitat/tasks/rearrange/actions/actions.py b/habitat-lab/habitat/tasks/rearrange/actions/actions.py
index 06ea299e63..ec28cc6ed3 100644
--- a/habitat-lab/habitat/tasks/rearrange/actions/actions.py
+++ b/habitat-lab/habitat/tasks/rearrange/actions/actions.py
@@ -11,6 +11,9 @@
 from gym import spaces
 
 import habitat_sim
+from habitat.articulated_agents.mobile_manipulator import (
+    MobileManipulatorParams,
+)
 from habitat.core.embodied_task import SimulatorTaskAction
 from habitat.core.registry import registry
 from habitat.sims.habitat_simulator.actions import HabitatSimActions
@@ -561,7 +564,15 @@ def __init__(self, *args, config, sim: RearrangeSim, **kwargs):
         self._longitudinal_lin_speed = self._config.longitudinal_lin_speed
         self._lateral_lin_speed = self._config.lateral_lin_speed
         self._ang_speed = self._config.ang_speed
-        self._navmesh_offset = self._config.navmesh_offset
+        assert isinstance(
+            self.cur_articulated_agent.params, MobileManipulatorParams
+        ), "ArticulatedAgent must be a MobileManipulator to use this action."
+        self._navmesh_offset = (
+            self.cur_articulated_agent.params.navmesh_offsets
+        )
+        assert (
+            self._navmesh_offset is not None
+        ), "MobileManipulatorParams must define a set of 2D navmesh_offset points to use this action."
         self._enable_lateral_move = self._config.enable_lateral_move
 
     @property
@@ -597,6 +608,7 @@ def collision_check(
         """
         # Get the offset positions
         num_check_cylinder = len(self._navmesh_offset)
+        # TODO: height 0 is not a good assumption in general. This must be changed to query current navmesh height.
         nav_pos_3d = [
             np.array([xz[0], 0.0, xz[1]]) for xz in self._navmesh_offset
         ]
diff --git a/habitat-lab/habitat/tasks/rearrange/utils.py b/habitat-lab/habitat/tasks/rearrange/utils.py
index 78949bbfcf..bcaee39537 100644
--- a/habitat-lab/habitat/tasks/rearrange/utils.py
+++ b/habitat-lab/habitat/tasks/rearrange/utils.py
@@ -10,7 +10,7 @@
 import pickle
 import time
 from functools import wraps
-from typing import List, Optional, Tuple
+from typing import TYPE_CHECKING, List, Optional, Tuple
 
 import attr
 import magnum as mn
@@ -25,6 +25,10 @@
 from habitat.tasks.utils import get_angle
 from habitat_sim.physics import MotionType
 
+if TYPE_CHECKING:
+    # avoids circular import while allowing type hints
+    from habitat.tasks.rearrange.rearrange_sim import RearrangeSim
+
 rearrange_logger = HabitatLogger(
     name="rearrange_task",
     level=int(os.environ.get("HABITAT_REARRANGE_LOG", logging.ERROR)),
@@ -89,16 +93,42 @@ def __add__(self, other):
         )
 
 
+def general_sim_collision(
+    sim: habitat_sim.Simulator, agent_embodiment: MobileManipulator
+) -> Tuple[bool, CollisionDetails]:
+    """
+    Count the current contact points which involve an agent, without requiring a RearrangeSim.
+
+    Proxy for "rearrange_collision()" used for testing functions which require a collision testing routine. Only reads the existing contact points; it does not run collision detection itself.
+
+    :param sim: The Simulator instance whose physics contact points are queried.
+    :param agent_embodiment: The agent whose `sim_obj.object_id` is matched against each contact point.
+
+    :return: boolean flag denoting collisions and a details struct (not complete, only `robot_scene_colls` is set)
+    """
+    contact_points = sim.get_physics_contact_points()
+    agent_id = agent_embodiment.sim_obj.object_id
+
+    # every contact point which references the agent's object id counts
+    num_contacts = sum(
+        1 for cp in contact_points if coll_name_matches(cp, agent_id)
+    )
+    details = CollisionDetails(robot_scene_colls=num_contacts)
+    return (num_contacts > 0), details
+
+
 def rearrange_collision(
-    sim,
+    sim: "RearrangeSim",
     count_obj_colls: bool,
     verbose: bool = False,
     ignore_names: Optional[List[str]] = None,
     ignore_base: bool = True,
     get_extra_coll_data: bool = False,
     agent_idx: Optional[int] = None,
-):
-    """Defines what counts as a collision for the Rearrange environment execution"""
+) -> Tuple[bool, CollisionDetails]:
+    """
+    Defines what counts as a collision for the Rearrange environment execution.
+    """
     agent_model = sim.get_agent_data(agent_idx).articulated_agent
     grasp_mgr = sim.get_agent_data(agent_idx).grasp_mgr
     colls = sim.get_physics_contact_points()
@@ -664,7 +694,10 @@ def _get_robot_spawns(
 
 def get_angle_to_pos(rel_pos: np.ndarray) -> float:
     """
+    Get the 1D orientation angle (around Y axis) for an agent with X axis forward to face toward a relative 3D position.
+
     :param rel_pos: Relative 3D positive from the robot to the target like: `target_pos - robot_pos`.
+
     :returns: Angle in radians.
     """
 
diff --git a/test/test_navmesh_utils.py b/test/test_navmesh_utils.py
new file mode 100644
index 0000000000..676a10e2dc
--- /dev/null
+++ b/test/test_navmesh_utils.py
@@ -0,0 +1,114 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Meta Platforms, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+import os.path as osp
+
+import pytest
+from omegaconf import DictConfig
+
+import habitat.datasets.rearrange.navmesh_utils as nav_utils
+import habitat.sims.habitat_simulator.sim_utilities as sutils
+from habitat.articulated_agents.robots.spot_robot import SpotRobot
+from habitat.tasks.rearrange.utils import general_sim_collision
+from habitat_sim import Simulator, built_with_bullet
+from habitat_sim.utils.settings import default_sim_settings, make_cfg
+
+
+@pytest.mark.skipif(
+    not built_with_bullet,
+    reason="Raycasting API requires Bullet physics.",
+)
+@pytest.mark.skipif(
+    not osp.exists("data/replica_cad/"),
+    reason="Requires ReplicaCAD dataset.",
+)
+@pytest.mark.skipif(
+    not osp.exists("data/robots/hab_spot_arm/"),
+    reason="Requires Spot robot embodiment.",
+)
+@pytest.mark.parametrize("scene_id", ["apt_0", "v3_sc0_staging_00"])
+def test_unoccluded_snapping_utils(scene_id):
+    sim_settings = default_sim_settings.copy()
+    sim_settings[
+        "scene_dataset_config_file"
+    ] = "data/replica_cad/replicaCAD.scene_dataset_config.json"
+    sim_settings["scene"] = scene_id
+    hab_cfg = make_cfg(sim_settings)
+    with Simulator(hab_cfg) as sim:
+        # explicitly load the navmesh which matches the parametrized scene
+        # NOTE: in apt_0, navmesh does not include furniture, robot should find valid placements anyway with collision checking
+        sim.pathfinder.load_nav_mesh(
+            f"data/replica_cad/navmeshes/{scene_id}.navmesh"
+        )
+
+        # setup for visual debugging (uncomment this and the `dbv.peek` line in the loop below)
+        # sim.navmesh_visualization = True
+        # from habitat.sims.habitat_simulator.debug_visualizer import (
+        #    DebugVisualizer,
+        # )
+        # dbv = DebugVisualizer(sim)
+
+        # add the Spot robot to the world via the SpotRobot wrapper
+        robot_path = "data/robots/hab_spot_arm/urdf/hab_spot_arm.urdf"
+        agent_config = DictConfig({"articulated_agent_urdf": robot_path})
+        spot = SpotRobot(agent_config, sim)
+        spot.reconfigure()
+        spot.update()
+
+        # get the table in the middle of the room, used as the snap target
+        table_object = sutils.get_obj_from_handle(
+            sim, "frl_apartment_table_02_:0000"
+        )
+        # the robot's base and link object ids, passed below as `ignore_object_ids`
+        agent_object_ids = [spot.sim_obj.object_id] + [
+            *spot.sim_obj.link_object_ids.keys()
+        ]
+        # repeat the snap many times, once per orientation noise level in each round
+        for _ in range(100):
+            sampled_orientations = []
+            for orientation_noise_level in [0, 0.1, 0.2, 0.5]:
+                # do an embodied snap toward the table with the robot as the embodiment
+                (
+                    snap_point,
+                    orientation,
+                    success,
+                ) = nav_utils.embodied_unoccluded_navmesh_snap(
+                    target_position=table_object.translation,
+                    height=1.3,
+                    sim=sim,
+                    target_object_ids=[table_object.object_id],
+                    ignore_object_ids=agent_object_ids,
+                    agent_embodiment=spot,
+                    orientation_noise=orientation_noise_level,
+                )
+
+                # dbv.peek(spot.sim_obj, peek_all_axis=True).show()
+                # breakpoint()
+
+                # should always succeed here, with an orientation not yet seen in this round
+                assert success
+                assert orientation not in sampled_orientations
+                sampled_orientations.append(orientation)
+
+                # place the robot at the sampled position and orientation
+                spot.base_pos = snap_point
+                spot.base_rot = orientation
+
+                # check that the robot is not in collision (contacts must be recomputed after moving it)
+                sim.perform_discrete_collision_detection()
+                _, details = general_sim_collision(sim, spot)
+                assert details.robot_scene_colls == 0
+                # check that the table is not occluded from the sampled snap point
+                table_occluded = nav_utils.snap_point_is_occluded(
+                    target=table_object.translation,
+                    snap_point=snap_point,
+                    height=1.3,
+                    sim=sim,
+                    target_object_ids=[table_object.object_id],
+                    ignore_object_ids=agent_object_ids,
+                )
+
+                assert not table_occluded

From d67599961495e92fa6c2f688f7dbffa1fbe2fa6b Mon Sep 17 00:00:00 2001
From: Alexander Clegg <alexanderwclegg@gmail.com>
Date: Tue, 21 May 2024 22:50:42 -0700
Subject: [PATCH 32/88] remove auto downloader from RearrangeDataset init
 (#1969)

* remove auto downloader from RearrangeDataset init

* skip HitL smoke tests when hssd-hab is not available
---
 habitat-hitl/test/test_example_apps.py        | 26 ++++++++++++++++---
 habitat-hitl/test/test_main.py                |  7 +++++
 .../datasets/rearrange/rearrange_dataset.py   | 15 ++---------
 3 files changed, 32 insertions(+), 16 deletions(-)

diff --git a/habitat-hitl/test/test_example_apps.py b/habitat-hitl/test/test_example_apps.py
index 48024268a8..2cf5afea83 100644
--- a/habitat-hitl/test/test_example_apps.py
+++ b/habitat-hitl/test/test_example_apps.py
@@ -7,7 +7,7 @@
 import multiprocessing
 import runpy
 import sys
-from os import path
+from os import path as osp
 
 import pytest
 
@@ -15,8 +15,8 @@
 def run_main(*args):
     sys.argv = list(args)
     target = args[0]
-    if path.isfile(target):
-        sys.path.insert(0, path.dirname(target))
+    if osp.isfile(target):
+        sys.path.insert(0, osp.dirname(target))
     runpy.run_path(target, run_name="__main__")
 
 
@@ -28,6 +28,10 @@ def run_main_as_subprocess(args):
     assert process.exitcode == 0
 
 
+@pytest.mark.skipif(
+    not osp.exists("data/scene_datasets/hssd-hab"),
+    reason="Requires public Habitat-HSSD scene dataset. TODO: should be updated to a new dataset.",
+)
 @pytest.mark.parametrize(
     "args",
     [
@@ -43,6 +47,10 @@ def test_hitl_example_basic_viewer(args):
     run_main_as_subprocess(args)
 
 
+@pytest.mark.skipif(
+    not osp.exists("data/scene_datasets/hssd-hab"),
+    reason="Requires public Habitat-HSSD scene dataset. TODO: should be updated to a new dataset.",
+)
 @pytest.mark.parametrize(
     "args",
     [
@@ -58,6 +66,10 @@ def test_hitl_example_minimal(args):
     run_main_as_subprocess(args)
 
 
+@pytest.mark.skipif(
+    not osp.exists("data/scene_datasets/hssd-hab"),
+    reason="Requires public Habitat-HSSD scene dataset. TODO: should be updated to a new dataset.",
+)
 @pytest.mark.parametrize(
     "args",
     [
@@ -73,6 +85,10 @@ def test_hitl_example_pick_throw_vr(args):
     run_main_as_subprocess(args)
 
 
+@pytest.mark.skipif(
+    not osp.exists("data/scene_datasets/hssd-hab"),
+    reason="Requires public Habitat-HSSD scene dataset. TODO: should be updated to a new dataset.",
+)
 @pytest.mark.parametrize(
     "args",
     [
@@ -89,6 +105,10 @@ def test_hitl_example_rearrange(args):
 
 
 @pytest.mark.skip(reason="Cannot currently be tested.")
+@pytest.mark.skipif(
+    not osp.exists("data/scene_datasets/hssd-hab"),
+    reason="Requires public Habitat-HSSD scene dataset. TODO: should be updated to a new dataset.",
+)
 @pytest.mark.parametrize(
     "args",
     [
diff --git a/habitat-hitl/test/test_main.py b/habitat-hitl/test/test_main.py
index b7685d93a5..8d547c790e 100644
--- a/habitat-hitl/test/test_main.py
+++ b/habitat-hitl/test/test_main.py
@@ -4,7 +4,10 @@
 # This source code is licensed under the MIT license found in the
 # LICENSE file in the root directory of this source tree.
 
+from os import path as osp
+
 import magnum
+import pytest
 from hydra import compose, initialize
 
 from habitat_hitl.app_states.app_state_abc import AppState
@@ -37,6 +40,10 @@ def main(config) -> None:
     hitl_main(config, lambda app_service: AppStateTest(app_service))
 
 
+@pytest.mark.skipif(
+    not osp.exists("data/scene_datasets/hssd-hab"),
+    reason="Requires public Habitat-HSSD scene dataset. TODO: should be updated to a new dataset.",
+)
 def test_hitl_main():
     register_hydra_plugins()
     with initialize(version_base=None, config_path="config"):
diff --git a/habitat-lab/habitat/datasets/rearrange/rearrange_dataset.py b/habitat-lab/habitat/datasets/rearrange/rearrange_dataset.py
index bddfc7580f..40f6052bfa 100644
--- a/habitat-lab/habitat/datasets/rearrange/rearrange_dataset.py
+++ b/habitat-lab/habitat/datasets/rearrange/rearrange_dataset.py
@@ -10,9 +10,7 @@
 import attr
 import numpy as np
 
-import habitat_sim.utils.datasets_download as data_downloader
 from habitat.core.dataset import Episode
-from habitat.core.logging import logger
 from habitat.core.registry import registry
 from habitat.core.utils import DatasetFloatJSONEncoder
 from habitat.datasets.pointnav.pointnav_dataset import PointNavDatasetV1
@@ -58,18 +56,9 @@ def __init__(self, config: Optional["DictConfig"] = None) -> None:
         self.config = config
 
         if config and not self.check_config_paths_exist(config):
-            logger.info(
-                "Rearrange task assets are not downloaded locally, downloading and extracting now..."
+            raise ValueError(
+                f"Requested RearrangeDataset config paths '{config.data_path.format(split=config.split)}' or '{config.scenes_dir}' are not downloaded locally. Aborting."
             )
-            data_downloader.main(
-                [
-                    "--uids",
-                    "rearrange_task_assets",
-                    "--no-replace",
-                    "--no-prune",
-                ]
-            )
-            logger.info("Downloaded and extracted the data.")
 
         check_and_gen_physics_config()
 

From 8d0c9cf610931d7fe9a4a2ff049410ffb2b2a06d Mon Sep 17 00:00:00 2001
From: Alexander Clegg <alexanderwclegg@gmail.com>
Date: Wed, 22 May 2024 13:49:14 -0700
Subject: [PATCH 33/88] add ignore_object_ids to snap_down API (#1971)

---
 .../sims/habitat_simulator/sim_utilities.py   | 45 ++++++++++++++-----
 test/test_sim_utils.py                        |  8 ++++
 2 files changed, 43 insertions(+), 10 deletions(-)

diff --git a/habitat-lab/habitat/sims/habitat_simulator/sim_utilities.py b/habitat-lab/habitat/sims/habitat_simulator/sim_utilities.py
index 167b8c2906..ba0bfdcbee 100644
--- a/habitat-lab/habitat/sims/habitat_simulator/sim_utilities.py
+++ b/habitat-lab/habitat/sims/habitat_simulator/sim_utilities.py
@@ -327,6 +327,7 @@ def bb_ray_prescreen(
     sim: habitat_sim.Simulator,
     obj: habitat_sim.physics.ManagedRigidObject,
     support_obj_ids: Optional[List[int]] = None,
+    ignore_obj_ids: Optional[List[int]] = None,
     check_all_corners: bool = False,
 ) -> Dict[str, Any]:
     """
@@ -335,6 +336,7 @@ def bb_ray_prescreen(
     :param sim: The Simulator instance.
     :param obj: The RigidObject instance.
     :param support_obj_ids: A list of object ids designated as valid support surfaces for object placement. Contact with other objects is a criteria for placement rejection.
+    :param ignore_obj_ids: A list of object ids which should be ignored in contact checks and raycasts. For example, the body of the agent placing an object.
     :param check_all_corners: Optionally cast rays from all bounding box corners instead of only casting a ray from the center of mass.
 
     :return: a dict of raycast metadata: "base_rel_height","surface_snap_point", "raycast_results"
@@ -373,7 +375,11 @@ def bb_ray_prescreen(
             raycast_results.append(sim.cast_ray(ray))
             # classify any obstructions before hitting the support surface
             for hit in raycast_results[-1].hits:
-                if hit.object_id == obj.object_id:
+                if (
+                    hit.object_id == obj.object_id
+                    or ignore_obj_ids is not None
+                    and hit.object_id in ignore_obj_ids
+                ):
                     continue
                 elif hit.object_id in support_obj_ids:
                     hit_point = hit.point
@@ -427,6 +433,7 @@ def snap_down(
     sim: habitat_sim.Simulator,
     obj: habitat_sim.physics.ManagedRigidObject,
     support_obj_ids: Optional[List[int]] = None,
+    ignore_obj_ids: Optional[List[int]] = None,
     dbv: Optional[DebugVisualizer] = None,
     max_collision_depth: float = 0.01,
 ) -> bool:
@@ -436,6 +443,7 @@ def snap_down(
     :param sim: The Simulator instance.
     :param obj: The RigidObject instance.
     :param support_obj_ids: A list of object ids designated as valid support surfaces for object placement. Contact with other objects is a criteria for placement rejection. If none provided, default support surface is the stage/ground mesh (0).
+    :param ignore_obj_ids: A list of object ids which should be ignored in contact checks and raycasts. For example, the body of the agent placing an object.
     :param dbv: Optionally provide a DebugVisualizer (dbv) to render debug images of each object's computed snap position before collision culling.
     :param max_collision_depth: The maximum contact penetration depth between the object and the support surface. Higher values are easier to sample, but result in less dynamically stabile states.
 
@@ -454,8 +462,12 @@ def snap_down(
         # set default support surface to stage/ground mesh
         support_obj_ids = [habitat_sim.stage_id]
 
+    if ignore_obj_ids is None:
+        # default empty to avoid extra none checks in-loop later
+        ignore_obj_ids = []
+
     bb_ray_prescreen_results = bb_ray_prescreen(
-        sim, obj, support_obj_ids, check_all_corners=False
+        sim, obj, support_obj_ids, ignore_obj_ids, check_all_corners=False
     )
 
     if bb_ray_prescreen_results["surface_snap_point"] is None:
@@ -472,15 +484,28 @@ def snap_down(
         cps = sim.get_physics_contact_points()
         for cp in cps:
             if (
-                cp.object_id_a == obj.object_id
-                or cp.object_id_b == obj.object_id
-            ) and (
                 (
-                    cp.contact_distance < (-1 * max_collision_depth)
-                )  # contact depth is negative distance
-                or not (
-                    cp.object_id_a in support_obj_ids
-                    or cp.object_id_b in support_obj_ids
+                    # the object is involved in the contact
+                    cp.object_id_a == obj.object_id
+                    or cp.object_id_b == obj.object_id
+                )
+                and not (
+                    # the contact does not involve ignored objects
+                    cp.object_id_a in ignore_obj_ids
+                    or cp.object_id_b in ignore_obj_ids
+                )
+                and (
+                    (
+                        # contact exceeds maximum depth
+                        # NOTE: contact depth is negative distance
+                        cp.contact_distance
+                        < (-1 * max_collision_depth)
+                    )
+                    or not (
+                        # contact is not with a support object
+                        cp.object_id_a in support_obj_ids
+                        or cp.object_id_b in support_obj_ids
+                    )
                 )
             ):
                 obj.translation = cached_position
diff --git a/test/test_sim_utils.py b/test/test_sim_utils.py
index 9e2a7fd20c..70975cb078 100644
--- a/test/test_sim_utils.py
+++ b/test/test_sim_utils.py
@@ -150,6 +150,14 @@ def test_snap_down(support_margin, obj_margin, stage_support):
                 assert (
                     bb_ray_prescreen_results["surface_snap_point"] is not None
                 )
+
+                # reset the object and try again, ignoring the supports instead
+                cube_obj.translation = mn.Vector3(0, 0.2, 0)
+                snap_success = sutils.snap_down(
+                    sim, cube_obj, ignore_obj_ids=support_obj_ids
+                )
+                assert not snap_success
+
                 if stage_support:
                     # don't need 3 iterations for stage b/c no motion types to test
                     break

From 0c057619f8dc77d6bcd87034f97d992b3df75b67 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mika=C3=ABl=20Dallaire=20C=C3=B4t=C3=A9?=
 <110583667+0mdc@users.noreply.github.com>
Date: Wed, 22 May 2024 16:56:55 -0400
Subject: [PATCH 34/88] HITL - Decouple agents from users. (#1972)

* Add session management.

* Formatting changes.

* Add clarifications to episode resolution.

* Document temporary hack to check for client-side loading status.

* Add session recorder, ui events and data upload.

* Add end episode form with error reporting.

* Decouple agents from users.

* Update agent camera transform from sensor.

* Update comment.
---
 examples/hitl/rearrange_v2/rearrange_v2.py    | 243 +++++++++++++-----
 .../habitat_hitl/_internal/hitl_driver.py     |   5 +
 .../habitat_hitl/app_states/app_service.py    |  11 +-
 .../controllers/controller_helper.py          |   6 +
 4 files changed, 196 insertions(+), 69 deletions(-)

diff --git a/examples/hitl/rearrange_v2/rearrange_v2.py b/examples/hitl/rearrange_v2/rearrange_v2.py
index 28d7fcd92b..7c5697726a 100644
--- a/examples/hitl/rearrange_v2/rearrange_v2.py
+++ b/examples/hitl/rearrange_v2/rearrange_v2.py
@@ -30,9 +30,12 @@
 from habitat_hitl.app_states.app_service import AppService
 from habitat_hitl.core.gui_input import GuiInput
 from habitat_hitl.core.text_drawer import TextOnScreenAlignment
-from habitat_hitl.core.user_mask import Mask
+from habitat_hitl.core.user_mask import Mask, Users
 from habitat_hitl.environment.camera_helper import CameraHelper
-from habitat_hitl.environment.controllers.controller_abc import GuiController
+from habitat_hitl.environment.controllers.controller_abc import (
+    Controller,
+    GuiController,
+)
 from habitat_hitl.environment.controllers.gui_controller import (
     GuiHumanoidController,
     GuiRobotController,
@@ -112,11 +115,11 @@ def record_state(
         for user_index in range(len(user_data)):
             u = user_data[user_index]
             user_data_dict = {
-                "task_completed": u.episode_completion_status
+                "task_completed": u.agent_data.episode_completion_status
                 != EpisodeCompletionStatus.PENDING,
-                "task_succeeded": u.episode_completion_status
+                "task_succeeded": u.agent_data.episode_completion_status
                 == EpisodeCompletionStatus.SUCCESS,
-                "camera_transform": u.cam_transform,
+                "camera_transform": u.agent_data.cam_transform,
                 "held_object": u.ui._held_object_id,
                 "hovered_object": u.ui._hover_selection.object_id,
                 "events": u.pop_ui_events(),
@@ -126,6 +129,44 @@ def record_state(
         return data
 
 
+class AgentData:
+    """
+    Agent-specific states for the ongoing rearrangement session.
+    Agents can be controlled by either a user or an AI.
+    """
+
+    def __init__(
+        self,
+        app_service: AppService,
+        world: World,
+        agent_controller: Controller,
+        agent_index: int,
+        render_camera: Optional[Any],
+    ):
+        self.app_service = app_service
+        self.world = world
+        self.agent_controller = agent_controller
+        self.agent_index = agent_index
+
+        self.task_instruction = ""
+
+        # Optional render camera of the agent's sensor. `cam_transform` is always kept as a magnum Matrix4.
+        self.render_camera = render_camera
+        self.cam_transform = mn.Matrix4.identity_init()
+        self.episode_completion_status = EpisodeCompletionStatus.PENDING
+
+    def update_camera_from_sensor(self) -> None:
+        """
+        Update the camera transform from the agent's sensor. Does nothing when the agent has no render camera.
+        Agents controlled by users have their camera updated using CameraHelper.
+        For AI-controlled agents, the camera transform can be inferred from this function.
+        """
+        if self.render_camera is not None:
+            # Invert with magnum so the result stays a Matrix4; np.linalg.inv would produce a numpy array instead.
+            camera_matrix = self.render_camera.camera_matrix
+            self.cam_transform = camera_matrix.inverted()
+
+
 class UserData:
     """
     User-specific states for the ongoing rearrangement session.
@@ -136,22 +177,26 @@ def __init__(
         app_service: AppService,
         user_index: int,
         world: World,
-        gui_agent_controller: GuiController,
+        agent_data: AgentData,
         server_sps_tracker: AverageRateTracker,
     ):
         self.app_service = app_service
-        self.world = world
         self.user_index = user_index
-        self.gui_agent_controller = gui_agent_controller
+        self.world = world
+        self.agent_data = agent_data
         self.server_sps_tracker = server_sps_tracker
         self.client_helper = (
             self.app_service.remote_client_state._client_helper
         )
-        self.cam_transform = mn.Matrix4.identity_init()
         self.show_gui_text = True
-        self.task_instruction = ""
         self.pip_initialized = False
 
+        gui_agent_controller = agent_data.agent_controller
+        assert isinstance(
+            gui_agent_controller, GuiController
+        ), "User agent controller must be a GuiController"
+        self.gui_agent_controller = gui_agent_controller
+
         # Events for data collection.
         self.ui_events: List[Dict[str, Any]] = []
 
@@ -171,7 +216,7 @@ def __init__(
             hitl_config=app_service.hitl_config,
             user_index=user_index,
             world=world,
-            gui_controller=gui_agent_controller,
+            gui_controller=self.gui_agent_controller,
             sim=app_service.sim,
             gui_input=self.gui_input,
             gui_drawer=app_service.gui_drawer,
@@ -179,7 +224,6 @@ def __init__(
         )
 
         self.end_episode_form = EndEpisodeForm(user_index, app_service)
-        self.episode_completion_status = EpisodeCompletionStatus.PENDING
 
         # Register UI callbacks
         self.ui.on_pick.registerCallback(self._on_pick)
@@ -244,18 +288,18 @@ def update(self, dt: float):
             )
 
         self.camera_helper.update(self._get_camera_lookat_pos(), dt)
-        self.cam_transform = self.camera_helper.get_cam_transform()
+        self.agent_data.cam_transform = self.camera_helper.get_cam_transform()
 
         if self.app_service.hitl_config.networking.enable:
             self.app_service._client_message_manager.update_camera_transform(
-                self.cam_transform,
+                self.agent_data.cam_transform,
                 destination_mask=Mask.from_index(self.user_index),
             )
 
         self.ui.update()
         self.ui.draw_ui()
 
-    def draw_pip_viewport(self, pip_user_data: UserData):
+    def draw_pip_viewport(self, pip_agent_data: AgentData):
         """
         Draw a picture-in-picture viewport showing another agent's perspective.
         """
@@ -268,7 +312,7 @@ def draw_pip_viewport(self, pip_user_data: UserData):
             self.pip_initialized = True
 
             # Assign pip agent objects to their own layer.
-            pip_agent_index = pip_user_data.gui_agent_controller._agent_idx
+            pip_agent_index = pip_agent_data.agent_index
             agent_object_ids = self.world.get_agent_object_ids(pip_agent_index)
             for agent_object_id in agent_object_ids:
                 self.app_service.client_message_manager.set_object_visibility_layer(
@@ -290,7 +334,7 @@ def draw_pip_viewport(self, pip_user_data: UserData):
         # Show picture-in-picture (PIP) viewport.
         self.app_service.client_message_manager.show_viewport(
             viewport_id=PIP_VIEWPORT_ID,
-            cam_transform=pip_user_data.cam_transform,
+            cam_transform=pip_agent_data.cam_transform,
             destination_mask=Mask.from_index(self.user_index),
         )
 
@@ -354,7 +398,9 @@ def _on_episode_form_cancelled(self, _e: Any = None):
                 "type": "end_episode_form_cancelled",
             }
         )
-        self.episode_completion_status = EpisodeCompletionStatus.PENDING
+        self.agent_data.episode_completion_status = (
+            EpisodeCompletionStatus.PENDING
+        )
 
     def _on_episode_success(self, _e: Any = None):
         self.ui_events.append(
@@ -362,7 +408,9 @@ def _on_episode_success(self, _e: Any = None):
                 "type": "episode_success",
             }
         )
-        self.episode_completion_status = EpisodeCompletionStatus.SUCCESS
+        self.agent_data.episode_completion_status = (
+            EpisodeCompletionStatus.SUCCESS
+        )
         print(f"User {self.user_index} has signaled the episode as completed.")
 
     def _on_error_reported(self, error_report: ErrorReport):
@@ -372,7 +420,9 @@ def _on_error_reported(self, error_report: ErrorReport):
                 "error_report": error_report.user_message,
             }
         )
-        self.episode_completion_status = EpisodeCompletionStatus.FAILURE
+        self.agent_data.episode_completion_status = (
+            EpisodeCompletionStatus.FAILURE
+        )
         print(
             f"User {self.user_index} has signaled a problem with the episode: '{error_report.user_message}'."
         )
@@ -387,32 +437,74 @@ def __init__(
         self, app_service: AppService, app_data: AppData, session: Session
     ):
         super().__init__(app_service, app_data)
+        sim = app_service.sim
+        agent_mgr = sim.agents_mgr
         self._save_keyframes = False  # Done on env step (rearrange_sim).
+
         self._app_service = app_service
         self._session = session
-        self._gui_agent_controllers = self._app_service.gui_agent_controllers
-        self._num_agents = len(self._gui_agent_controllers)
-        self._users = self._app_service.users
+        self._gui_agent_controllers = app_service.gui_agent_controllers
+
+        self._users = app_service.users
+        self._num_users = self._users.max_user_count
+        self._agents = Users(len(agent_mgr._all_agent_data))
+        self._num_agents = self._agents.max_user_count
 
         self._sps_tracker = AverageRateTracker(2.0)
         self._server_user_index = 0
-        self._server_gui_input = self._app_service.gui_input
+        self._server_gui_input = app_service.gui_input
         self._server_input_enabled = False
         self._elapsed_time = 0.0
 
-        self._user_data: List[UserData] = []
-
         self._world = World(app_service.sim)
 
+        self._agent_to_user_index: Dict[int, int] = {}
+        self._user_to_agent_index: Dict[int, int] = {}
+
+        self._agent_data: List[AgentData] = []
+        for agent_index in range(self._num_agents):
+            agent = agent_mgr._all_agent_data[agent_index]
+            camera_name: Optional[Any] = (
+                agent.articulated_agent._cameras[0]
+                if len(agent.articulated_agent._cameras) > 0
+                else None
+            )
+            render_camera: Optional[Any] = (
+                sim.agents[agent_index]._sensors[camera_name].render_camera
+                if camera_name is not None
+                else None
+            )
+            agent_controller = app_service.all_agent_controllers[agent_index]
+
+            # Match agent and user indices.
+            for user_index in range(len(self._gui_agent_controllers)):
+                gui_agent_controller = self._gui_agent_controllers[user_index]
+                if gui_agent_controller._agent_idx == agent_index:
+                    self._agent_to_user_index[agent_index] = user_index
+                    self._user_to_agent_index[user_index] = agent_index
+                    break
+
+            self._agent_data.append(
+                AgentData(
+                    app_service=app_service,
+                    world=self._world,
+                    agent_controller=agent_controller,
+                    agent_index=agent_index,
+                    render_camera=render_camera,
+                )
+            )
+
+        self._user_data: List[UserData] = []
         for user_index in self._users.indices(Mask.ALL):
+            agent_data = self._agent_data[
+                self._user_to_agent_index[user_index]
+            ]
             self._user_data.append(
                 UserData(
                     app_service=app_service,
                     user_index=user_index,
                     world=self._world,
-                    gui_agent_controller=self._gui_agent_controllers[
-                        user_index
-                    ],
+                    agent_data=agent_data,
                     server_sps_tracker=self._sps_tracker,
                 )
             )
@@ -473,9 +565,11 @@ def on_environment_reset(self, episode_recorder_dict):
         current_episode = self._app_service.env.current_episode
         if hasattr(current_episode, "instruction"):
             task_instruction = current_episode.instruction
-            # TODO: Users will have different instructions.
-            for user_index in self._users.indices(Mask.ALL):
-                self._user_data[user_index].task_instruction = task_instruction
+            # TODO: Agents will have different instructions.
+            for agent_index in self._agents.indices(Mask.ALL):
+                self._agent_data[
+                    agent_index
+                ].task_instruction = task_instruction
 
         for user_index in self._users.indices(Mask.ALL):
             self._user_data[user_index].reset()
@@ -484,6 +578,7 @@ def on_environment_reset(self, episode_recorder_dict):
         self._app_service.sim.gfx_replay_manager.save_keyframe()
 
     def _update_grasping_and_set_act_hints(self, user_index: int):
+        # TODO: Read/write from grasp manager.
         gui_agent_controller = self._user_data[user_index].gui_agent_controller
         assert isinstance(
             gui_agent_controller, (GuiHumanoidController, GuiRobotController)
@@ -500,9 +595,6 @@ def _update_grasping_and_set_act_hints(self, user_index: int):
             reach_pos=None,
         )
 
-    def _get_gui_controlled_agent_index(self, user_index):
-        return self._gui_agent_controllers[user_index]._agent_idx
-
     def _get_controls_text(self, user_index: int):
         controls_str: str = ""
         if self._user_data[user_index].show_gui_text:
@@ -525,21 +617,22 @@ def _get_controls_text(self, user_index: int):
     def _get_status_text(self, user_index: int):
         status_str = ""
 
-        if len(self._user_data[user_index].task_instruction) > 0:
-            status_str += (
-                "Instruction: "
-                + self._user_data[user_index].task_instruction
-                + "\n"
-            )
+        task_instruction = self._user_data[
+            user_index
+        ].agent_data.task_instruction
+        if len(task_instruction) > 0:
+            status_str += "Instruction: " + task_instruction + "\n"
 
         if (
             self._users.max_user_count > 1
-            and self._user_data[user_index].episode_completion_status
+            and self._user_data[
+                user_index
+            ].agent_data.episode_completion_status
             == EpisodeCompletionStatus.PENDING
         ):
-            if self._has_any_user_finished_success():
+            if self._has_any_agent_finished_success():
                 status_str += "\n\nThe other participant signaled that the task is completed.\nPress '0' when you are done."
-            elif self._has_any_user_finished_failure():
+            elif self._has_any_agent_finished_failure():
                 status_str += "\n\nThe other participant signaled a problem with the task.\nPress '0' to continue."
 
         client_helper = self._app_service.remote_client_state._client_helper
@@ -585,18 +678,18 @@ def sim_update(self, dt: float, post_sim_update_dict):
             if self._server_gui_input.get_key_down(GuiInput.KeyNS.ZERO):
                 server_user = self._user_data[self._server_user_index]
                 if (
-                    server_user.episode_completion_status
+                    server_user.agent_data.episode_completion_status
                     == EpisodeCompletionStatus.PENDING
                 ):
                     server_user._on_episode_success()
 
             # Switch the server-controlled user.
-            if self._num_agents > 0 and self._server_gui_input.get_key_down(
+            if self._num_users > 0 and self._server_gui_input.get_key_down(
                 GuiInput.KeyNS.TAB
             ):
                 self._server_user_index = (
                     self._server_user_index + 1
-                ) % self._num_agents
+                ) % self._num_users
 
         # Copy server input to user input when server input is active.
         if self._app_service.hitl_config.networking.enable:
@@ -618,73 +711,87 @@ def sim_update(self, dt: float, post_sim_update_dict):
             self._update_help_text(user_index)
 
         # Draw the picture-in-picture showing other agent's perspective.
-        if self._users.max_user_count == 2:
-            self._user_data[0].draw_pip_viewport(self._user_data[1])
-            self._user_data[1].draw_pip_viewport(self._user_data[0])
+        if self._num_agents == 2:
+            for user_index in range(self._num_users):
+                user_agent_idx = self._user_to_agent_index[user_index]
+                other_agent_idx = user_agent_idx ^ 1
+                other_agent_data = self._agent_data[other_agent_idx]
+
+                # An agent without a user is AI-controlled: update its camera.
+                if other_agent_idx not in self._agent_to_user_index:
+                    other_agent_data.update_camera_from_sensor()
+
+                self._user_data[user_index].draw_pip_viewport(other_agent_data)
 
         self._app_service.compute_action_and_step_env()
 
         # Set the server camera.
         server_cam_transform = self._user_data[
             self._server_user_index
-        ].cam_transform
+        ].agent_data.cam_transform
         post_sim_update_dict["cam_transform"] = server_cam_transform
 
         #  Collect data.
         self._elapsed_time += dt
-        # TODO: Always record with non-human agent.
-        if self._is_any_user_active():
+        if self._is_any_agent_policy_driven() or self._is_any_user_active():
             frame_data = self._frame_recorder.record_state(
                 self._elapsed_time, self._user_data
             )
             self._session.session_recorder.record_frame(frame_data)
 
+    def _is_any_agent_policy_driven(self) -> bool:
+        """
+        Returns true if any of the agents is policy-driven.
+        Returns false if all agents are user-driven.
+        """
+        return self._num_agents > self._num_users
+
     def _is_any_user_active(self) -> bool:
         """
         Returns true if any user is active during the frame.
         """
-        return any(
+        return self._is_any_agent_policy_driven() or any(
             self._user_data[user_index].gui_input.get_any_input()
             or len(self._user_data[user_index].ui_events) > 0
             for user_index in range(self._app_data.max_user_count)
         )
 
-    def _has_any_user_finished_success(self) -> bool:
+    def _has_any_agent_finished_success(self) -> bool:
         """
-        Returns true if any user completed the episode successfully.
+        Returns true if any agent completed the episode successfully.
         """
         return any(
-            self._user_data[user_index].episode_completion_status
+            self._agent_data[agent_index].episode_completion_status
             == EpisodeCompletionStatus.SUCCESS
-            for user_index in range(self._app_data.max_user_count)
+            for agent_index in range(self._num_agents)
         )
 
-    def _has_any_user_finished_failure(self) -> bool:
+    def _has_any_agent_finished_failure(self) -> bool:
         """
-        Returns true if any user completed the episode unsuccessfully.
+        Returns true if any agent completed the episode unsuccessfully.
         """
         return any(
-            self._user_data[user_index].episode_completion_status
+            self._agent_data[agent_index].episode_completion_status
             == EpisodeCompletionStatus.FAILURE
-            for user_index in range(self._app_data.max_user_count)
+            for agent_index in range(self._num_agents)
         )
 
     def _is_episode_finished(self) -> bool:
         """
-        Returns true if all users finished the episode, regardless of success.
+        Returns true if all agents finished the episode, regardless of success.
         """
         return all(
-            self._user_data[user_index].episode_completion_status
+            self._agent_data[agent_index].episode_completion_status
             != EpisodeCompletionStatus.PENDING
-            for user_index in range(self._app_data.max_user_count)
+            for agent_index in range(self._num_agents)
         )
 
     def _is_episode_successful(self) -> bool:
         """
-        Returns true if all users finished the episode successfully.
+        Returns true if all agents finished the episode successfully.
         """
         return all(
-            self._user_data[user_index].episode_completion_status
+            self._agent_data[agent_index].episode_completion_status
             == EpisodeCompletionStatus.SUCCESS
-            for user_index in range(self._app_data.max_user_count)
+            for agent_index in range(self._num_agents)
         )
diff --git a/habitat-hitl/habitat_hitl/_internal/hitl_driver.py b/habitat-hitl/habitat_hitl/_internal/hitl_driver.py
index 1dc3c54a5b..c699e91b85 100644
--- a/habitat-hitl/habitat_hitl/_internal/hitl_driver.py
+++ b/habitat-hitl/habitat_hitl/_internal/hitl_driver.py
@@ -48,6 +48,7 @@
 from habitat_hitl.core.text_drawer import AbstractTextDrawer
 from habitat_hitl.core.types import KeyframeAndMessages
 from habitat_hitl.core.user_mask import Users
+from habitat_hitl.environment.controllers.controller_abc import Controller
 from habitat_hitl.environment.controllers.controller_helper import (
     ControllerHelper,
 )
@@ -207,6 +208,9 @@ def local_end_episode(do_reset=False):
         gui_agent_controllers: Any = (
             self.ctrl_helper.get_gui_agent_controllers()
         )
+        all_agent_controllers: List[
+            Controller
+        ] = self.ctrl_helper.get_all_agent_controllers()
 
         # TODO: Dependency injection
         text_drawer._client_message_manager = self._client_message_manager
@@ -230,6 +234,7 @@ def local_end_episode(do_reset=False):
             episode_helper=self._episode_helper,
             client_message_manager=self._client_message_manager,
             gui_agent_controllers=gui_agent_controllers,
+            all_agent_controllers=all_agent_controllers,
         )
 
         self._app_state: AppState = None
diff --git a/habitat-hitl/habitat_hitl/app_states/app_service.py b/habitat-hitl/habitat_hitl/app_states/app_service.py
index 8081849037..2f7ea3bf9b 100644
--- a/habitat-hitl/habitat_hitl/app_states/app_service.py
+++ b/habitat-hitl/habitat_hitl/app_states/app_service.py
@@ -17,7 +17,10 @@
 from habitat_hitl.core.serialize_utils import BaseRecorder
 from habitat_hitl.core.text_drawer import AbstractTextDrawer
 from habitat_hitl.core.user_mask import Users
-from habitat_hitl.environment.controllers.controller_abc import GuiController
+from habitat_hitl.environment.controllers.controller_abc import (
+    Controller,
+    GuiController,
+)
 from habitat_hitl.environment.episode_helper import EpisodeHelper
 
 
@@ -44,6 +47,7 @@ def __init__(
         episode_helper: EpisodeHelper,
         client_message_manager: ClientMessageManager,
         gui_agent_controllers: List[GuiController],
+        all_agent_controllers: List[Controller],
     ):
         self._config = config
         self._hitl_config = hitl_config
@@ -63,6 +67,7 @@ def __init__(
         self._episode_helper = episode_helper
         self._client_message_manager = client_message_manager
         self._gui_agent_controllers = gui_agent_controllers
+        self._all_agent_controllers = all_agent_controllers
 
     @property
     def config(self):
@@ -135,3 +140,7 @@ def client_message_manager(self) -> ClientMessageManager:
     @property
     def gui_agent_controllers(self) -> List[GuiController]:
         return self._gui_agent_controllers
+
+    @property
+    def all_agent_controllers(self) -> List[Controller]:
+        return self._all_agent_controllers
diff --git a/habitat-hitl/habitat_hitl/environment/controllers/controller_helper.py b/habitat-hitl/habitat_hitl/environment/controllers/controller_helper.py
index d4855ea3ce..e04073ef40 100644
--- a/habitat-hitl/habitat_hitl/environment/controllers/controller_helper.py
+++ b/habitat-hitl/habitat_hitl/environment/controllers/controller_helper.py
@@ -211,6 +211,12 @@ def get_gui_agent_controllers(self) -> List[Controller]:
             )
         return gui_agent_controllers
 
+    def get_all_agent_controllers(self) -> List[Controller]:
+        """
+        Return a list of controllers indexed by agent index.
+        """
+        return self.controllers
+
     def update(self, obs):
         actions = []
 

From 1b3e7cf545d4147bf0628b06a5a4c82cf9c3865c Mon Sep 17 00:00:00 2001
From: Mikael Dallaire Cote <110583667+0mdc@users.noreply.github.com>
Date: Thu, 23 May 2024 14:31:46 -0400
Subject: [PATCH 35/88] Change dataset config.

---
 examples/hitl/rearrange_v2/config/language_rearrange.yaml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/examples/hitl/rearrange_v2/config/language_rearrange.yaml b/examples/hitl/rearrange_v2/config/language_rearrange.yaml
index 2d30872ce9..09f35d6cd2 100644
--- a/examples/hitl/rearrange_v2/config/language_rearrange.yaml
+++ b/examples/hitl/rearrange_v2/config/language_rearrange.yaml
@@ -78,7 +78,8 @@ habitat:
       enable_physics: True
   dataset:
     type: "CollaborationDataset-v0"
-    data_path: data/datasets/hssd/llm_rearrange/v2/60scenes_dataset_776eps_with_eval.json.gz
+    data_path: "data/episodes/cycled_tuts.json.gz"
+    scenes_dir: "data/fpss"
 
 rearrange_v2:
   data_collection:

From 30712b0be94cea25ca3b7edbf53e8b069e60d685 Mon Sep 17 00:00:00 2001
From: Mikael Dallaire Cote <110583667+0mdc@users.noreply.github.com>
Date: Tue, 28 May 2024 11:42:09 -0400
Subject: [PATCH 36/88] Config updates for remote HITL sessions.

---
 .../config/lang_rearrange_llmspot_guihumanoid.yaml        | 8 +++++++-
 examples/hitl/rearrange_v2/config/language_rearrange.yaml | 7 +++++++
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/examples/hitl/rearrange_v2/config/lang_rearrange_llmspot_guihumanoid.yaml b/examples/hitl/rearrange_v2/config/lang_rearrange_llmspot_guihumanoid.yaml
index 592c357995..7d4695c9db 100644
--- a/examples/hitl/rearrange_v2/config/lang_rearrange_llmspot_guihumanoid.yaml
+++ b/examples/hitl/rearrange_v2/config/lang_rearrange_llmspot_guihumanoid.yaml
@@ -34,7 +34,8 @@ habitat:
       # For the demo, we want to showcase the episodes in the specified order
       shuffle: False
   dataset:
-    data_path: "data/prerelease_1k_v2.json.gz"
+    type: "CollaborationDataset-v0"
+    data_path: "data/episodes/cycled_tuts.json.gz"
     scenes_dir: "data/fpss"
     # metadata:
     #   metadata_folder: "data/fpss/metadata"
@@ -88,3 +89,8 @@ habitat_hitl:
   hide_humanoid_in_gui: True
   camera:
     first_person_mode: True
+  networking:
+    client_sync:
+      server_camera: False
+      server_input: False
+    client_max_idle_duration: 180.0
diff --git a/examples/hitl/rearrange_v2/config/language_rearrange.yaml b/examples/hitl/rearrange_v2/config/language_rearrange.yaml
index 09f35d6cd2..c3c705dcdc 100644
--- a/examples/hitl/rearrange_v2/config/language_rearrange.yaml
+++ b/examples/hitl/rearrange_v2/config/language_rearrange.yaml
@@ -81,6 +81,13 @@ habitat:
     data_path: "data/episodes/cycled_tuts.json.gz"
     scenes_dir: "data/fpss"
 
+habitat_hitl:
+  networking:
+    client_sync:
+      server_camera: False
+      server_input: False
+    client_max_idle_duration: 180.0
+
 rearrange_v2:
   data_collection:
     s3_path: "Placeholder/"

From 0a88ef7eff74f720da75cf6ec62f578533ca8998 Mon Sep 17 00:00:00 2001
From: Mikael Dallaire Cote <110583667+0mdc@users.noreply.github.com>
Date: Tue, 28 May 2024 14:06:34 -0400
Subject: [PATCH 37/88] Load llm extensions.

---
 examples/hitl/rearrange_v2/collaboration_episode_loader.py | 3 +++
 examples/hitl/rearrange_v2/main.py                         | 2 ++
 2 files changed, 5 insertions(+)

diff --git a/examples/hitl/rearrange_v2/collaboration_episode_loader.py b/examples/hitl/rearrange_v2/collaboration_episode_loader.py
index b2e6e6c33c..f0111bd485 100644
--- a/examples/hitl/rearrange_v2/collaboration_episode_loader.py
+++ b/examples/hitl/rearrange_v2/collaboration_episode_loader.py
@@ -54,3 +54,6 @@ def load_collaboration_episode_data(
         episode: RearrangeEpisode,
     ) -> CollaborationEpisodeData:
         return CollaborationEpisodeData()
+
+    def register_habitat_llm_extensions(config):
+        pass
diff --git a/examples/hitl/rearrange_v2/main.py b/examples/hitl/rearrange_v2/main.py
index fbcc780632..904efa4161 100644
--- a/examples/hitl/rearrange_v2/main.py
+++ b/examples/hitl/rearrange_v2/main.py
@@ -30,6 +30,8 @@ def main(config):
     # We don't sync the server camera. Instead, we maintain one camera per user.
     assert config.habitat_hitl.networking.client_sync.server_camera == False
 
+    collaboration_episode_loader.register_habitat_llm_extensions(config)
+
     hitl_main(
         config,
         lambda app_service: StateMachine(app_service),

From 1d8f031d94ce2153329d7336d7d45b1b02d763cb Mon Sep 17 00:00:00 2001
From: Mikael Dallaire Cote <110583667+0mdc@users.noreply.github.com>
Date: Tue, 28 May 2024 18:27:54 -0400
Subject: [PATCH 38/88] Update LLM config.

---
 .../rearrange_v2/config/lang_rearrange_llmspot_guihumanoid.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/hitl/rearrange_v2/config/lang_rearrange_llmspot_guihumanoid.yaml b/examples/hitl/rearrange_v2/config/lang_rearrange_llmspot_guihumanoid.yaml
index 7d4695c9db..aa0bea16b6 100644
--- a/examples/hitl/rearrange_v2/config/lang_rearrange_llmspot_guihumanoid.yaml
+++ b/examples/hitl/rearrange_v2/config/lang_rearrange_llmspot_guihumanoid.yaml
@@ -10,7 +10,7 @@ defaults:
   - /wandb_conf@                : own
   - language_rearrange_multi_agent_llm_gui
   - hitl_defaults
-  - override /instruct@evaluation.agents.agent_0.planner.plan_config.instruct: few_shot_decentralized_robot_partner_aware_v1
+  - override /instruct@evaluation.agents.agent_0.planner.plan_config.instruct: few_shot_decentralized_partial_obs_coordinated_robot
   - override /llm@evaluation.agents.agent_0.planner.plan_config.llm: openai_chat
   - _self_
 

From d981be3243a91cd56864dac7164d4af5178cf226 Mon Sep 17 00:00:00 2001
From: Mikael Dallaire Cote <110583667+0mdc@users.noreply.github.com>
Date: Tue, 28 May 2024 18:28:09 -0400
Subject: [PATCH 39/88] Remove dead code in LLM controller.

---
 .../habitat_hitl/environment/controllers/llm_controller.py  | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py b/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py
index 6607e8ecea..e62ee8984f 100644
--- a/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py
+++ b/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py
@@ -62,8 +62,6 @@ def __init__(
                 level=logging.DEBUG,
                 force=True,
             )
-            self._analysis_logger = logging.getLogger("LLMController")
-            self._analysis_logger.debug("LLMController initialized")
 
         with habitat.config.read_write(self._config):
             fix_config(self._config)
@@ -122,10 +120,6 @@ def on_environment_reset(self):
             self.environment_interface.hab_env.current_episode.instruction
         )
         print(f"Instruction: {self.current_instruction}")
-        self._analysis_logger.debug(
-            f"------\nInstruction: {self.current_instruction}"
-        )
-        self._analysis_logger.debug("NEW EPISODE STARTING")
         self._iter = 0
 
     def _act(self, observations, *args, **kwargs):

From 4a8c77fb37449b268fb2306662cf83a6abbf4090 Mon Sep 17 00:00:00 2001
From: Mikael Dallaire Cote <110583667+0mdc@users.noreply.github.com>
Date: Wed, 29 May 2024 14:46:36 -0400
Subject: [PATCH 40/88] Avoid errors when loading non-llm configs.

---
 .../hitl/rearrange_v2/collaboration_episode_loader.py    | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/examples/hitl/rearrange_v2/collaboration_episode_loader.py b/examples/hitl/rearrange_v2/collaboration_episode_loader.py
index f0111bd485..9f4bf4f946 100644
--- a/examples/hitl/rearrange_v2/collaboration_episode_loader.py
+++ b/examples/hitl/rearrange_v2/collaboration_episode_loader.py
@@ -44,9 +44,12 @@ def load_collaboration_episode_data(
         return episode_data
 
     def register_habitat_llm_extensions(config):
-        register_actions(config)
-        register_measures(config)
-        register_sensors(config)
+        try:
+            register_actions(config)
+            register_measures(config)
+            register_sensors(config)
+        except Exception as e:
+            print(f"Config incompatible with LLM. {e}")
 
 else:
 

From a9e49f57a32bed77a6aa2a761606ce92251fc601 Mon Sep 17 00:00:00 2001
From: Mikael Dallaire Cote <110583667+0mdc@users.noreply.github.com>
Date: Wed, 29 May 2024 14:47:11 -0400
Subject: [PATCH 41/88] Remove superfluous dataset definition.

---
 .../hitl/rearrange_v2/config/lang_rearrange_spot_humanoid.yaml | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/examples/hitl/rearrange_v2/config/lang_rearrange_spot_humanoid.yaml b/examples/hitl/rearrange_v2/config/lang_rearrange_spot_humanoid.yaml
index 8e58967569..1192a68b52 100644
--- a/examples/hitl/rearrange_v2/config/lang_rearrange_spot_humanoid.yaml
+++ b/examples/hitl/rearrange_v2/config/lang_rearrange_spot_humanoid.yaml
@@ -15,9 +15,6 @@ habitat:
       max_scene_repeat_steps: -1
       max_scene_repeat_episodes: -1
       group_by_scene: False
-  dataset:
-    data_path: "data/prerelease_1k_v2.json.gz"
-    scenes_dir: "data/fpss"
 
 habitat_baselines:
   # todo: document these choices

From 19659534c0a2b9b27c3cfdaa1bba203df6fc34d4 Mon Sep 17 00:00:00 2001
From: Mikael Dallaire Cote <110583667+0mdc@users.noreply.github.com>
Date: Wed, 29 May 2024 16:15:55 -0400
Subject: [PATCH 42/88] Avoid resetting Habitat when resetting
 environment_interface.

---
 .../habitat_hitl/environment/controllers/llm_controller.py      | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py b/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py
index e62ee8984f..975dc1c25d 100644
--- a/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py
+++ b/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py
@@ -109,7 +109,7 @@ def on_environment_reset(self):
         # NOTE: the following ONLY resets self._test_recurrent_hidden_states,
         # self._prev_actions and self._not_done_masks
         # super().on_environment_reset()
-        self.environment_interface.reset_environment()
+        self.environment_interface.reset_environment(reset_habitat=False)
         self.planner.reset()
         if self._thread is not None:
             self._thread.join()

From dc65e9289861daf6b7853df6809365507cf139ef Mon Sep 17 00:00:00 2001
From: Mikael Dallaire Cote <110583667+0mdc@users.noreply.github.com>
Date: Wed, 29 May 2024 18:46:36 -0400
Subject: [PATCH 43/88] Fix rearrange sensor initialization.

---
 examples/hitl/rearrange_v2/rearrange_v2.py | 26 +++++++++++++---------
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/examples/hitl/rearrange_v2/rearrange_v2.py b/examples/hitl/rearrange_v2/rearrange_v2.py
index 7c5697726a..73b16037c7 100644
--- a/examples/hitl/rearrange_v2/rearrange_v2.py
+++ b/examples/hitl/rearrange_v2/rearrange_v2.py
@@ -462,18 +462,22 @@ def __init__(
         self._user_to_agent_index: Dict[int, int] = {}
 
         self._agent_data: List[AgentData] = []
+
+        # HACK: The simulator has only 1 agent with all sensors. See 'create_sim_config()' in habitat_simulator.py.
+        sim_agent = sim.agents[0]
+        config = self._app_service.config
+        head_sensor_substring: str = config.rearrange_v2.head_sensor_substring
         for agent_index in range(self._num_agents):
             agent = agent_mgr._all_agent_data[agent_index]
-            camera_name: Optional[Any] = (
-                agent.articulated_agent._cameras[0]
-                if len(agent.articulated_agent._cameras) > 0
-                else None
-            )
-            render_camera: Optional[Any] = (
-                sim.agents[agent_index]._sensors[camera_name].render_camera
-                if camera_name is not None
-                else None
-            )
+
+            render_camera: Optional[Any] = None
+            for camera_name in agent.articulated_agent._cameras:
+                if head_sensor_substring in camera_name:
+                    sensor = sim_agent._sensors.get(camera_name, None)
+                    if sensor is not None and hasattr(sensor, "render_camera"):
+                        render_camera = sensor.render_camera
+                        break
+
             agent_controller = app_service.all_agent_controllers[agent_index]
 
             # Match agent and user indices.
@@ -518,6 +522,7 @@ def __init__(
 
     def get_next_state(self) -> Optional[AppStateBase]:
         if self._cancel:
+            # TODO: Reset LLM controller.
             return create_app_state_cancel_session(
                 self._app_service,
                 self._app_data,
@@ -525,6 +530,7 @@ def get_next_state(self) -> Optional[AppStateBase]:
                 "User disconnected",
             )
         elif self._is_episode_finished():
+            # TODO: Reset LLM controller.
             return create_app_state_load_episode(
                 self._app_service, self._app_data, self._session
             )

From 60a08f9f7ed46a6dd49e7643c25b0e6823e42034 Mon Sep 17 00:00:00 2001
From: Mikael Dallaire Cote <110583667+0mdc@users.noreply.github.com>
Date: Wed, 29 May 2024 18:47:04 -0400
Subject: [PATCH 44/88] Add rearrange_v2 config.

---
 .../rearrange_v2/config/lang_rearrange_spot_humanoid.yaml   | 6 ++++++
 examples/hitl/rearrange_v2/config/language_rearrange.yaml   | 1 +
 .../config/language_rearrange_multi_agent_llm_gui.yaml      | 6 ++++++
 3 files changed, 13 insertions(+)

diff --git a/examples/hitl/rearrange_v2/config/lang_rearrange_spot_humanoid.yaml b/examples/hitl/rearrange_v2/config/lang_rearrange_spot_humanoid.yaml
index 1192a68b52..2482999814 100644
--- a/examples/hitl/rearrange_v2/config/lang_rearrange_spot_humanoid.yaml
+++ b/examples/hitl/rearrange_v2/config/lang_rearrange_spot_humanoid.yaml
@@ -47,3 +47,9 @@ habitat_hitl:
       server_camera: False
       server_input: False
     client_max_idle_duration: 180.0
+
+rearrange_v2:
+  data_collection:
+    s3_path: "Placeholder/"
+    output_file_name: "session"
+  head_sensor_substring: "head_sensor"
diff --git a/examples/hitl/rearrange_v2/config/language_rearrange.yaml b/examples/hitl/rearrange_v2/config/language_rearrange.yaml
index c3c705dcdc..8ee176b28b 100644
--- a/examples/hitl/rearrange_v2/config/language_rearrange.yaml
+++ b/examples/hitl/rearrange_v2/config/language_rearrange.yaml
@@ -92,3 +92,4 @@ rearrange_v2:
   data_collection:
     s3_path: "Placeholder/"
     output_file_name: "session"
+  head_sensor_substring: "head_sensor"
diff --git a/examples/hitl/rearrange_v2/config/language_rearrange_multi_agent_llm_gui.yaml b/examples/hitl/rearrange_v2/config/language_rearrange_multi_agent_llm_gui.yaml
index 7ca5939268..7be60c0a2d 100644
--- a/examples/hitl/rearrange_v2/config/language_rearrange_multi_agent_llm_gui.yaml
+++ b/examples/hitl/rearrange_v2/config/language_rearrange_multi_agent_llm_gui.yaml
@@ -151,3 +151,9 @@ habitat:
     type: "CollaborationDataset-v0"
     split: train
     scenes_dir: data/fpss
+
+rearrange_v2:
+  data_collection:
+    s3_path: "Placeholder/"
+    output_file_name: "session"
+  head_sensor_substring: "head_sensor"

From fe5504c97361d22f11efa4de0c9a73d2a6e73364 Mon Sep 17 00:00:00 2001
From: Mikael Dallaire Cote <110583667+0mdc@users.noreply.github.com>
Date: Wed, 29 May 2024 18:47:33 -0400
Subject: [PATCH 45/88] Set idle timer to 180s.

---
 .../hitl/rearrange_v2/config/experiment/headless_server.yaml    | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/hitl/rearrange_v2/config/experiment/headless_server.yaml b/examples/hitl/rearrange_v2/config/experiment/headless_server.yaml
index 370a45d75f..85e8299a3f 100644
--- a/examples/hitl/rearrange_v2/config/experiment/headless_server.yaml
+++ b/examples/hitl/rearrange_v2/config/experiment/headless_server.yaml
@@ -6,7 +6,7 @@ habitat_hitl:
     enable: True
     http_availability_server:
       enable: True
-    client_max_idle_duration: 30.0
+    client_max_idle_duration: 180.0
   experimental:
     headless:
       do_headless: True

From cebd24b8be5b150dda393fc26303d0b7945128b6 Mon Sep 17 00:00:00 2001
From: Priyam Parashar <priyam8parashar@gmail.com>
Date: Fri, 31 May 2024 15:47:43 -0700
Subject: [PATCH 46/88] event callbacks and action-space update

---
 examples/hitl/rearrange_v2/rearrange_v2.py    |  7 +++
 .../environment/controllers/llm_controller.py | 54 +++++++++++++++++--
 2 files changed, 56 insertions(+), 5 deletions(-)

diff --git a/examples/hitl/rearrange_v2/rearrange_v2.py b/examples/hitl/rearrange_v2/rearrange_v2.py
index 73b16037c7..4575088d09 100644
--- a/examples/hitl/rearrange_v2/rearrange_v2.py
+++ b/examples/hitl/rearrange_v2/rearrange_v2.py
@@ -40,6 +40,7 @@
     GuiHumanoidController,
     GuiRobotController,
 )
+from habitat_hitl.environment.controllers.llm_controller import LLMController
 from habitat_hitl.environment.hablab_utils import get_agent_art_obj_transform
 from habitat_sim.utils.common import quat_from_magnum, quat_to_coeffs
 
@@ -512,6 +513,12 @@ def __init__(
                     server_sps_tracker=self._sps_tracker,
                 )
             )
+            for agent_controller in app_service.all_agent_controllers:
+                # register callbacks for LLMController
+                if isinstance(agent_controller, LLMController):
+                    self._user_data[-1].ui.on_pick.registerCallback(agent_controller._on_pick)
+                    self._user_data[-1].ui.on_place.registerCallback(agent_controller._on_place)
+
 
         self._frame_recorder = FrameRecorder(
             app_service, app_data, self._world
diff --git a/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py b/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py
index 975dc1c25d..7fc2c4fa23 100644
--- a/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py
+++ b/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py
@@ -7,6 +7,7 @@
 # This controller assumes you are using a habitat-llm Agent downstream
 # code for interface followed by a habitat-llm Agent will be released in the future
 
+import copy
 import logging
 import threading
 from typing import Any, Dict, Union
@@ -16,12 +17,14 @@
 from habitat_llm.agent.env import EnvironmentInterface
 from habitat_llm.planner.llm_planner import LLMPlanner
 from habitat_llm.utils import fix_config, setup_config
+from habitat_llm.utils.analysis import CodeTimer
 from hydra.utils import instantiate
 from omegaconf import DictConfig
 
 import habitat
 import habitat.config
 from habitat.core.environments import GymHabitatEnv
+from habitat.sims.habitat_simulator.sim_utilities import get_obj_from_id
 from habitat_hitl.environment.controllers.baselines_controller import (
     SingleAgentBaselinesController,
 )
@@ -44,7 +47,7 @@ def __init__(
         self._habitat_env = gym_habitat_env.unwrapped.habitat_env
         self._agent_idx = agent_idx
         # TODO: gather this from config
-        self._agent_action_length = 28
+        self._agent_action_length = 36
         self._thread: Union[None, threading.Thread] = None
         self._low_level_actions: Union[None, dict, np.ndarray] = {}
         self._task_done = False
@@ -78,6 +81,7 @@ def __init__(
         self.initialize_environment_interface()
         self.initialize_planner()
         self.info: Dict[str, Any] = {}
+        self._human_action_history = []
 
     def initialize_planner(self):
         # NOTE: using instantiate here, but given this is planning for a single agent
@@ -122,13 +126,31 @@ def on_environment_reset(self):
         print(f"Instruction: {self.current_instruction}")
         self._iter = 0
 
+    def _on_pick(self, _e: Any = None):
+        action = {
+            "action": "PICK",
+            "object_id": _e.object_id,
+            "object_handle": _e.object_handle,
+        }
+
+        self._human_action_history.append(action)
+
+    def _on_place(self, _e: Any = None):
+        action = {
+            "action": "PLACE",
+            "object_id": _e.object_id,
+            "object_handle": _e.object_handle,
+            "receptacle_id": _e.receptacle_id,
+            # "receptacle_name": self.environment_interface.world_graph.get_node_from_sim_handle(
+            #     get_obj_from_id(self.environment_interface.sim, _e.receptacle_id).handle
+            # ),
+        }
+
+        self._human_action_history.append(action)
+
     def _act(self, observations, *args, **kwargs):
-        # NOTE: update the world state to reflect the new observations
-        self.environment_interface.update_world_state(observations)
         # NOTE: this is where the LLM magic happens, the agent is given the observations
         # and it returns the actions for the agent
-        # TODO: looping needed here until a physical low-level-action is returned
-        # low_level_actions: Union[dict, np.ndarray] = {}
         (
             self._low_level_actions,
             planner_info,
@@ -142,6 +164,28 @@ def _act(self, observations, *args, **kwargs):
         return
 
     def act(self, observations, *args, **kwargs):
+        # NOTE: update the world state to reflect the new observations
+        # TODO: might need a lock on world-state here?
+        self.environment_interface.update_world_state(
+            observations, disable_logging=True
+        )
+        # update agent state history
+        while self._human_action_history:
+            action = self._human_action_history.pop(0)
+            if action["action"] == "PICK":
+                object_name = self.environment_interface.world_graph.get_node_from_sim_handle(
+                    action['object_handle']
+                ).name
+                self.environment_interface.agent_state_history[1].append(
+                    f"Agent picked up {object_name}"
+                )
+            elif action["action"] == "PLACE":
+                object_name = self.environment_interface.world_graph.get_node_from_sim_handle(
+                    action['object_handle']
+                ).name
+                self.environment_interface.agent_state_history[1].append(
+                    f"Agent placed {object_name} in {action['receptacle_id']}"
+                )
         if self._iter < self._skip_iters or self._task_done:
             self._iter += 1
             return np.zeros(self._agent_action_length)

From 72cb8f7aac021101f5f615ba1b22fe54613b049d Mon Sep 17 00:00:00 2001
From: Priyam Parashar <priyam8parashar@gmail.com>
Date: Fri, 31 May 2024 16:03:50 -0700
Subject: [PATCH 47/88] added open/close callback too; test next

---
 examples/hitl/rearrange_v2/rearrange_v2.py     |  2 ++
 .../environment/controllers/llm_controller.py  | 18 ++++++++++++++++++
 2 files changed, 20 insertions(+)

diff --git a/examples/hitl/rearrange_v2/rearrange_v2.py b/examples/hitl/rearrange_v2/rearrange_v2.py
index 4575088d09..334be78fd7 100644
--- a/examples/hitl/rearrange_v2/rearrange_v2.py
+++ b/examples/hitl/rearrange_v2/rearrange_v2.py
@@ -518,6 +518,8 @@ def __init__(
                 if isinstance(agent_controller, LLMController):
                     self._user_data[-1].ui.on_pick.registerCallback(agent_controller._on_pick)
                     self._user_data[-1].ui.on_place.registerCallback(agent_controller._on_place)
+                    self._user_data[-1].ui.on_open.registerCallback(agent_controller._on_open)
+                    self._user_data[-1].ui.on_close.registerCallback(agent_controller._on_close)
 
 
         self._frame_recorder = FrameRecorder(
diff --git a/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py b/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py
index 7fc2c4fa23..eedd404849 100644
--- a/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py
+++ b/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py
@@ -148,6 +148,24 @@ def _on_place(self, _e: Any = None):
 
         self._human_action_history.append(action)
 
+    def _on_open(self, _e: Any = None):
+        action = {
+            "action": "OPEN",
+            "object_id": _e.object_id,
+            "object_handle": _e.object_handle,
+        }
+
+        self._human_action_history.append(action)
+
+    def _on_close(self, _e: Any = None):
+        action = {
+            "action": "CLOSE",
+            "object_id": _e.object_id,
+            "object_handle": _e.object_handle,
+        }
+
+        self._human_action_history.append(action)
+
     def _act(self, observations, *args, **kwargs):
         # NOTE: this is where the LLM magic happens, the agent is given the observations
         # and it returns the actions for the agent

From 243df213b41f3c53576f9572e5dc086adc94675e Mon Sep 17 00:00:00 2001
From: Mikael Dallaire Cote <110583667+0mdc@users.noreply.github.com>
Date: Sat, 1 Jun 2024 17:06:39 -0400
Subject: [PATCH 48/88] Speed up agent action thread.

---
 .../environment/controllers/llm_controller.py | 23 ++++++++-----------
 1 file changed, 9 insertions(+), 14 deletions(-)

diff --git a/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py b/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py
index eedd404849..3ee85d0de7 100644
--- a/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py
+++ b/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py
@@ -7,7 +7,6 @@
 # This controller assumes you are using a habitat-llm Agent downstream
 # code for interface followed by a habitat-llm Agent will be released in the future
 
-import copy
 import logging
 import threading
 from typing import Any, Dict, Union
@@ -192,14 +191,14 @@ def act(self, observations, *args, **kwargs):
             action = self._human_action_history.pop(0)
             if action["action"] == "PICK":
                 object_name = self.environment_interface.world_graph.get_node_from_sim_handle(
-                    action['object_handle']
+                    action["object_handle"]
                 ).name
                 self.environment_interface.agent_state_history[1].append(
                     f"Agent picked up {object_name}"
                 )
             elif action["action"] == "PLACE":
                 object_name = self.environment_interface.world_graph.get_node_from_sim_handle(
-                    action['object_handle']
+                    action["object_handle"]
                 ).name
                 self.environment_interface.agent_state_history[1].append(
                     f"Agent placed {object_name} in {action['receptacle_id']}"
@@ -208,19 +207,15 @@ def act(self, observations, *args, **kwargs):
             self._iter += 1
             return np.zeros(self._agent_action_length)
         low_level_actions = np.zeros(self._agent_action_length)
-        if self._thread is None:
-            self._thread = threading.Thread(
+
+        if self._thread is None or not self._thread.is_alive():
+            if self._low_level_actions != {}:
+                low_level_actions = self._low_level_actions[
+                    str(self._agent_idx)
+                ][:-248]
+            self._thread = self._thread = threading.Thread(
                 target=self._act, args=(observations,), kwargs=kwargs
             )
             self._thread.start()
-        else:
-            if self._thread.is_alive():
-                pass
-            else:
-                self._thread = None
-                if self._low_level_actions != {}:
-                    low_level_actions = self._low_level_actions[
-                        str(self._agent_idx)
-                    ][:-248]
 
         return low_level_actions

From a9ff943dd7e7d3f954170c24f18f6b9bf699e653 Mon Sep 17 00:00:00 2001
From: Mikael Dallaire Cote <110583667+0mdc@users.noreply.github.com>
Date: Sun, 2 Jun 2024 16:18:44 -0400
Subject: [PATCH 49/88] Make Unity controls smoother.

---
 habitat-hitl/habitat_hitl/core/gui_input.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/habitat-hitl/habitat_hitl/core/gui_input.py b/habitat-hitl/habitat_hitl/core/gui_input.py
index 651475eadb..2ab639b19b 100644
--- a/habitat-hitl/habitat_hitl/core/gui_input.py
+++ b/habitat-hitl/habitat_hitl/core/gui_input.py
@@ -97,8 +97,10 @@ def on_frame_end(self):
         self._key_up.clear()
         self._mouse_button_down.clear()
         self._mouse_button_up.clear()
-        self._relative_mouse_position = [0, 0]
-        self._mouse_scroll_offset = 0.0
+        # TODO: The commented lines below are required for local server controls, but make Unity control sluggish.
+        #       Fix this by making server GUI reset independent.
+        # self._relative_mouse_position = [0, 0]
+        # self._mouse_scroll_offset = 0.0
 
     def copy_from(self, other: GuiInput):
         self._key_down = set(other._key_down)

From 3ae3e3ccda255262db1757096ed0777b4fe7a128 Mon Sep 17 00:00:00 2001
From: Mikael Dallaire Cote <110583667+0mdc@users.noreply.github.com>
Date: Sun, 2 Jun 2024 16:20:46 -0400
Subject: [PATCH 50/88] Code formatting and typing fixes.

---
 examples/hitl/rearrange_v2/rearrange_v2.py      | 17 ++++++++++++-----
 .../environment/controllers/llm_controller.py   |  6 ++----
 2 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/examples/hitl/rearrange_v2/rearrange_v2.py b/examples/hitl/rearrange_v2/rearrange_v2.py
index 334be78fd7..c54c62826f 100644
--- a/examples/hitl/rearrange_v2/rearrange_v2.py
+++ b/examples/hitl/rearrange_v2/rearrange_v2.py
@@ -516,11 +516,18 @@ def __init__(
             for agent_controller in app_service.all_agent_controllers:
                 # register callbacks for LLMController
                 if isinstance(agent_controller, LLMController):
-                    self._user_data[-1].ui.on_pick.registerCallback(agent_controller._on_pick)
-                    self._user_data[-1].ui.on_place.registerCallback(agent_controller._on_place)
-                    self._user_data[-1].ui.on_open.registerCallback(agent_controller._on_open)
-                    self._user_data[-1].ui.on_close.registerCallback(agent_controller._on_close)
-
+                    self._user_data[-1].ui.on_pick.registerCallback(
+                        agent_controller._on_pick
+                    )
+                    self._user_data[-1].ui.on_place.registerCallback(
+                        agent_controller._on_place
+                    )
+                    self._user_data[-1].ui.on_open.registerCallback(
+                        agent_controller._on_open
+                    )
+                    self._user_data[-1].ui.on_close.registerCallback(
+                        agent_controller._on_close
+                    )
 
         self._frame_recorder = FrameRecorder(
             app_service, app_data, self._world
diff --git a/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py b/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py
index 3ee85d0de7..e5cb3a8517 100644
--- a/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py
+++ b/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py
@@ -9,21 +9,19 @@
 
 import logging
 import threading
-from typing import Any, Dict, Union
+from typing import Any, Dict, List, Union
 
 import numpy as np
 from habitat_llm.agent import Agent
 from habitat_llm.agent.env import EnvironmentInterface
 from habitat_llm.planner.llm_planner import LLMPlanner
 from habitat_llm.utils import fix_config, setup_config
-from habitat_llm.utils.analysis import CodeTimer
 from hydra.utils import instantiate
 from omegaconf import DictConfig
 
 import habitat
 import habitat.config
 from habitat.core.environments import GymHabitatEnv
-from habitat.sims.habitat_simulator.sim_utilities import get_obj_from_id
 from habitat_hitl.environment.controllers.baselines_controller import (
     SingleAgentBaselinesController,
 )
@@ -80,7 +78,7 @@ def __init__(
         self.initialize_environment_interface()
         self.initialize_planner()
         self.info: Dict[str, Any] = {}
-        self._human_action_history = []
+        self._human_action_history: List[Any] = []
 
     def initialize_planner(self):
         # NOTE: using instantiate here, but given this is planning for a single agent

From 5f0780730d8337b10148768b1cc408d55d989f2d Mon Sep 17 00:00:00 2001
From: Mikael Dallaire Cote <110583667+0mdc@users.noreply.github.com>
Date: Sun, 2 Jun 2024 18:34:56 -0400
Subject: [PATCH 51/88] Skip serialization of attributes missing mandatory
 values.

---
 .../habitat_hitl/core/serialize_utils.py         | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/habitat-hitl/habitat_hitl/core/serialize_utils.py b/habitat-hitl/habitat_hitl/core/serialize_utils.py
index 1bcfd32a08..1ac81c5449 100644
--- a/habitat-hitl/habitat_hitl/core/serialize_utils.py
+++ b/habitat-hitl/habitat_hitl/core/serialize_utils.py
@@ -57,11 +57,17 @@ def convert_to_json_friendly(obj):
         return convert_to_json_friendly(list(obj))
     else:
         # If obj is a complex object, convert its attributes to a dictionary
-        attributes = {
-            attr: convert_to_json_friendly(getattr(obj, attr))
-            for attr in dir(obj)
-            if not attr.startswith("__") and not callable(getattr(obj, attr))
-        }
+        attributes = {}
+        for attr in dir(obj):
+            try:
+                if not attr.startswith("__") and not callable(
+                    getattr(obj, attr)
+                ):
+                    attributes[attr] = getattr(obj, attr)
+            except Exception as e:
+                print(
+                    f"Unable to convert attribute to JSON: {attr}. Skipping. {e}"
+                )
         return convert_to_json_friendly(attributes)
 
 

From 9dcb6ce3f017743524b0b5b220faa2926d41b10c Mon Sep 17 00:00:00 2001
From: Mikael Dallaire Cote <110583667+0mdc@users.noreply.github.com>
Date: Sun, 2 Jun 2024 18:35:09 -0400
Subject: [PATCH 52/88] Fix other agent's sensor.

---
 .../config/lang_rearrange_spot_humanoid.yaml  |  4 +++-
 .../config/language_rearrange.yaml            |  4 +++-
 ...anguage_rearrange_multi_agent_llm_gui.yaml |  4 +++-
 examples/hitl/rearrange_v2/rearrange_v2.py    | 21 +++++++++----------
 4 files changed, 19 insertions(+), 14 deletions(-)

diff --git a/examples/hitl/rearrange_v2/config/lang_rearrange_spot_humanoid.yaml b/examples/hitl/rearrange_v2/config/lang_rearrange_spot_humanoid.yaml
index 2482999814..9ec894d240 100644
--- a/examples/hitl/rearrange_v2/config/lang_rearrange_spot_humanoid.yaml
+++ b/examples/hitl/rearrange_v2/config/lang_rearrange_spot_humanoid.yaml
@@ -52,4 +52,6 @@ rearrange_v2:
   data_collection:
     s3_path: "Placeholder/"
     output_file_name: "session"
-  head_sensor_substring: "head_sensor"
+  head_sensor_substrings:
+    - "head"
+    - "jaw"
diff --git a/examples/hitl/rearrange_v2/config/language_rearrange.yaml b/examples/hitl/rearrange_v2/config/language_rearrange.yaml
index 8ee176b28b..172f74ae49 100644
--- a/examples/hitl/rearrange_v2/config/language_rearrange.yaml
+++ b/examples/hitl/rearrange_v2/config/language_rearrange.yaml
@@ -92,4 +92,6 @@ rearrange_v2:
   data_collection:
     s3_path: "Placeholder/"
     output_file_name: "session"
-  head_sensor_substring: "head_sensor"
+  head_sensor_substrings:
+    - "head"
+    - "jaw"
diff --git a/examples/hitl/rearrange_v2/config/language_rearrange_multi_agent_llm_gui.yaml b/examples/hitl/rearrange_v2/config/language_rearrange_multi_agent_llm_gui.yaml
index 7be60c0a2d..2c07ea7cd6 100644
--- a/examples/hitl/rearrange_v2/config/language_rearrange_multi_agent_llm_gui.yaml
+++ b/examples/hitl/rearrange_v2/config/language_rearrange_multi_agent_llm_gui.yaml
@@ -156,4 +156,6 @@ rearrange_v2:
   data_collection:
     s3_path: "Placeholder/"
     output_file_name: "session"
-  head_sensor_substring: "head_sensor"
+  head_sensor_substrings:
+    - "head"
+    - "jaw"
diff --git a/examples/hitl/rearrange_v2/rearrange_v2.py b/examples/hitl/rearrange_v2/rearrange_v2.py
index c54c62826f..97b880c1f8 100644
--- a/examples/hitl/rearrange_v2/rearrange_v2.py
+++ b/examples/hitl/rearrange_v2/rearrange_v2.py
@@ -163,9 +163,7 @@ def update_camera_from_sensor(self) -> None:
         For AI-controlled agents, the camera transform can be inferred from this function.
         """
         if self.render_camera is not None:
-            self.cam_transform = np.linalg.inv(
-                self.render_camera.camera_matrix
-            )
+            self.cam_transform = self.render_camera.camera_matrix.inverted()
 
 
 class UserData:
@@ -467,15 +465,16 @@ def __init__(
         # HACK: The simulator has only 1 agent with all sensors. See 'create_sim_config() in habitat_simulator.py'.
         sim_agent = sim.agents[0]
         config = self._app_service.config
-        head_sensor_substring: str = config.rearrange_v2.head_sensor_substring
+        head_sensor_substrings: List[
+            str
+        ] = config.rearrange_v2.head_sensor_substrings
         for agent_index in range(self._num_agents):
-            agent = agent_mgr._all_agent_data[agent_index]
-
             render_camera: Optional[Any] = None
-            for camera_name in agent.articulated_agent._cameras:
-                if head_sensor_substring in camera_name:
-                    sensor = sim_agent._sensors.get(camera_name, None)
-                    if sensor is not None and hasattr(sensor, "render_camera"):
+            for substring in head_sensor_substrings:
+                for sensor_name, sensor in sim_agent._sensors.items():
+                    if substring in sensor_name and hasattr(
+                        sensor, "render_camera"
+                    ):
                         render_camera = sensor.render_camera
                         break
 
@@ -740,7 +739,7 @@ def sim_update(self, dt: float, post_sim_update_dict):
                 other_agent_data = self._agent_data[other_agent_idx]
 
                 # If the other agent is AI-controlled, update its camera.
-                if other_agent_idx not in self._user_to_agent_index:
+                if other_agent_idx not in self._agent_to_user_index:
                     other_agent_data.update_camera_from_sensor()
 
                 self._user_data[user_index].draw_pip_viewport(other_agent_data)

From 4431ab2f8a88a3b4026eddd0b9e93773c18984e9 Mon Sep 17 00:00:00 2001
From: Mikael Dallaire Cote <110583667+0mdc@users.noreply.github.com>
Date: Sun, 2 Jun 2024 18:36:21 -0400
Subject: [PATCH 53/88] Add comment.

---
 examples/hitl/rearrange_v2/rearrange_v2.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/examples/hitl/rearrange_v2/rearrange_v2.py b/examples/hitl/rearrange_v2/rearrange_v2.py
index 97b880c1f8..abad6bad80 100644
--- a/examples/hitl/rearrange_v2/rearrange_v2.py
+++ b/examples/hitl/rearrange_v2/rearrange_v2.py
@@ -471,6 +471,7 @@ def __init__(
         for agent_index in range(self._num_agents):
             render_camera: Optional[Any] = None
             for substring in head_sensor_substrings:
+                # TODO: Validate that the lab agent owns the sensor.
                 for sensor_name, sensor in sim_agent._sensors.items():
                     if substring in sensor_name and hasattr(
                         sensor, "render_camera"

From 61750e42c2e279a4233617182f4ec27a6a1e0e32 Mon Sep 17 00:00:00 2001
From: Mikael Dallaire Cote <110583667+0mdc@users.noreply.github.com>
Date: Mon, 3 Jun 2024 12:19:41 -0400
Subject: [PATCH 54/88] Query grasp manager to determine whether an object is
 picked by an agent.

---
 examples/hitl/rearrange_v2/ui.py                  | 10 +++-------
 examples/hitl/rearrange_v2/world.py               | 15 +++++++++++++++
 .../tasks/rearrange/articulated_agent_manager.py  |  2 +-
 3 files changed, 19 insertions(+), 8 deletions(-)

diff --git a/examples/hitl/rearrange_v2/ui.py b/examples/hitl/rearrange_v2/ui.py
index 7117167de0..c0d05193a1 100644
--- a/examples/hitl/rearrange_v2/ui.py
+++ b/examples/hitl/rearrange_v2/ui.py
@@ -223,7 +223,7 @@ def _pick_object(self, object_id: int) -> None:
         if (
             not self._is_holding_object()
             and self._is_object_pickable(object_id)
-            and not self._is_someone_holding_object(object_id)
+            and not self._world.is_any_agent_holding_object(object_id)
         ):
             rigid_object = self._world.get_rigid_object(object_id)
             if rigid_object is not None:
@@ -353,10 +353,6 @@ def _is_holding_object(self) -> bool:
         """Returns true if the user is holding an object."""
         return self._held_object_id is not None
 
-    def _is_someone_holding_object(self, object_id: int) -> bool:
-        """Returns true if any user is holding the specified object."""
-        return object_id in self._world._all_held_object_ids
-
     def _is_within_reach(self, target_pos: mn.Vector3) -> bool:
         """Returns true if the target can be reached by the user."""
         return (
@@ -382,7 +378,7 @@ def _is_location_suitable_for_placement(
         if not self._is_within_reach(point):
             return False
         # Cannot place on objects held by agents.
-        if self._is_someone_holding_object(receptacle_object_id):
+        if self._world.is_any_agent_holding_object(receptacle_object_id):
             return False
         return True
 
@@ -451,7 +447,7 @@ def _draw_hovered_pickable(self) -> None:
         object_id = self._hover_selection.object_id
         if not self._is_object_pickable(
             object_id
-        ) or self._is_someone_holding_object(object_id):
+        ) or self._world.is_any_agent_holding_object(object_id):
             return
 
         managed_object = sim_utilities.get_obj_from_id(
diff --git a/examples/hitl/rearrange_v2/world.py b/examples/hitl/rearrange_v2/world.py
index d7126eceb0..084d0f716b 100644
--- a/examples/hitl/rearrange_v2/world.py
+++ b/examples/hitl/rearrange_v2/world.py
@@ -122,3 +122,18 @@ def get_agent_object_ids(self, agent_index: int) -> Set[int]:
             agent_object_ids.add(link_object_id)
 
         return agent_object_ids
+
+    def is_any_agent_holding_object(self, object_id: int) -> bool:
+        """
+        Checks whether the specified object is being held by an agent.
+        This function looks up both the HITL world state and grasp managers.
+        """
+        sim = self._sim
+        agents_mgr = sim.agents_mgr
+
+        for agent_index in range(len(agents_mgr.agent_names)):
+            grasp_mgr = agents_mgr._all_agent_data[agent_index].grasp_mgr
+            if grasp_mgr._snapped_obj_id == object_id:
+                return True
+
+        return object_id in self._all_held_object_ids
diff --git a/habitat-lab/habitat/tasks/rearrange/articulated_agent_manager.py b/habitat-lab/habitat/tasks/rearrange/articulated_agent_manager.py
index dbc5e3e95f..c5cae517d7 100644
--- a/habitat-lab/habitat/tasks/rearrange/articulated_agent_manager.py
+++ b/habitat-lab/habitat/tasks/rearrange/articulated_agent_manager.py
@@ -69,7 +69,7 @@ class ArticulatedAgentManager:
 
     def __init__(self, cfg, sim):
         self._sim = sim
-        self._all_agent_data = []
+        self._all_agent_data: List[ArticulatedAgentData] = []
         self._is_pb_installed = is_pb_installed()
         self.agent_names = cfg.agents
 

From 5945cce1786823824cccb92e26c4b5748610bfed Mon Sep 17 00:00:00 2001
From: Mikael Dallaire Cote <110583667+0mdc@users.noreply.github.com>
Date: Mon, 3 Jun 2024 12:20:43 -0400
Subject: [PATCH 55/88] Move HITL output to 'data/' so that it is ignored by
 git.

---
 examples/hitl/rearrange_v2/app_state_end_session.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/hitl/rearrange_v2/app_state_end_session.py b/examples/hitl/rearrange_v2/app_state_end_session.py
index bc5bb5e54c..ed988504e1 100644
--- a/examples/hitl/rearrange_v2/app_state_end_session.py
+++ b/examples/hitl/rearrange_v2/app_state_end_session.py
@@ -83,7 +83,7 @@ def _end_session(self):
 
             # Use the port as a discriminator for when there are multiple concurrent servers.
             output_folder_suffix = str(config.habitat_hitl.networking.port)
-            output_folder = f"output_{output_folder_suffix}"
+            output_folder = f"data/output_{output_folder_suffix}"
 
             output_file_name = data_collection_config.output_file_name
             output_file = f"{output_file_name}.json.gz"

From 0c585de5ca8c782ab005f750434cd4dc69ed5d77 Mon Sep 17 00:00:00 2001
From: Mikael Dallaire Cote <110583667+0mdc@users.noreply.github.com>
Date: Mon, 3 Jun 2024 17:07:06 -0400
Subject: [PATCH 56/88] Fix place receptacle ID.

---
 examples/hitl/rearrange_v2/ui.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/hitl/rearrange_v2/ui.py b/examples/hitl/rearrange_v2/ui.py
index c0d05193a1..efc5cafe2c 100644
--- a/examples/hitl/rearrange_v2/ui.py
+++ b/examples/hitl/rearrange_v2/ui.py
@@ -285,7 +285,7 @@ def _place_object(self) -> None:
                 UI.PlaceEventData(
                     object_id=object_id,
                     object_handle=rigid_object.handle,
-                    receptacle_id=self._place_selection.object_id,
+                    receptacle_id=receptacle_object_id,
                 )
             )
 

From 0d1d2fb495fe04baffa48506187f2a3b2a4a572c Mon Sep 17 00:00:00 2001
From: Mikael Dallaire Cote <110583667+0mdc@users.noreply.github.com>
Date: Tue, 4 Jun 2024 12:30:45 -0400
Subject: [PATCH 57/88] Use grasp manager to communicate grasp state to other
 systems.

---
 examples/hitl/rearrange_v2/ui.py              | 27 ++++++++++-
 .../rearrange/rearrange_grasp_manager.py      | 46 +++++++++++--------
 2 files changed, 52 insertions(+), 21 deletions(-)

diff --git a/examples/hitl/rearrange_v2/ui.py b/examples/hitl/rearrange_v2/ui.py
index efc5cafe2c..2188d3d06c 100644
--- a/examples/hitl/rearrange_v2/ui.py
+++ b/examples/hitl/rearrange_v2/ui.py
@@ -8,7 +8,7 @@
 
 from dataclasses import dataclass
 from datetime import datetime, timedelta
-from typing import List, Optional, Tuple
+from typing import TYPE_CHECKING, List, Optional, Tuple
 
 import magnum as mn
 from world import World
@@ -25,6 +25,11 @@
 from habitat_hitl.environment.controllers.controller_abc import GuiController
 from habitat_hitl.environment.hablab_utils import get_agent_art_obj_transform
 
+if TYPE_CHECKING:
+    from habitat.tasks.rearrange.rearrange_grasp_manager import (
+        RearrangeGraspManager,
+    )
+
 # Verticality threshold for successful placement.
 MINIMUM_DROP_VERTICALITY: float = 0.9
 
@@ -123,6 +128,9 @@ def place_selection_fn(gui_input: GuiInput) -> bool:
         self._on_open = Event()
         self._on_close = Event()
 
+        # Disable the snap manager automatic object positioning so that object placement is controlled here.
+        self._get_grasp_manager()._automatically_update_snapped_object = False
+
     @dataclass
     class PickEventData:
         object_id: int
@@ -218,6 +226,11 @@ def draw_ui(self) -> None:
         self._draw_hovered_pickable()
         self._draw_goals()
 
+    def _get_grasp_manager(self) -> "RearrangeGraspManager":
+        agent_mgr = self._sim.agents_mgr
+        agent_data = agent_mgr._all_agent_data[self._gui_controller._agent_idx]
+        return agent_data.grasp_mgr
+
     def _pick_object(self, object_id: int) -> None:
         """Pick the specified object_id. The object must be pickable and held by nobody else."""
         if (
@@ -233,6 +246,14 @@ def _pick_object(self, object_id: int) -> None:
                     self._held_object_id = object_id
                     self._place_selection.deselect()
                     self._world._all_held_object_ids.add(object_id)
+
+                    # Set the snapped object without adding a constraint.
+                    grasp_mgr = self._get_grasp_manager()
+                    grasp_mgr.snap_to_obj(
+                        snap_obj_id=object_id,
+                        force=True,
+                    )
+
                     self._on_pick.invoke(
                         UI.PickEventData(
                             object_id=object_id,
@@ -281,6 +302,10 @@ def _place_object(self) -> None:
             self._held_object_id = None
             self._place_selection.deselect()
             self._world._all_held_object_ids.remove(object_id)
+
+            grasp_mgr = self._get_grasp_manager()
+            grasp_mgr.desnap(force=True)
+
             self._on_place.invoke(
                 UI.PlaceEventData(
                     object_id=object_id,
diff --git a/habitat-lab/habitat/tasks/rearrange/rearrange_grasp_manager.py b/habitat-lab/habitat/tasks/rearrange/rearrange_grasp_manager.py
index aee90a34ef..fe3b0155a2 100644
--- a/habitat-lab/habitat/tasks/rearrange/rearrange_grasp_manager.py
+++ b/habitat-lab/habitat/tasks/rearrange/rearrange_grasp_manager.py
@@ -55,6 +55,7 @@ def __init__(
         self._config = config
         self._managed_articulated_agent = articulated_agent
         self.ee_index = ee_index
+        self._automatically_update_snapped_object = True
 
         self._kinematic_mode = self._sim.habitat_config.kinematic_mode
 
@@ -213,15 +214,16 @@ def snap_to_marker(self, marker_name: str) -> None:
         if self._kinematic_mode:
             return
 
-        self._snap_constraints = [
-            self.create_hold_constraint(
-                RigidConstraintType.PointToPoint,
-                mn.Vector3(0.0, 0.0, 0.0),
-                mn.Vector3(*marker.offset_position),
-                marker.ao_parent.object_id,
-                marker.link_id,
-            ),
-        ]
+        if self._automatically_update_snapped_object:
+            self._snap_constraints = [
+                self.create_hold_constraint(
+                    RigidConstraintType.PointToPoint,
+                    mn.Vector3(0.0, 0.0, 0.0),
+                    mn.Vector3(*marker.offset_position),
+                    marker.ao_parent.object_id,
+                    marker.link_id,
+                ),
+            ]
 
     def create_hold_constraint(
         self,
@@ -299,7 +301,10 @@ def update_object_to_grasp(self) -> None:
         is grasped then nothing will happen.
         """
 
-        if self._snapped_obj_id is None:
+        if (
+            self._snapped_obj_id is None
+            or not self._automatically_update_snapped_object
+        ):
             # Not grasping anything, so do nothing.
             return
 
@@ -367,16 +372,17 @@ def snap_to_obj(
         if rel_pos is None:
             rel_pos = mn.Vector3.zero_init()
 
-        self._snap_constraints = [
-            self.create_hold_constraint(
-                RigidConstraintType.Fixed,
-                # link pivot is the object in link space
-                pivot_in_link=rel_pos,
-                # object pivot is local origin
-                pivot_in_obj=mn.Vector3.zero_init(),
-                obj_id_b=self._snapped_obj_id,
-            ),
-        ]
+        if self._automatically_update_snapped_object:
+            self._snap_constraints = [
+                self.create_hold_constraint(
+                    RigidConstraintType.Fixed,
+                    # link pivot is the object in link space
+                    pivot_in_link=rel_pos,
+                    # object pivot is local origin
+                    pivot_in_obj=mn.Vector3.zero_init(),
+                    obj_id_b=self._snapped_obj_id,
+                ),
+            ]
 
         if should_open_gripper:
             self._managed_articulated_agent.open_gripper()

From 3ac6df1fd5831ecc7dc5e81996932e75c092ca86 Mon Sep 17 00:00:00 2001
From: Mikael Dallaire Cote <110583667+0mdc@users.noreply.github.com>
Date: Tue, 4 Jun 2024 12:31:11 -0400
Subject: [PATCH 58/88] Enable humanoid model in single-learn so that the human
 appears in the PiP viewport.

---
 .../rearrange_v2/config/lang_rearrange_llmspot_guihumanoid.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/hitl/rearrange_v2/config/lang_rearrange_llmspot_guihumanoid.yaml b/examples/hitl/rearrange_v2/config/lang_rearrange_llmspot_guihumanoid.yaml
index aa0bea16b6..d555dd0b61 100644
--- a/examples/hitl/rearrange_v2/config/lang_rearrange_llmspot_guihumanoid.yaml
+++ b/examples/hitl/rearrange_v2/config/lang_rearrange_llmspot_guihumanoid.yaml
@@ -86,7 +86,7 @@ habitat_hitl:
     - agent_index: 0
       lin_speed: 10.0
       ang_speed: 300
-  hide_humanoid_in_gui: True
+  hide_humanoid_in_gui: False
   camera:
     first_person_mode: True
   networking:

From ef49e09e2ee189216d598b6c837937a75fad3525 Mon Sep 17 00:00:00 2001
From: Priyam Parashar <priyam8parashar@gmail.com>
Date: Tue, 4 Jun 2024 11:11:10 -0700
Subject: [PATCH 59/88] forward all the actions

---
 .../environment/controllers/llm_controller.py    | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py b/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py
index e5cb3a8517..ebcdc008f0 100644
--- a/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py
+++ b/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py
@@ -120,7 +120,6 @@ def on_environment_reset(self):
         self.current_instruction = (
             self.environment_interface.hab_env.current_episode.instruction
         )
-        print(f"Instruction: {self.current_instruction}")
         self._iter = 0
 
     def _on_pick(self, _e: Any = None):
@@ -201,6 +200,21 @@ def act(self, observations, *args, **kwargs):
                 self.environment_interface.agent_state_history[1].append(
                     f"Agent placed {object_name} in {action['receptacle_id']}"
                 )
+            elif action["action"] == "OPEN":
+                object_name = self.environment_interface.world_graph.get_node_from_sim_handle(
+                    action["object_handle"]
+                ).name
+                self.environment_interface.agent_state_history[1].append(
+                    f"Agent opened {object_name}"
+                )
+            elif action["action"] == "CLOSE":
+                object_name = self.environment_interface.world_graph.get_node_from_sim_handle(
+                    action["object_handle"]
+                ).name
+                self.environment_interface.agent_state_history[1].append(
+                    f"Agent closed {object_name}"
+                )
+
         if self._iter < self._skip_iters or self._task_done:
             self._iter += 1
             return np.zeros(self._agent_action_length)

From 38e9da228a4fa2e2188449aebb95d9abc6618d40 Mon Sep 17 00:00:00 2001
From: Priyam Parashar <priyam8parashar@gmail.com>
Date: Tue, 4 Jun 2024 15:18:27 -0700
Subject: [PATCH 60/88] set constraint_violation_drops_object to False

---
 .../config/language_rearrange_multi_agent_llm_gui.yaml          | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/hitl/rearrange_v2/config/language_rearrange_multi_agent_llm_gui.yaml b/examples/hitl/rearrange_v2/config/language_rearrange_multi_agent_llm_gui.yaml
index 2c07ea7cd6..2cfc1b4cd2 100644
--- a/examples/hitl/rearrange_v2/config/language_rearrange_multi_agent_llm_gui.yaml
+++ b/examples/hitl/rearrange_v2/config/language_rearrange_multi_agent_llm_gui.yaml
@@ -53,7 +53,7 @@ habitat:
     # slack_reward: -0.0005
     # end_on_success: True
     # constraint_violation_ends_episode: False
-    # constraint_violation_drops_object: True
+    constraint_violation_drops_object: False
     # task_spec_base_path: benchmark/multi_agent/
     # task_spec: pddl/multi_agent_tidy_house
     # pddl_domain_def: fp

From e487e9f968368ac3c02fbe6b1d40eb9cffda67d3 Mon Sep 17 00:00:00 2001
From: Priyam Parashar <priyam8parashar@gmail.com>
Date: Tue, 4 Jun 2024 15:29:09 -0700
Subject: [PATCH 61/88] robustifying a bit

---
 .../environment/controllers/llm_controller.py | 32 +++++++++++--------
 1 file changed, 19 insertions(+), 13 deletions(-)

diff --git a/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py b/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py
index ebcdc008f0..2ea6a54800 100644
--- a/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py
+++ b/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py
@@ -50,6 +50,7 @@ def __init__(
         self._task_done = False
         self._iter = 0
         self._skip_iters = 0
+        self._log: list = []
         if log_to_file:
             import datetime
 
@@ -121,6 +122,7 @@ def on_environment_reset(self):
             self.environment_interface.hab_env.current_episode.instruction
         )
         self._iter = 0
+        self._log = []
 
     def _on_pick(self, _e: Any = None):
         action = {
@@ -186,31 +188,35 @@ def act(self, observations, *args, **kwargs):
         # update agent state history
         while self._human_action_history:
             action = self._human_action_history.pop(0)
-            if action["action"] == "PICK":
+            object_name = None
+            try:
                 object_name = self.environment_interface.world_graph.get_node_from_sim_handle(
                     action["object_handle"]
                 ).name
+            except Exception as e:
+                self._log.append(e)
+                continue
+            if action["action"] == "PICK":
                 self.environment_interface.agent_state_history[1].append(
                     f"Agent picked up {object_name}"
                 )
             elif action["action"] == "PLACE":
-                object_name = self.environment_interface.world_graph.get_node_from_sim_handle(
-                    action["object_handle"]
-                ).name
-                self.environment_interface.agent_state_history[1].append(
-                    f"Agent placed {object_name} in {action['receptacle_id']}"
-                )
+                if action["receptacle_id"] is not None:
+                    receptacle_name = self.environment_interface.world_graph.get_node_from_sim_handle(
+                        action["receptacle_id"]
+                    ).name
+                    self.environment_interface.agent_state_history[1].append(
+                        f"Agent placed {object_name} in {receptacle_name}"
+                    )
+                else:
+                    self.environment_interface.agent_state_history[1].append(
+                        f"Agent placed {object_name} in {action['receptacle_id']}"
+                    )
             elif action["action"] == "OPEN":
-                object_name = self.environment_interface.world_graph.get_node_from_sim_handle(
-                    action["object_handle"]
-                ).name
                 self.environment_interface.agent_state_history[1].append(
                     f"Agent opened {object_name}"
                 )
             elif action["action"] == "CLOSE":
-                object_name = self.environment_interface.world_graph.get_node_from_sim_handle(
-                    action["object_handle"]
-                ).name
                 self.environment_interface.agent_state_history[1].append(
                     f"Agent closed {object_name}"
                 )

From 95522f30167d4c48499d1390289ef26570600701 Mon Sep 17 00:00:00 2001
From: Priyam Parashar <priyam8parashar@gmail.com>
Date: Wed, 5 Jun 2024 13:23:52 -0700
Subject: [PATCH 62/88] receptacle naming

---
 .../environment/controllers/llm_controller.py      | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py b/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py
index 2ea6a54800..061eb56eab 100644
--- a/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py
+++ b/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py
@@ -22,6 +22,7 @@
 import habitat
 import habitat.config
 from habitat.core.environments import GymHabitatEnv
+from habitat.sims.habitat_simulator.sim_utilities import get_obj_from_id
 from habitat_hitl.environment.controllers.baselines_controller import (
     SingleAgentBaselinesController,
 )
@@ -203,14 +204,20 @@ def act(self, observations, *args, **kwargs):
             elif action["action"] == "PLACE":
                 if action["receptacle_id"] is not None:
                     receptacle_name = self.environment_interface.world_graph.get_node_from_sim_handle(
-                        action["receptacle_id"]
+                        get_obj_from_id(
+                            self.environment_interface.sim,
+                            action["receptacle_id"],
+                        ).handle
                     ).name
                     self.environment_interface.agent_state_history[1].append(
-                        f"Agent placed {object_name} in {receptacle_name}"
+                        f"Agent placed {object_name} in/on {receptacle_name}"
+                    )
+                    print(
+                        f"Agent placed {object_name} in/on {receptacle_name}"
                     )
                 else:
                     self.environment_interface.agent_state_history[1].append(
-                        f"Agent placed {object_name} in {action['receptacle_id']}"
+                        f"Agent placed {object_name} in unknown location"
                     )
             elif action["action"] == "OPEN":
                 self.environment_interface.agent_state_history[1].append(
@@ -236,4 +243,5 @@ def act(self, observations, *args, **kwargs):
             )
             self._thread.start()
 
+        self._iter += 1
         return low_level_actions

From 7fa8f174f8cc3131b871f5beec0a2b977fdeb60d Mon Sep 17 00:00:00 2001
From: Priyam Parashar <priyam8parashar@gmail.com>
Date: Wed, 5 Jun 2024 13:24:13 -0700
Subject: [PATCH 63/88] enable agent_0 arm RGB sensor in multi-agent LLM GUI config

---
 .../config/language_rearrange_multi_agent_llm_gui.yaml          | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/hitl/rearrange_v2/config/language_rearrange_multi_agent_llm_gui.yaml b/examples/hitl/rearrange_v2/config/language_rearrange_multi_agent_llm_gui.yaml
index 2cfc1b4cd2..34d43c0e41 100644
--- a/examples/hitl/rearrange_v2/config/language_rearrange_multi_agent_llm_gui.yaml
+++ b/examples/hitl/rearrange_v2/config/language_rearrange_multi_agent_llm_gui.yaml
@@ -74,7 +74,7 @@ habitat:
 # --- habitat-llm block
       # - agent_0_third_rgb
       # - agent_0_articulated_agent_arm_depth
-      # - agent_0_articulated_agent_arm_rgb
+      - agent_0_articulated_agent_arm_rgb
       - agent_0_articulated_agent_arm_panoptic
       # - agent_0_head_depth
       # - agent_0_head_rgb

From 93ed72d8c144cbcbeab73200cd70d538dfc24a9f Mon Sep 17 00:00:00 2001
From: Priyam Parashar <priyam8parashar@gmail.com>
Date: Wed, 5 Jun 2024 17:19:48 -0700
Subject: [PATCH 64/88] termination condition for single-learn case

---
 examples/hitl/rearrange_v2/rearrange_v2.py | 28 +++++++++++++++++++++-
 1 file changed, 27 insertions(+), 1 deletion(-)

diff --git a/examples/hitl/rearrange_v2/rearrange_v2.py b/examples/hitl/rearrange_v2/rearrange_v2.py
index abad6bad80..439248dd88 100644
--- a/examples/hitl/rearrange_v2/rearrange_v2.py
+++ b/examples/hitl/rearrange_v2/rearrange_v2.py
@@ -40,7 +40,10 @@
     GuiHumanoidController,
     GuiRobotController,
 )
-from habitat_hitl.environment.controllers.llm_controller import LLMController
+from habitat_hitl.environment.controllers.llm_controller import (
+    LLMController,
+    PlannerStatus,
+)
 from habitat_hitl.environment.hablab_utils import get_agent_art_obj_transform
 from habitat_sim.utils.common import quat_from_magnum, quat_to_coeffs
 
@@ -156,6 +159,12 @@ def __init__(
 
         self.episode_completion_status = EpisodeCompletionStatus.PENDING
 
+    def _on_termination_cb(self, _e: Any = None):
+        if _e.status == PlannerStatus.SUCCESS:
+            self.episode_completion_status = EpisodeCompletionStatus.SUCCESS
+        else:
+            self.episode_completion_status = EpisodeCompletionStatus.FAILURE
+
     def update_camera_from_sensor(self) -> None:
         """
         Update the camera transform from the agent's sensor.
@@ -498,6 +507,10 @@ def __init__(
                     render_camera=render_camera,
                 )
             )
+            if isinstance(agent_controller, LLMController):
+                agent_controller._on_termination.registerCallback(
+                    self._agent_data[agent_index]._on_termination_cb
+                )
 
         self._user_data: List[UserData] = []
         for user_index in self._users.indices(Mask.ALL):
@@ -645,6 +658,7 @@ def _get_status_text(self, user_index: int):
         if len(task_instruction) > 0:
             status_str += "Instruction: " + task_instruction + "\n"
 
+        # the multi-agent case
         if (
             self._users.max_user_count > 1
             and self._user_data[
@@ -657,6 +671,18 @@ def _get_status_text(self, user_index: int):
             elif self._has_any_agent_finished_failure():
                 status_str += "\n\nThe other participant signaled a problem with the task.\nPress '0' to continue."
 
+        # the single-learn agent case
+        if (
+            (len(self._user_data) == 1)
+            and len(self._agent_data) == 2
+            and any(
+                self._agent_data[agent_index].episode_completion_status
+                != EpisodeCompletionStatus.PENDING
+                for agent_index in range(self._num_agents)
+            )
+        ):
+            status_str += "\n\nThe other participant finished working on their part of the task.\nPress '0' when you are done."
+
         client_helper = self._app_service.remote_client_state._client_helper
         if client_helper.do_show_idle_kick_warning(user_index):
             remaining_time = str(

From 2191f56f1633de5bc5e1ea4c916c3544829a8774 Mon Sep 17 00:00:00 2001
From: Priyam Parashar <priyam8parashar@gmail.com>
Date: Wed, 5 Jun 2024 17:21:46 -0700
Subject: [PATCH 65/88] adding termination event; bypassing planning when done

---
 .../environment/controllers/llm_controller.py | 176 ++++++++++++------
 1 file changed, 119 insertions(+), 57 deletions(-)

diff --git a/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py b/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py
index 061eb56eab..dd83f4395e 100644
--- a/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py
+++ b/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py
@@ -9,8 +9,11 @@
 
 import logging
 import threading
+from dataclasses import dataclass
+from enum import Enum
 from typing import Any, Dict, List, Union
 
+import cv2
 import numpy as np
 from habitat_llm.agent import Agent
 from habitat_llm.agent.env import EnvironmentInterface
@@ -23,11 +26,22 @@
 import habitat.config
 from habitat.core.environments import GymHabitatEnv
 from habitat.sims.habitat_simulator.sim_utilities import get_obj_from_id
+from habitat_hitl.core.event import Event
 from habitat_hitl.environment.controllers.baselines_controller import (
     SingleAgentBaselinesController,
 )
 
 
+class PlannerStatus(Enum):
+    SUCCESS = 0
+    FAILED = 1
+
+
+@dataclass
+class AgentTerminationEvent:
+    status: PlannerStatus
+
+
 class LLMController(SingleAgentBaselinesController):
     """Controller for single LLM controlled agent."""
 
@@ -81,6 +95,11 @@ def __init__(
         self.initialize_planner()
         self.info: Dict[str, Any] = {}
         self._human_action_history: List[Any] = []
+        self._planner_info: dict = {}
+
+        # interfacing with HitL
+        self._on_termination = Event()
+        self._termination_reported = False
 
     def initialize_planner(self):
         # NOTE: using instantiate here, but given this is planning for a single agent
@@ -116,14 +135,19 @@ def on_environment_reset(self):
         self.planner.reset()
         if self._thread is not None:
             self._thread.join()
-            self._thread = None  # noqa: F841
-            self._low_level_actions = {}
+        self._thread = None
 
         self.current_instruction = (
             self.environment_interface.hab_env.current_episode.instruction
         )
         self._iter = 0
         self._log = []
+        self._termination_reported = False
+        self._low_level_actions = {}
+        self._task_done = False
+        self._iter = 0
+        self._skip_iters = 0
+        self._log = []
 
     def _on_pick(self, _e: Any = None):
         action = {
@@ -170,7 +194,7 @@ def _act(self, observations, *args, **kwargs):
         # and it returns the actions for the agent
         (
             self._low_level_actions,
-            planner_info,
+            self._planner_info,
             self._task_done,
         ) = self.planner.get_next_action(
             self.current_instruction,
@@ -180,68 +204,106 @@ def _act(self, observations, *args, **kwargs):
         )
         return
 
-    def act(self, observations, *args, **kwargs):
-        # NOTE: update the world state to reflect the new observations
-        # TODO: might need a lock on world-state here?
-        self.environment_interface.update_world_state(
-            observations, disable_logging=True
-        )
-        # update agent state history
-        while self._human_action_history:
-            action = self._human_action_history.pop(0)
-            object_name = None
-            try:
-                object_name = self.environment_interface.world_graph.get_node_from_sim_handle(
-                    action["object_handle"]
-                ).name
-            except Exception as e:
-                self._log.append(e)
-                continue
-            if action["action"] == "PICK":
-                self.environment_interface.agent_state_history[1].append(
-                    f"Agent picked up {object_name}"
+    def act(self, observations, debug_obs: bool = False, *args, **kwargs):
+        # set the task as done and report it back
+        if self._task_done and not self._termination_reported:
+            if (
+                self._planner_info["replanning_count"]
+                >= self._planner_info["replanning_threshold"]
+            ):
+                self._on_termination.invoke(
+                    AgentTerminationEvent(PlannerStatus.FAILED)
+                )
+            else:
+                self._on_termination.invoke(
+                    AgentTerminationEvent(PlannerStatus.SUCCESS)
                 )
-            elif action["action"] == "PLACE":
-                if action["receptacle_id"] is not None:
-                    receptacle_name = self.environment_interface.world_graph.get_node_from_sim_handle(
-                        get_obj_from_id(
-                            self.environment_interface.sim,
-                            action["receptacle_id"],
-                        ).handle
+            self._termination_reported = True
+
+        low_level_actions = np.zeros(self._agent_action_length)
+
+        if debug_obs:
+            rgb = observations["agent_1_head_rgb"]
+            panoptic = observations["agent_1_head_panoptic"]
+            cv2.imwrite(
+                f"./visuals/agent_1/rgb_{self._iter}.png",
+                cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR),
+            )
+            cv2.imwrite(
+                f"./visuals/agent_1/panoptic_{self._iter}.png", panoptic
+            )
+            rgb = observations["agent_0_articulated_agent_arm_rgb"]
+            panoptic = observations["agent_0_articulated_agent_arm_panoptic"]
+            cv2.imwrite(
+                f"./visuals/agent_0/rgb_{self._iter}.png",
+                cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR),
+            )
+            cv2.imwrite(
+                f"./visuals/agent_0/panoptic_{self._iter}.png", panoptic
+            )
+
+        # planning logic when task is not done
+        if not self._task_done:
+            # NOTE: update the world state to reflect the new observations
+            # TODO: might need a lock on world-state here?
+            self.environment_interface.update_world_state(
+                observations, disable_logging=True
+            )
+            # update agent state history
+            while self._human_action_history:
+                action = self._human_action_history.pop(0)
+                object_name = None
+                try:
+                    object_name = self.environment_interface.world_graph.get_node_from_sim_handle(
+                        action["object_handle"]
                     ).name
+                except Exception as e:
+                    self._log.append(e)
+                    continue
+                if action["action"] == "PICK":
                     self.environment_interface.agent_state_history[1].append(
-                        f"Agent placed {object_name} in/on {receptacle_name}"
+                        f"Agent picked up {object_name}"
                     )
-                    print(
-                        f"Agent placed {object_name} in/on {receptacle_name}"
+                elif action["action"] == "PLACE":
+                    if action["receptacle_id"] is not None:
+                        receptacle_name = self.environment_interface.world_graph.get_node_from_sim_handle(
+                            get_obj_from_id(
+                                self.environment_interface.sim,
+                                action["receptacle_id"],
+                            ).handle
+                        ).name
+                        self.environment_interface.agent_state_history[
+                            1
+                        ].append(
+                            f"Agent placed {object_name} in/on {receptacle_name}"
+                        )
+                        # print(
+                        #     f"Agent placed {object_name} in/on {receptacle_name}"
+                        # )
+                    else:
+                        self.environment_interface.agent_state_history[
+                            1
+                        ].append(
+                            f"Agent placed {object_name} in unknown location"
+                        )
+                elif action["action"] == "OPEN":
+                    self.environment_interface.agent_state_history[1].append(
+                        f"Agent opened {object_name}"
                     )
-                else:
+                elif action["action"] == "CLOSE":
                     self.environment_interface.agent_state_history[1].append(
-                        f"Agent placed {object_name} in unknown location"
+                        f"Agent closed {object_name}"
                     )
-            elif action["action"] == "OPEN":
-                self.environment_interface.agent_state_history[1].append(
-                    f"Agent opened {object_name}"
-                )
-            elif action["action"] == "CLOSE":
-                self.environment_interface.agent_state_history[1].append(
-                    f"Agent closed {object_name}"
-                )
-
-        if self._iter < self._skip_iters or self._task_done:
-            self._iter += 1
-            return np.zeros(self._agent_action_length)
-        low_level_actions = np.zeros(self._agent_action_length)
 
-        if self._thread is None or not self._thread.is_alive():
-            if self._low_level_actions != {}:
-                low_level_actions = self._low_level_actions[
-                    str(self._agent_idx)
-                ][:-248]
-            self._thread = self._thread = threading.Thread(
-                target=self._act, args=(observations,), kwargs=kwargs
-            )
-            self._thread.start()
+            if self._thread is None or not self._thread.is_alive():
+                if self._low_level_actions != {}:
+                    low_level_actions = self._low_level_actions[
+                        str(self._agent_idx)
+                    ][:-248]
+                self._thread = self._thread = threading.Thread(
+                    target=self._act, args=(observations,), kwargs=kwargs
+                )
+                self._thread.start()
 
         self._iter += 1
         return low_level_actions

From 3031983b1938d69f00e83a151870d9f699a42c39 Mon Sep 17 00:00:00 2001
From: Priyam Parashar <priyam8parashar@gmail.com>
Date: Wed, 5 Jun 2024 17:25:41 -0700
Subject: [PATCH 66/88] clean-up

---
 .../environment/controllers/llm_controller.py | 96 ++++++++++---------
 1 file changed, 49 insertions(+), 47 deletions(-)

diff --git a/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py b/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py
index dd83f4395e..cd0fe44e94 100644
--- a/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py
+++ b/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py
@@ -189,6 +189,49 @@ def _on_close(self, _e: Any = None):
 
         self._human_action_history.append(action)
 
+    def push_user_actions_to_llm(self):
+        # update agent state history
+        while self._human_action_history:
+            action = self._human_action_history.pop(0)
+            object_name = None
+            try:
+                object_name = self.environment_interface.world_graph.get_node_from_sim_handle(
+                    action["object_handle"]
+                ).name
+            except Exception as e:
+                self._log.append(e)
+                continue
+            if action["action"] == "PICK":
+                self.environment_interface.agent_state_history[1].append(
+                    f"Agent picked up {object_name}"
+                )
+            elif action["action"] == "PLACE":
+                if action["receptacle_id"] is not None:
+                    receptacle_name = self.environment_interface.world_graph.get_node_from_sim_handle(
+                        get_obj_from_id(
+                            self.environment_interface.sim,
+                            action["receptacle_id"],
+                        ).handle
+                    ).name
+                    self.environment_interface.agent_state_history[1].append(
+                        f"Agent placed {object_name} in/on {receptacle_name}"
+                    )
+                    # print(
+                    #     f"Agent placed {object_name} in/on {receptacle_name}"
+                    # )
+                else:
+                    self.environment_interface.agent_state_history[1].append(
+                        f"Agent placed {object_name} in unknown location"
+                    )
+            elif action["action"] == "OPEN":
+                self.environment_interface.agent_state_history[1].append(
+                    f"Agent opened {object_name}"
+                )
+            elif action["action"] == "CLOSE":
+                self.environment_interface.agent_state_history[1].append(
+                    f"Agent closed {object_name}"
+                )
+
     def _act(self, observations, *args, **kwargs):
         # NOTE: this is where the LLM magic happens, the agent is given the observations
         # and it returns the actions for the agent
@@ -244,62 +287,21 @@ def act(self, observations, debug_obs: bool = False, *args, **kwargs):
 
         # planning logic when task is not done
         if not self._task_done:
-            # NOTE: update the world state to reflect the new observations
+            # update world-graph and action history
             # TODO: might need a lock on world-state here?
             self.environment_interface.update_world_state(
                 observations, disable_logging=True
             )
-            # update agent state history
-            while self._human_action_history:
-                action = self._human_action_history.pop(0)
-                object_name = None
-                try:
-                    object_name = self.environment_interface.world_graph.get_node_from_sim_handle(
-                        action["object_handle"]
-                    ).name
-                except Exception as e:
-                    self._log.append(e)
-                    continue
-                if action["action"] == "PICK":
-                    self.environment_interface.agent_state_history[1].append(
-                        f"Agent picked up {object_name}"
-                    )
-                elif action["action"] == "PLACE":
-                    if action["receptacle_id"] is not None:
-                        receptacle_name = self.environment_interface.world_graph.get_node_from_sim_handle(
-                            get_obj_from_id(
-                                self.environment_interface.sim,
-                                action["receptacle_id"],
-                            ).handle
-                        ).name
-                        self.environment_interface.agent_state_history[
-                            1
-                        ].append(
-                            f"Agent placed {object_name} in/on {receptacle_name}"
-                        )
-                        # print(
-                        #     f"Agent placed {object_name} in/on {receptacle_name}"
-                        # )
-                    else:
-                        self.environment_interface.agent_state_history[
-                            1
-                        ].append(
-                            f"Agent placed {object_name} in unknown location"
-                        )
-                elif action["action"] == "OPEN":
-                    self.environment_interface.agent_state_history[1].append(
-                        f"Agent opened {object_name}"
-                    )
-                elif action["action"] == "CLOSE":
-                    self.environment_interface.agent_state_history[1].append(
-                        f"Agent closed {object_name}"
-                    )
+            self.push_user_actions_to_llm()
 
+            # read thread result and create thread if previous thread is done
             if self._thread is None or not self._thread.is_alive():
                 if self._low_level_actions != {}:
                     low_level_actions = self._low_level_actions[
                         str(self._agent_idx)
-                    ][:-248]
+                    ][
+                        :-248
+                    ]  # TODO: bad; fix this by reading action-space from config
                 self._thread = self._thread = threading.Thread(
                     target=self._act, args=(observations,), kwargs=kwargs
                 )

From 74829ebb3e578b89380a6c9e17c2ab1b824b2790 Mon Sep 17 00:00:00 2001
From: Mikael Dallaire Cote <110583667+0mdc@users.noreply.github.com>
Date: Thu, 6 Jun 2024 21:16:04 -0400
Subject: [PATCH 67/88] Disable new connections by default.

---
 .../hitl/rearrange_v2/config/experiment/headless_server.yaml  | 1 +
 examples/hitl/rearrange_v2/config/language_rearrange.yaml     | 4 +---
 examples/hitl/rearrange_v2/config/rearrange_v2.yaml           | 3 ++-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/examples/hitl/rearrange_v2/config/experiment/headless_server.yaml b/examples/hitl/rearrange_v2/config/experiment/headless_server.yaml
index 85e8299a3f..ca0e6a8094 100644
--- a/examples/hitl/rearrange_v2/config/experiment/headless_server.yaml
+++ b/examples/hitl/rearrange_v2/config/experiment/headless_server.yaml
@@ -7,6 +7,7 @@ habitat_hitl:
     http_availability_server:
       enable: True
     client_max_idle_duration: 180.0
+    enable_connections_by_default: False
   experimental:
     headless:
       do_headless: True
diff --git a/examples/hitl/rearrange_v2/config/language_rearrange.yaml b/examples/hitl/rearrange_v2/config/language_rearrange.yaml
index 172f74ae49..83d5cc5e84 100644
--- a/examples/hitl/rearrange_v2/config/language_rearrange.yaml
+++ b/examples/hitl/rearrange_v2/config/language_rearrange.yaml
@@ -87,11 +87,9 @@ habitat_hitl:
       server_camera: False
       server_input: False
     client_max_idle_duration: 180.0
+    enable_connections_by_default: False
 
 rearrange_v2:
   data_collection:
     s3_path: "Placeholder/"
     output_file_name: "session"
-  head_sensor_substrings:
-    - "head"
-    - "jaw"
diff --git a/examples/hitl/rearrange_v2/config/rearrange_v2.yaml b/examples/hitl/rearrange_v2/config/rearrange_v2.yaml
index 1c635adc2c..f12dca3e1a 100644
--- a/examples/hitl/rearrange_v2/config/rearrange_v2.yaml
+++ b/examples/hitl/rearrange_v2/config/rearrange_v2.yaml
@@ -43,6 +43,7 @@ habitat_hitl:
     - agent_index: 1
       lin_speed: 10.0
       ang_speed: 300
-  hide_humanoid_in_gui: True
   camera:
     first_person_mode: True
+  networking:
+    enable_connections_by_default: False

From 7f3ccf129faa2efc2d08df9db651f0281ee0f28b Mon Sep 17 00:00:00 2001
From: Mikael Dallaire Cote <110583667+0mdc@users.noreply.github.com>
Date: Thu, 6 Jun 2024 21:16:52 -0400
Subject: [PATCH 68/88] Move agent head sensors to match GUI camera.

---
 .../config/lang_rearrange_spot_humanoid.yaml  |  3 -
 ...anguage_rearrange_multi_agent_llm_gui.yaml |  4 +-
 examples/hitl/rearrange_v2/rearrange_v2.py    | 90 ++++++++++++-------
 3 files changed, 61 insertions(+), 36 deletions(-)

diff --git a/examples/hitl/rearrange_v2/config/lang_rearrange_spot_humanoid.yaml b/examples/hitl/rearrange_v2/config/lang_rearrange_spot_humanoid.yaml
index 9ec894d240..3261e5a7c2 100644
--- a/examples/hitl/rearrange_v2/config/lang_rearrange_spot_humanoid.yaml
+++ b/examples/hitl/rearrange_v2/config/lang_rearrange_spot_humanoid.yaml
@@ -52,6 +52,3 @@ rearrange_v2:
   data_collection:
     s3_path: "Placeholder/"
     output_file_name: "session"
-  head_sensor_substrings:
-    - "head"
-    - "jaw"
diff --git a/examples/hitl/rearrange_v2/config/language_rearrange_multi_agent_llm_gui.yaml b/examples/hitl/rearrange_v2/config/language_rearrange_multi_agent_llm_gui.yaml
index 34d43c0e41..72620469e5 100644
--- a/examples/hitl/rearrange_v2/config/language_rearrange_multi_agent_llm_gui.yaml
+++ b/examples/hitl/rearrange_v2/config/language_rearrange_multi_agent_llm_gui.yaml
@@ -157,5 +157,5 @@ rearrange_v2:
     s3_path: "Placeholder/"
     output_file_name: "session"
   head_sensor_substrings:
-    - "head"
-    - "jaw"
+    "agent_0": "jaw"
+    "agent_1": "head"
diff --git a/examples/hitl/rearrange_v2/rearrange_v2.py b/examples/hitl/rearrange_v2/rearrange_v2.py
index 439248dd88..91bc9ba6aa 100644
--- a/examples/hitl/rearrange_v2/rearrange_v2.py
+++ b/examples/hitl/rearrange_v2/rearrange_v2.py
@@ -145,7 +145,7 @@ def __init__(
         world: World,
         agent_controller: Controller,
         agent_index: int,
-        render_camera: Optional[Any],
+        render_cameras: List[Any],
     ):
         self.app_service = app_service
         self.world = world
@@ -154,7 +154,7 @@ def __init__(
 
         self.task_instruction = ""
 
-        self.render_camera = render_camera
+        self.render_cameras = render_cameras
         self.cam_transform = mn.Matrix4.identity_init()
 
         self.episode_completion_status = EpisodeCompletionStatus.PENDING
@@ -168,11 +168,35 @@ def _on_termination_cb(self, _e: Any = None):
     def update_camera_from_sensor(self) -> None:
         """
         Update the camera transform from the agent's sensor.
-        Agents controlled by users have their camera updated using CameraHelper.
         For AI-controlled agents, the camera transform can be inferred from this function.
         """
-        if self.render_camera is not None:
-            self.cam_transform = self.render_camera.camera_matrix.inverted()
+        if len(self.render_cameras) > 0:
+            self.cam_transform = self.render_cameras[
+                0
+            ].camera_matrix.inverted()
+
+    def update_camera_transform(
+        self, global_cam_transform: mn.Matrix4
+    ) -> None:
+        """
+        Updates the camera transform of the agent.
+        If the agent has 'head sensors', this will also update their transform.
+        """
+        self.cam_transform = global_cam_transform
+
+        for render_camera in self.render_cameras:
+            if render_camera is None:
+                continue
+            # TODO: There is currently no utility to set a global transform.
+            # Walk up the ancestors; each one is applied left of its children.
+            parent_to_world = mn.Matrix4.identity_init()
+            node = render_camera.node.parent
+            while node is not None and hasattr(node, "transformation"):
+                parent_to_world = node.transformation @ parent_to_world
+                node = node.parent
+            render_camera.node.transformation = (
+                parent_to_world.inverted() @ global_cam_transform
+            )
 
 
 class UserData:
@@ -254,7 +278,7 @@ def __init__(
         gui_agent_controller._gui_input = self.gui_input
 
     def reset(self):
-        self.camera_helper.update(self._get_camera_lookat_pos(), dt=0)
+        self._update_camera()
         self.ui.reset()
 
         # If networking is enabled...
@@ -295,14 +319,7 @@ def update(self, dt: float):
                 self.server_sps_tracker.get_smoothed_rate(),
             )
 
-        self.camera_helper.update(self._get_camera_lookat_pos(), dt)
-        self.agent_data.cam_transform = self.camera_helper.get_cam_transform()
-
-        if self.app_service.hitl_config.networking.enable:
-            self.app_service._client_message_manager.update_camera_transform(
-                self.agent_data.cam_transform,
-                destination_mask=Mask.from_index(self.user_index),
-            )
+        self._update_camera(dt)
 
         self.ui.update()
         self.ui.draw_ui()
@@ -360,6 +377,17 @@ def _get_camera_lookat_pos(self) -> mn.Vector3:
         lookat = agent_root.translation + lookat_y_offset
         return lookat
 
+    def _update_camera(self, dt: float = 0.0) -> None:
+        self.camera_helper.update(self._get_camera_lookat_pos(), dt=dt)
+        cam_transform = self.camera_helper.get_cam_transform()
+        self.agent_data.update_camera_transform(cam_transform)
+        # When networking is enabled, send the transform to this user's client.
+        if self.app_service.hitl_config.networking.enable:
+            self.app_service._client_message_manager.update_camera_transform(
+                cam_transform,
+                destination_mask=Mask.from_index(self.user_index),
+            )
+
     def _is_user_idle_this_frame(self) -> bool:
         return not self.gui_input.get_any_input()
 
@@ -474,19 +502,20 @@ def __init__(
         # HACK: The simulator has only 1 agent with all sensors. See 'create_sim_config() in habitat_simulator.py'.
         sim_agent = sim.agents[0]
         config = self._app_service.config
-        head_sensor_substrings: List[
-            str
+        head_sensor_substrings: Dict[
+            str, str
         ] = config.rearrange_v2.head_sensor_substrings
         for agent_index in range(self._num_agents):
-            render_camera: Optional[Any] = None
-            for substring in head_sensor_substrings:
-                # TODO: Validate that the lab agent owns the sensor.
-                for sensor_name, sensor in sim_agent._sensors.items():
-                    if substring in sensor_name and hasattr(
-                        sensor, "render_camera"
-                    ):
-                        render_camera = sensor.render_camera
-                        break
+            render_cameras: List[Any] = []
+            agent_id = config.habitat.simulator.agents_order[agent_index]
+            substring = head_sensor_substrings[agent_id]
+            for sensor_name, sensor in sim_agent._sensors.items():
+                if (
+                    substring in sensor_name
+                    and agent_id in sensor_name
+                    and hasattr(sensor, "render_camera")
+                ):
+                    render_cameras.append(sensor.render_camera)
 
             agent_controller = app_service.all_agent_controllers[agent_index]
 
@@ -504,7 +533,7 @@ def __init__(
                     world=self._world,
                     agent_controller=agent_controller,
                     agent_index=agent_index,
-                    render_camera=render_camera,
+                    render_cameras=render_cameras,
                 )
             )
             if isinstance(agent_controller, LLMController):
@@ -764,15 +793,14 @@ def sim_update(self, dt: float, post_sim_update_dict):
                 user_agent_idx = self._user_to_agent_index[user_index]
                 other_agent_idx = user_agent_idx ^ 1
                 other_agent_data = self._agent_data[other_agent_idx]
-
-                # If the other agent is AI-controlled, update its camera.
-                if other_agent_idx not in self._agent_to_user_index:
-                    other_agent_data.update_camera_from_sensor()
-
                 self._user_data[user_index].draw_pip_viewport(other_agent_data)
 
         self._app_service.compute_action_and_step_env()
 
+        # Update agent cameras.
+        for agent_index in range(self._num_agents):
+            self._agent_data[agent_index].update_camera_from_sensor()
+
         # Set the server camera.
         server_cam_transform = self._user_data[
             self._server_user_index

From 6e0cdfb7486ab2ef7c0a29abbbda59c1705bd8aa Mon Sep 17 00:00:00 2001
From: Mikael Dallaire Cote <110583667+0mdc@users.noreply.github.com>
Date: Thu, 6 Jun 2024 21:21:32 -0400
Subject: [PATCH 69/88] Disable RGB sensors.

---
 .../config/language_rearrange_multi_agent_llm_gui.yaml        | 4 ++--
 .../habitat_hitl/environment/controllers/llm_controller.py    | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/examples/hitl/rearrange_v2/config/language_rearrange_multi_agent_llm_gui.yaml b/examples/hitl/rearrange_v2/config/language_rearrange_multi_agent_llm_gui.yaml
index 72620469e5..abae231f87 100644
--- a/examples/hitl/rearrange_v2/config/language_rearrange_multi_agent_llm_gui.yaml
+++ b/examples/hitl/rearrange_v2/config/language_rearrange_multi_agent_llm_gui.yaml
@@ -74,7 +74,7 @@ habitat:
 # --- habitat-llm block
       # - agent_0_third_rgb
       # - agent_0_articulated_agent_arm_depth
-      - agent_0_articulated_agent_arm_rgb
+      # - agent_0_articulated_agent_arm_rgb
       - agent_0_articulated_agent_arm_panoptic
       # - agent_0_head_depth
       # - agent_0_head_rgb
@@ -98,7 +98,7 @@ habitat:
       # - agent_1_articulated_agent_arm_rgb
       # - agent_1_articulated_agent_arm_panoptic
       # - agent_1_head_depth
-      - agent_1_head_rgb
+      # - agent_1_head_rgb
       - agent_1_head_panoptic
       # - agent_1_relative_resting_position
       # - agent_1_joint
diff --git a/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py b/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py
index cd0fe44e94..16b1a593a5 100644
--- a/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py
+++ b/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py
@@ -265,7 +265,7 @@ def act(self, observations, debug_obs: bool = False, *args, **kwargs):
 
         low_level_actions = np.zeros(self._agent_action_length)
 
-        if debug_obs:
+        if debug_obs and "agent_1_head_rgb" in observations:
             rgb = observations["agent_1_head_rgb"]
             panoptic = observations["agent_1_head_panoptic"]
             cv2.imwrite(

From 41583e07ca9955321ec291786ba6b2f41efaabd1 Mon Sep 17 00:00:00 2001
From: Priyam Parashar <priyam8parashar@gmail.com>
Date: Thu, 6 Jun 2024 23:01:06 -0700
Subject: [PATCH 70/88] adding task % progress

---
 examples/hitl/rearrange_v2/rearrange_v2.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/examples/hitl/rearrange_v2/rearrange_v2.py b/examples/hitl/rearrange_v2/rearrange_v2.py
index 91bc9ba6aa..3b80e5c762 100644
--- a/examples/hitl/rearrange_v2/rearrange_v2.py
+++ b/examples/hitl/rearrange_v2/rearrange_v2.py
@@ -687,6 +687,11 @@ def _get_status_text(self, user_index: int):
         if len(task_instruction) > 0:
             status_str += "Instruction: " + task_instruction + "\n"
 
+        # get recent metrics
+        metrics = self._app_service.get_metrics()
+        if "task_percent_complete" in metrics:
+            status_str += f"Task progress: {(metrics['task_percent_complete']*100):.2f}%\n"
+
         # the multi-agent case
         if (
             self._users.max_user_count > 1

From 677d971191bda903a4840066da03e9e60440d9cf Mon Sep 17 00:00:00 2001
From: Priyam Parashar <priyam8parashar@gmail.com>
Date: Thu, 6 Jun 2024 23:01:42 -0700
Subject: [PATCH 71/88] guard each agent's obs

---
 .../habitat_hitl/environment/controllers/llm_controller.py     | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py b/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py
index 16b1a593a5..48c92c3ebc 100644
--- a/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py
+++ b/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py
@@ -247,7 +247,7 @@ def _act(self, observations, *args, **kwargs):
         )
         return
 
-    def act(self, observations, debug_obs: bool = False, *args, **kwargs):
+    def act(self, observations, debug_obs: bool = True, *args, **kwargs):
         # set the task as done and report it back
         if self._task_done and not self._termination_reported:
             if (
@@ -275,6 +275,7 @@ def act(self, observations, debug_obs: bool = False, *args, **kwargs):
             cv2.imwrite(
                 f"./visuals/agent_1/panoptic_{self._iter}.png", panoptic
             )
+        if debug_obs and "agent_0_articulated_agent_arm_rgb" in observations:
             rgb = observations["agent_0_articulated_agent_arm_rgb"]
             panoptic = observations["agent_0_articulated_agent_arm_panoptic"]
             cv2.imwrite(

From 9b5ca88a8492efd2dfb52b8ce3d70b9649eec71d Mon Sep 17 00:00:00 2001
From: Mikael Dallaire Cote <110583667+0mdc@users.noreply.github.com>
Date: Fri, 7 Jun 2024 11:54:19 -0400
Subject: [PATCH 72/88] Disable humanoid sensor auto-update.

---
 .../config/lang_rearrange_llmspot_guihumanoid.yaml          | 1 +
 .../articulated_agents/humanoids/kinematic_humanoid.py      | 5 ++++-
 habitat-lab/habitat/articulated_agents/manipulator.py       | 6 ++++--
 .../habitat/articulated_agents/mobile_manipulator.py        | 2 ++
 .../habitat/articulated_agents/static_manipulator.py        | 2 ++
 habitat-lab/habitat/config/default_structured_configs.py    | 5 +++++
 .../habitat/sims/habitat_simulator/habitat_simulator.py     | 1 +
 7 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/examples/hitl/rearrange_v2/config/lang_rearrange_llmspot_guihumanoid.yaml b/examples/hitl/rearrange_v2/config/lang_rearrange_llmspot_guihumanoid.yaml
index d555dd0b61..a9da1937a1 100644
--- a/examples/hitl/rearrange_v2/config/lang_rearrange_llmspot_guihumanoid.yaml
+++ b/examples/hitl/rearrange_v2/config/lang_rearrange_llmspot_guihumanoid.yaml
@@ -50,6 +50,7 @@ habitat:
         articulated_agent_type: 'KinematicHumanoid'
         articulated_agent_urdf: 'data/humanoids/humanoid_data/female_2/female_2.urdf'
         motion_data_path: "data/humanoids/humanoid_data/female_2/female_2_motion_data_smplx.pkl"
+        auto_update_sensor_transform: False
 
 device      : cuda
 instruction : ''
diff --git a/habitat-lab/habitat/articulated_agents/humanoids/kinematic_humanoid.py b/habitat-lab/habitat/articulated_agents/humanoids/kinematic_humanoid.py
index 7f272a40fc..fd0c5a9b8c 100644
--- a/habitat-lab/habitat/articulated_agents/humanoids/kinematic_humanoid.py
+++ b/habitat-lab/habitat/articulated_agents/humanoids/kinematic_humanoid.py
@@ -59,6 +59,8 @@ def _get_humanoid_params(self):
     def __init__(
         self, agent_cfg, sim, limit_robo_joints=False, fixed_base=False
     ):
+        auto_update_sensor_transform = agent_cfg.auto_update_sensor_transform
+
         super().__init__(
             self._get_humanoid_params(),
             agent_cfg,
@@ -66,6 +68,7 @@ def __init__(
             limit_robo_joints,
             fixed_base,
             maintain_link_order=True,
+            auto_update_sensor_transform=auto_update_sensor_transform,
         )
 
         # The offset and base transform are used so that the
@@ -164,7 +167,7 @@ def update(self) -> None:
         """Updates the camera transformations and performs necessary checks on
         joint limits and sleep states.
         """
-        if self._cameras is not None:
+        if self._cameras is not None and self._auto_update_sensor_transforms:
             # get the transformation
             agent_node = self._sim._default_agent.scene_node
             inv_T = agent_node.transformation.inverted()
diff --git a/habitat-lab/habitat/articulated_agents/manipulator.py b/habitat-lab/habitat/articulated_agents/manipulator.py
index 7994b95483..dcc89b14e9 100644
--- a/habitat-lab/habitat/articulated_agents/manipulator.py
+++ b/habitat-lab/habitat/articulated_agents/manipulator.py
@@ -17,7 +17,7 @@
 
 
 class Manipulator(ArticulatedAgentInterface):
-    """Generic manupulator interface defines standard API functions. Robot with a controllable arm."""
+    """Generic manipulator interface defines standard API functions. Robot with a controllable arm."""
 
     def __init__(
         self,
@@ -28,6 +28,7 @@ def __init__(
         fixed_based: bool = True,
         sim_obj=None,
         maintain_link_order=False,
+        auto_update_sensor_transform=True,
         **kwargs,
     ):
         r"""Constructor"""
@@ -40,6 +41,7 @@ def __init__(
         self._fixed_base = fixed_based
         self.sim_obj = sim_obj
         self._maintain_link_order = maintain_link_order
+        self._auto_update_sensor_transforms = auto_update_sensor_transform
 
         # Adapt Manipulator params to support multiple end effector indices
         # NOTE: the follow members cache static info for improved efficiency over querying the API
@@ -140,7 +142,7 @@ def update(self) -> None:
         """Updates the camera transformations and performs necessary checks on
         joint limits and sleep states.
         """
-        if self._cameras is not None:
+        if self._cameras is not None and self._auto_update_sensor_transforms:
             # get the transformation
             agent_node = self._sim._default_agent.scene_node
             inv_T = agent_node.transformation.inverted()
diff --git a/habitat-lab/habitat/articulated_agents/mobile_manipulator.py b/habitat-lab/habitat/articulated_agents/mobile_manipulator.py
index acdfa7c947..26f1f598f5 100644
--- a/habitat-lab/habitat/articulated_agents/mobile_manipulator.py
+++ b/habitat-lab/habitat/articulated_agents/mobile_manipulator.py
@@ -120,6 +120,7 @@ def __init__(
         limit_robo_joints: bool = True,
         fixed_base: bool = True,
         maintain_link_order: bool = False,
+        auto_update_sensor_transform=True,
         base_type="mobile",
     ):
         r"""Constructor
@@ -142,6 +143,7 @@ def __init__(
             params=params,
             sim=sim,
             limit_robo_joints=limit_robo_joints,
+            auto_update_sensor_transform=auto_update_sensor_transform,
         )
         # instantiate a robotBase
         ArticulatedAgentBase.__init__(
diff --git a/habitat-lab/habitat/articulated_agents/static_manipulator.py b/habitat-lab/habitat/articulated_agents/static_manipulator.py
index e1c2b5d42a..50fda886c2 100644
--- a/habitat-lab/habitat/articulated_agents/static_manipulator.py
+++ b/habitat-lab/habitat/articulated_agents/static_manipulator.py
@@ -64,6 +64,7 @@ def __init__(
         sim: Simulator,
         limit_robo_joints: bool = True,
         fixed_base: bool = True,
+        auto_update_sensor_transform=False,
     ):
         r"""Constructor
         :param params: The parameter of the manipulator robot.
@@ -81,6 +82,7 @@ def __init__(
             sim=sim,
             limit_robo_joints=limit_robo_joints,
             fixed_based=fixed_base,
+            auto_update_sensor_transform=auto_update_sensor_transform,
         )
 
     def reconfigure(self) -> None:
diff --git a/habitat-lab/habitat/config/default_structured_configs.py b/habitat-lab/habitat/config/default_structured_configs.py
index 259a74c0e7..26df58a434 100644
--- a/habitat-lab/habitat/config/default_structured_configs.py
+++ b/habitat-lab/habitat/config/default_structured_configs.py
@@ -1681,6 +1681,11 @@ class AgentConfig(HabitatBaseConfig):
     # File to motion data, used to play pre-recorded motions
     motion_data_path: str = ""
 
+    # Hack: UI-controlled agents may use a first-person camera.
+    #       To avoid discrepancies, sensors are moved to the first-person view.
+    #       This flag allows for disabling automatic sensor transform update.
+    auto_update_sensor_transform: bool = True
+
 
 @dataclass
 class RendererConfig(HabitatBaseConfig):
diff --git a/habitat-lab/habitat/sims/habitat_simulator/habitat_simulator.py b/habitat-lab/habitat/sims/habitat_simulator/habitat_simulator.py
index 1f5a863945..16a3323faf 100644
--- a/habitat-lab/habitat/sims/habitat_simulator/habitat_simulator.py
+++ b/habitat-lab/habitat/sims/habitat_simulator/habitat_simulator.py
@@ -348,6 +348,7 @@ def create_sim_config(
                 "max_climb",
                 "max_slope",
                 "joint_start_override",
+                "auto_update_sensor_transform",
             },
         )
 

From 3e3324e3d1dfe4183cc558b61166cd4f52048c8c Mon Sep 17 00:00:00 2001
From: Mikael Dallaire Cote <110583667+0mdc@users.noreply.github.com>
Date: Fri, 7 Jun 2024 14:27:58 -0400
Subject: [PATCH 73/88] Disable task success text. Add task success to data
 output.

---
 examples/hitl/rearrange_v2/rearrange_v2.py | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/examples/hitl/rearrange_v2/rearrange_v2.py b/examples/hitl/rearrange_v2/rearrange_v2.py
index 3b80e5c762..ee52779691 100644
--- a/examples/hitl/rearrange_v2/rearrange_v2.py
+++ b/examples/hitl/rearrange_v2/rearrange_v2.py
@@ -49,6 +49,8 @@
 
 PIP_VIEWPORT_ID = 0  # ID of the picture-in-picture viewport that shows other agent's perspective.
 
+UI_SHOW_TASK_SUCCESS = False  # Turn on to display current task success.
+
 
 class EpisodeCompletionStatus(Enum):
     PENDING = (0,)
@@ -116,6 +118,10 @@ def record_state(
             "agent_states": self.get_agents_state(),
         }
 
+        metrics = self._app_service.get_metrics()
+        if "task_percent_complete" in metrics:
+            data["task_percent_complete"] = metrics["task_percent_complete"]
+
         for user_index in range(len(user_data)):
             u = user_data[user_index]
             user_data_dict = {
@@ -688,9 +694,10 @@ def _get_status_text(self, user_index: int):
             status_str += "Instruction: " + task_instruction + "\n"
 
         # get recent metrics
-        metrics = self._app_service.get_metrics()
-        if "task_percent_complete" in metrics:
-            status_str += f"Task progress: {(metrics['task_percent_complete']*100):.2f}%\n"
+        if UI_SHOW_TASK_SUCCESS:
+            metrics = self._app_service.get_metrics()
+            if "task_percent_complete" in metrics:
+                status_str += f"Task progress: {(metrics['task_percent_complete']*100):.2f}%\n"
 
         # the multi-agent case
         if (

From b983eb3ac8b41746d2a803243b7bb7718e279738 Mon Sep 17 00:00:00 2001
From: Mikael Dallaire Cote <110583667+0mdc@users.noreply.github.com>
Date: Fri, 7 Jun 2024 15:02:27 -0400
Subject: [PATCH 74/88] Adjust episode termination signals for single-learn.

---
 examples/hitl/rearrange_v2/rearrange_v2.py | 39 ++++++++++++++++++++--
 1 file changed, 36 insertions(+), 3 deletions(-)

diff --git a/examples/hitl/rearrange_v2/rearrange_v2.py b/examples/hitl/rearrange_v2/rearrange_v2.py
index ee52779691..56d1429111 100644
--- a/examples/hitl/rearrange_v2/rearrange_v2.py
+++ b/examples/hitl/rearrange_v2/rearrange_v2.py
@@ -505,6 +505,9 @@ def __init__(
 
         self._agent_data: List[AgentData] = []
 
+        # If set, the episode will skip with the following error message.
+        self._skip_episode_error_message: Optional[str] = None
+
         # HACK: The simulator has only 1 agent with all sensors. See 'create_sim_config() in habitat_simulator.py'.
         sim_agent = sim.agents[0]
         config = self._app_service.config
@@ -546,6 +549,9 @@ def __init__(
                 agent_controller._on_termination.registerCallback(
                     self._agent_data[agent_index]._on_termination_cb
                 )
+                agent_controller._on_termination.registerCallback(
+                    self._on_termination_cb
+                )
 
         self._user_data: List[UserData] = []
         for user_index in self._users.indices(Mask.ALL):
@@ -586,7 +592,6 @@ def __init__(
 
     def get_next_state(self) -> Optional[AppStateBase]:
         if self._cancel:
-            # TODO: Reset LLM controller.
             return create_app_state_cancel_session(
                 self._app_service,
                 self._app_data,
@@ -594,7 +599,11 @@ def get_next_state(self) -> Optional[AppStateBase]:
                 "User disconnected",
             )
         elif self._is_episode_finished():
-            # TODO: Reset LLM controller.
+            return create_app_state_load_episode(
+                self._app_service, self._app_data, self._session
+            )
+        elif self._skip_episode_error_message is not None:
+            # TODO: Skip episode state.
             return create_app_state_load_episode(
                 self._app_service, self._app_data, self._session
             )
@@ -864,7 +873,17 @@ def _has_any_agent_finished_failure(self) -> bool:
             for agent_index in range(self._num_agents)
         )
 
-    def _is_episode_finished(self) -> bool:
+    def _have_all_users_reported_errors(self) -> bool:
+        """
+        Returns true if all GUI users reported errors.
+        """
+        return all(
+            self._user_data[user_index].agent_data.episode_completion_status
+            == EpisodeCompletionStatus.FAILURE
+            for user_index in range(self._num_users)
+        )
+
+    def _have_all_agents_finished_episode(self) -> bool:
         """
         Returns true if all agents finished the episode, regardless of success.
         """
@@ -874,6 +893,15 @@ def _is_episode_finished(self) -> bool:
             for agent_index in range(self._num_agents)
         )
 
+    def _is_episode_finished(self) -> bool:
+        """
+        Returns true if all agents finished or all users reported errors.
+        """
+        return (
+            self._have_all_agents_finished_episode()
+            or self._have_all_users_reported_errors()
+        )
+
     def _is_episode_successful(self) -> bool:
         """
         Returns true if all agents finished the episode successfully.
@@ -883,3 +911,8 @@ def _is_episode_successful(self) -> bool:
             == EpisodeCompletionStatus.SUCCESS
             for agent_index in range(self._num_agents)
         )
+
+    def _on_termination_cb(self, _e: Any = None):
+        # Trigger episode change sequence when an agent error occurs.
+        if _e.status == PlannerStatus.FAILED:
+            self._skip_episode_error_message = "The other participant has encountered an error. Skipping episode."

From 1757301f11ff5e8ecde9519ae93ed4c76e1a1368 Mon Sep 17 00:00:00 2001
From: Mikael Dallaire Cote <110583667+0mdc@users.noreply.github.com>
Date: Fri, 7 Jun 2024 15:24:02 -0400
Subject: [PATCH 75/88] Add skip episode app state.

---
 .../rearrange_v2/app_state_skip_episode.py    | 59 +++++++++++++++++++
 examples/hitl/rearrange_v2/app_states.py      |  8 +++
 examples/hitl/rearrange_v2/rearrange_v2.py    |  9 ++-
 3 files changed, 73 insertions(+), 3 deletions(-)
 create mode 100644 examples/hitl/rearrange_v2/app_state_skip_episode.py

diff --git a/examples/hitl/rearrange_v2/app_state_skip_episode.py b/examples/hitl/rearrange_v2/app_state_skip_episode.py
new file mode 100644
index 0000000000..65e2586b62
--- /dev/null
+++ b/examples/hitl/rearrange_v2/app_state_skip_episode.py
@@ -0,0 +1,59 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Meta Platforms, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Optional
+
+from app_data import AppData
+from app_state_base import AppStateBase
+from app_states import (
+    create_app_state_cancel_session,
+    create_app_state_load_episode,
+)
+from session import Session
+from util import get_top_down_view
+
+from habitat_hitl.app_states.app_service import AppService
+from habitat_hitl.core.user_mask import Mask
+
+SKIP_EPISODE_MESSAGE_DURATION = 5.0
+
+class AppStateSkipEpisode(AppStateBase):
+    """
+    Skip an episode.
+    A message is displayed for 'SKIP_EPISODE_MESSAGE_DURATION' before resuming session.
+    """
+
+    def __init__(
+        self, app_service: AppService, app_data: AppData, session: Session, message: str
+    ):
+        super().__init__(app_service, app_data)
+        self._session = session
+        self._message = message
+        self._timer = SKIP_EPISODE_MESSAGE_DURATION
+
+    def get_next_state(self) -> Optional[AppStateBase]:
+        if self._cancel:
+            return create_app_state_cancel_session(
+                self._app_service,
+                self._app_data,
+                self._session,
+                "User disconnected.",
+            )
+        if self._timer < 0.0:
+            return create_app_state_load_episode(
+                self._app_service, self._app_data, self._session
+            )
+        return None
+
+    def sim_update(self, dt: float, post_sim_update_dict):
+        self._status_message(self._message)
+        self._timer -= dt
+
+        cam_matrix = get_top_down_view(self._app_service.sim)
+        post_sim_update_dict["cam_transform"] = cam_matrix
+        self._app_service._client_message_manager.update_camera_transform(
+            cam_matrix, destination_mask=Mask.ALL
+        )
diff --git a/examples/hitl/rearrange_v2/app_states.py b/examples/hitl/rearrange_v2/app_states.py
index 7df1a93b3b..6fbb3c152f 100644
--- a/examples/hitl/rearrange_v2/app_states.py
+++ b/examples/hitl/rearrange_v2/app_states.py
@@ -47,6 +47,14 @@ def create_app_state_load_episode(
     return AppStateLoadEpisode(app_service, app_data, session)
 
 
+def create_app_state_skip_episode(
+    app_service: AppService, app_data: AppData, session: Session, message: str
+) -> AppStateBase:
+    from app_state_skip_episode import AppStateSkipEpisode
+
+    return AppStateSkipEpisode(app_service, app_data, session, message)
+
+
 def create_app_state_start_screen(
     app_service: AppService, app_data: AppData, session: Session
 ) -> AppStateBase:
diff --git a/examples/hitl/rearrange_v2/rearrange_v2.py b/examples/hitl/rearrange_v2/rearrange_v2.py
index 56d1429111..f234ef3e04 100644
--- a/examples/hitl/rearrange_v2/rearrange_v2.py
+++ b/examples/hitl/rearrange_v2/rearrange_v2.py
@@ -17,6 +17,7 @@
 from app_states import (
     create_app_state_cancel_session,
     create_app_state_load_episode,
+    create_app_state_skip_episode,
 )
 from end_episode_form import EndEpisodeForm, ErrorReport
 from session import Session
@@ -603,9 +604,11 @@ def get_next_state(self) -> Optional[AppStateBase]:
                 self._app_service, self._app_data, self._session
             )
         elif self._skip_episode_error_message is not None:
-            # TODO: Skip episode state.
-            return create_app_state_load_episode(
-                self._app_service, self._app_data, self._session
+            return create_app_state_skip_episode(
+                self._app_service,
+                self._app_data,
+                self._session,
+                self._skip_episode_error_message,
             )
         else:
             return None

From c455f7fc682927a4ca187464f468158fe97c4427 Mon Sep 17 00:00:00 2001
From: Mikael Dallaire Cote <110583667+0mdc@users.noreply.github.com>
Date: Fri, 7 Jun 2024 15:24:34 -0400
Subject: [PATCH 76/88] Fix config typo.

---
 .../config/language_rearrange_multi_agent_llm_gui.yaml          | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/hitl/rearrange_v2/config/language_rearrange_multi_agent_llm_gui.yaml b/examples/hitl/rearrange_v2/config/language_rearrange_multi_agent_llm_gui.yaml
index abae231f87..022cf4630b 100644
--- a/examples/hitl/rearrange_v2/config/language_rearrange_multi_agent_llm_gui.yaml
+++ b/examples/hitl/rearrange_v2/config/language_rearrange_multi_agent_llm_gui.yaml
@@ -64,7 +64,7 @@ habitat:
 
     # robot_at_thresh: 3.0
     # lab_sensors:
-    #   # Defien the human detector
+    #   # Define the human detector
     #   humanoid_detector_sensor:
     #     # If the human detector function is image or binary flag
     #     return_image: False

From 5f756d5d2ad8d150ef8a4fe96ed51fa390881284 Mon Sep 17 00:00:00 2001
From: Priyam Parashar <priyam8parashar@gmail.com>
Date: Fri, 7 Jun 2024 13:19:46 -0700
Subject: [PATCH 77/88] hot-fix for inaccessible receptacles

---
 .../environment/controllers/llm_controller.py | 39 +++++++++++--------
 1 file changed, 22 insertions(+), 17 deletions(-)

diff --git a/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py b/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py
index 48c92c3ebc..e32618c490 100644
--- a/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py
+++ b/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py
@@ -19,6 +19,7 @@
 from habitat_llm.agent.env import EnvironmentInterface
 from habitat_llm.planner.llm_planner import LLMPlanner
 from habitat_llm.utils import fix_config, setup_config
+from habitat_llm.world_model import Furniture
 from hydra.utils import instantiate
 from omegaconf import DictConfig
 
@@ -206,23 +207,27 @@ def push_user_actions_to_llm(self):
                     f"Agent picked up {object_name}"
                 )
             elif action["action"] == "PLACE":
+                furniture_name = "unknown furniture"
                 if action["receptacle_id"] is not None:
-                    receptacle_name = self.environment_interface.world_graph.get_node_from_sim_handle(
-                        get_obj_from_id(
-                            self.environment_interface.sim,
-                            action["receptacle_id"],
-                        ).handle
-                    ).name
-                    self.environment_interface.agent_state_history[1].append(
-                        f"Agent placed {object_name} in/on {receptacle_name}"
-                    )
-                    # print(
-                    #     f"Agent placed {object_name} in/on {receptacle_name}"
-                    # )
-                else:
-                    self.environment_interface.agent_state_history[1].append(
-                        f"Agent placed {object_name} in unknown location"
-                    )
+                    try:
+                        receptacle_node = self.environment_interface.world_graph.get_node_from_sim_handle(
+                            get_obj_from_id(
+                                self.environment_interface.sim,
+                                action["receptacle_id"],
+                            ).handle
+                        )
+                        furniture_name = self.environment_interface.world_graph.get_neighbors_of_type(
+                            receptacle_node, Furniture
+                        )[
+                            0
+                        ].name
+                    except ValueError:
+                        print(
+                            f"Receptacle not found: {get_obj_from_id(self.environment_interface.sim, action['receptacle_id']).handle}"
+                        )
+                self.environment_interface.agent_state_history[1].append(
+                    f"Agent placed {object_name} in/on {furniture_name}"
+                )
             elif action["action"] == "OPEN":
                 self.environment_interface.agent_state_history[1].append(
                     f"Agent opened {object_name}"
@@ -247,7 +252,7 @@ def _act(self, observations, *args, **kwargs):
         )
         return
 
-    def act(self, observations, debug_obs: bool = True, *args, **kwargs):
+    def act(self, observations, debug_obs: bool = False, *args, **kwargs):
         # set the task as done and report it back
         if self._task_done and not self._termination_reported:
             if (

From a6021ccb0721cb8f93e952b2e21ccbdfec60e294 Mon Sep 17 00:00:00 2001
From: Priyam Parashar <priyam8parashar@gmail.com>
Date: Fri, 7 Jun 2024 13:29:01 -0700
Subject: [PATCH 78/88] Guard against receptacles with no associated furniture

---
 .../environment/controllers/llm_controller.py     | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py b/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py
index e32618c490..add8a243a8 100644
--- a/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py
+++ b/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py
@@ -216,11 +216,18 @@ def push_user_actions_to_llm(self):
                                 action["receptacle_id"],
                             ).handle
                         )
-                        furniture_name = self.environment_interface.world_graph.get_neighbors_of_type(
+                        furnitures = self.environment_interface.world_graph.get_neighbors_of_type(
                             receptacle_node, Furniture
-                        )[
-                            0
-                        ].name
+                        )
+                        if len(furnitures) > 0:
+                            furniture_name = furnitures[0].name
+                        else:
+                            print(
+                                "Could not find furniture for receptacle: ",
+                                receptacle_node.handle,
+                                " ",
+                                receptacle_node.name,
+                            )
                     except ValueError:
                         print(
                             f"Receptacle not found: {get_obj_from_id(self.environment_interface.sim, action['receptacle_id']).handle}"

From 9bb5ec1d66fa2e6e50eab0544e0d211d469cd254 Mon Sep 17 00:00:00 2001
From: Priyam Parashar <priyam8parashar@gmail.com>
Date: Fri, 7 Jun 2024 13:33:44 -0700
Subject: [PATCH 79/88] Guard against missing receptacle node

---
 .../environment/controllers/llm_controller.py | 20 +++++++++----------
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py b/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py
index add8a243a8..b126e9a504 100644
--- a/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py
+++ b/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py
@@ -209,13 +209,13 @@ def push_user_actions_to_llm(self):
             elif action["action"] == "PLACE":
                 furniture_name = "unknown furniture"
                 if action["receptacle_id"] is not None:
-                    try:
-                        receptacle_node = self.environment_interface.world_graph.get_node_from_sim_handle(
-                            get_obj_from_id(
-                                self.environment_interface.sim,
-                                action["receptacle_id"],
-                            ).handle
-                        )
+                    receptacle_node = self.environment_interface.world_graph.get_node_from_sim_handle(
+                        get_obj_from_id(
+                            self.environment_interface.sim,
+                            action["receptacle_id"],
+                        ).handle
+                    )
+                    if receptacle_node is not None:
                         furnitures = self.environment_interface.world_graph.get_neighbors_of_type(
                             receptacle_node, Furniture
                         )
@@ -228,10 +228,8 @@ def push_user_actions_to_llm(self):
                                 " ",
                                 receptacle_node.name,
                             )
-                    except ValueError:
-                        print(
-                            f"Receptacle not found: {get_obj_from_id(self.environment_interface.sim, action['receptacle_id']).handle}"
-                        )
+                    else:
+                        print("Receptacle not found")
                 self.environment_interface.agent_state_history[1].append(
                     f"Agent placed {object_name} in/on {furniture_name}"
                 )

From 4a0d292af46c7ec73f79e58268c07d5cc40a0db4 Mon Sep 17 00:00:00 2001
From: Priyam Parashar <priyam8parashar@gmail.com>
Date: Fri, 7 Jun 2024 20:40:42 +0000
Subject: [PATCH 80/88] Fix receptacle node attribute name (handle -> sim_handle)

---
 .../habitat_hitl/environment/controllers/llm_controller.py      | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py b/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py
index b126e9a504..7fae656d21 100644
--- a/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py
+++ b/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py
@@ -224,7 +224,7 @@ def push_user_actions_to_llm(self):
                         else:
                             print(
                                 "Could not find furniture for receptacle: ",
-                                receptacle_node.handle,
+                                receptacle_node.sim_handle,
                                 " ",
                                 receptacle_node.name,
                             )

From 08f9fb340d56108e01af27ce62ebb519b8bb4473 Mon Sep 17 00:00:00 2001
From: Priyam Parashar <priyam8parashar@gmail.com>
Date: Fri, 7 Jun 2024 20:58:22 +0000
Subject: [PATCH 81/88] first finish thread then reset env

---
 .../config/lang_rearrange_llmspot_guihumanoid.yaml            | 4 ++--
 .../habitat_hitl/environment/controllers/llm_controller.py    | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/examples/hitl/rearrange_v2/config/lang_rearrange_llmspot_guihumanoid.yaml b/examples/hitl/rearrange_v2/config/lang_rearrange_llmspot_guihumanoid.yaml
index a9da1937a1..cb7b9d7650 100644
--- a/examples/hitl/rearrange_v2/config/lang_rearrange_llmspot_guihumanoid.yaml
+++ b/examples/hitl/rearrange_v2/config/lang_rearrange_llmspot_guihumanoid.yaml
@@ -10,8 +10,8 @@ defaults:
   - /wandb_conf@                : own
   - language_rearrange_multi_agent_llm_gui
   - hitl_defaults
-  - override /instruct@evaluation.agents.agent_0.planner.plan_config.instruct: few_shot_decentralized_partial_obs_coordinated_robot
-  - override /llm@evaluation.agents.agent_0.planner.plan_config.llm: openai_chat
+  - override /instruct@evaluation.agents.agent_0.planner.plan_config.instruct: few_shot_decentralized_partial_obs_coordinated_robot_spatial
+  # - override /llm@evaluation.agents.agent_0.planner.plan_config.llm: openai_chat
   - _self_
 
 evaluation:
diff --git a/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py b/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py
index 7fae656d21..6f5f50bd3d 100644
--- a/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py
+++ b/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py
@@ -132,11 +132,11 @@ def on_environment_reset(self):
         # NOTE: the following ONLY resets self._test_recurrent_hidden_states,
         # self._prev_actions and self._not_done_masks
         # super().on_environment_reset()
-        self.environment_interface.reset_environment(reset_habitat=False)
-        self.planner.reset()
         if self._thread is not None:
             self._thread.join()
         self._thread = None
+        self.environment_interface.reset_environment(reset_habitat=False)
+        self.planner.reset()
 
         self.current_instruction = (
             self.environment_interface.hab_env.current_episode.instruction

From fe9d88793c3066e51f39fab29c92e73e72910859 Mon Sep 17 00:00:00 2001
From: Priyam Parashar <priyam8parashar@gmail.com>
Date: Fri, 7 Jun 2024 21:20:32 +0000
Subject: [PATCH 82/88] receptacle ID can be furniture ID

---
 .../environment/controllers/llm_controller.py | 23 +++++++++++--------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py b/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py
index 6f5f50bd3d..6861955473 100644
--- a/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py
+++ b/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py
@@ -216,18 +216,21 @@ def push_user_actions_to_llm(self):
                         ).handle
                     )
                     if receptacle_node is not None:
-                        furnitures = self.environment_interface.world_graph.get_neighbors_of_type(
-                            receptacle_node, Furniture
-                        )
-                        if len(furnitures) > 0:
-                            furniture_name = furnitures[0].name
+                        if isinstance(receptacle_node, Furniture):
+                            furniture_name = receptacle_node.name
                         else:
-                            print(
-                                "Could not find furniture for receptacle: ",
-                                receptacle_node.sim_handle,
-                                " ",
-                                receptacle_node.name,
+                            furnitures = self.environment_interface.world_graph.get_neighbors_of_type(
+                                receptacle_node, Furniture
                             )
+                            if len(furnitures) > 0:
+                                furniture_name = furnitures[0].name
+                            else:
+                                print(
+                                    "Could not find furniture for receptacle: ",
+                                    receptacle_node.sim_handle,
+                                    " ",
+                                    receptacle_node.name,
+                                )
                     else:
                         print("Receptacle not found")
                 self.environment_interface.agent_state_history[1].append(

From de568e21d9036dd3012c7f3d48dfcc710061a377 Mon Sep 17 00:00:00 2001
From: Mikael Dallaire Cote <110583667+0mdc@users.noreply.github.com>
Date: Mon, 10 Jun 2024 15:42:57 -0400
Subject: [PATCH 83/88] Formatting fix.

---
 examples/hitl/rearrange_v2/app_state_skip_episode.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/examples/hitl/rearrange_v2/app_state_skip_episode.py b/examples/hitl/rearrange_v2/app_state_skip_episode.py
index 65e2586b62..4e15cc27dc 100644
--- a/examples/hitl/rearrange_v2/app_state_skip_episode.py
+++ b/examples/hitl/rearrange_v2/app_state_skip_episode.py
@@ -20,6 +20,7 @@
 
 SKIP_EPISODE_MESSAGE_DURATION = 5.0
 
+
 class AppStateSkipEpisode(AppStateBase):
     """
     Skip an episode.
@@ -27,7 +28,11 @@ class AppStateSkipEpisode(AppStateBase):
     """
 
     def __init__(
-        self, app_service: AppService, app_data: AppData, session: Session, message: str
+        self,
+        app_service: AppService,
+        app_data: AppData,
+        session: Session,
+        message: str,
     ):
         super().__init__(app_service, app_data)
         self._session = session

From 21aeea3a746021b52fe79c699543e2d48e40defd Mon Sep 17 00:00:00 2001
From: Mikael Dallaire Cote <110583667+0mdc@users.noreply.github.com>
Date: Mon, 10 Jun 2024 15:43:08 -0400
Subject: [PATCH 84/88] Change data collection output folder.

---
 .../hitl/rearrange_v2/config/experiment/headless_server.yaml  | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/examples/hitl/rearrange_v2/config/experiment/headless_server.yaml b/examples/hitl/rearrange_v2/config/experiment/headless_server.yaml
index ca0e6a8094..b42427502b 100644
--- a/examples/hitl/rearrange_v2/config/experiment/headless_server.yaml
+++ b/examples/hitl/rearrange_v2/config/experiment/headless_server.yaml
@@ -11,3 +11,7 @@ habitat_hitl:
   experimental:
     headless:
       do_headless: True
+
+rearrange_v2:
+  data_collection:
+    s3_path: "Phase_3/"

From 776f0c7263de5962ae9a4efb1fdcb9ae99614451 Mon Sep 17 00:00:00 2001
From: Priyam Parashar <priyam8parashar@gmail.com>
Date: Tue, 11 Jun 2024 22:29:48 +0000
Subject: [PATCH 85/88] handle error termination from user/agent

---
 examples/hitl/rearrange_v2/rearrange_v2.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/examples/hitl/rearrange_v2/rearrange_v2.py b/examples/hitl/rearrange_v2/rearrange_v2.py
index f234ef3e04..595f56399a 100644
--- a/examples/hitl/rearrange_v2/rearrange_v2.py
+++ b/examples/hitl/rearrange_v2/rearrange_v2.py
@@ -57,6 +57,7 @@ class EpisodeCompletionStatus(Enum):
     PENDING = (0,)
     SUCCESS = (1,)
     FAILURE = (2,)
+    ERROR   = (3,)
 
 
 class FrameRecorder:
@@ -165,12 +166,16 @@ def __init__(
         self.cam_transform = mn.Matrix4.identity_init()
 
         self.episode_completion_status = EpisodeCompletionStatus.PENDING
+        self._episode_completion_message = ''
 
     def _on_termination_cb(self, _e: Any = None):
         if _e.status == PlannerStatus.SUCCESS:
             self.episode_completion_status = EpisodeCompletionStatus.SUCCESS
-        else:
+        elif _e.status == PlannerStatus.FAILED:
             self.episode_completion_status = EpisodeCompletionStatus.FAILURE
+        elif _e.status == PlannerStatus.ERROR:
+            self.episode_completion_status = EpisodeCompletionStatus.ERROR
+            self._episode_message = _e.message
 
     def update_camera_from_sensor(self) -> None:
         """
@@ -917,5 +922,7 @@ def _is_episode_successful(self) -> bool:
 
     def _on_termination_cb(self, _e: Any = None):
         # Trigger episode change sequence when an agent error occurs.
+        if _e.status == PlannerStatus.ERROR:
+            self._skip_episode_error_message = f"Other participant encountered an error: {_e.message}. Skipping episode."
         if _e.status == PlannerStatus.FAILED:
             self._skip_episode_error_message = "The other participant has encountered an error. Skipping episode."

From 12e4f56eea6cfb5712bfd80223bacf032785d196 Mon Sep 17 00:00:00 2001
From: Priyam Parashar <priyam8parashar@gmail.com>
Date: Tue, 11 Jun 2024 22:31:06 +0000
Subject: [PATCH 86/88] sending error information

---
 .../environment/controllers/llm_controller.py          | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py b/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py
index 6861955473..6a02b4c791 100644
--- a/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py
+++ b/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py
@@ -36,11 +36,13 @@
 class PlannerStatus(Enum):
     SUCCESS = 0
     FAILED = 1
+    ERROR = 2
 
 
 @dataclass
 class AgentTerminationEvent:
     status: PlannerStatus
+    message: str
 
 
 class LLMController(SingleAgentBaselinesController):
@@ -268,11 +270,15 @@ def act(self, observations, debug_obs: bool = False, *args, **kwargs):
                 >= self._planner_info["replanning_threshold"]
             ):
                 self._on_termination.invoke(
-                    AgentTerminationEvent(PlannerStatus.FAILED)
+                    AgentTerminationEvent(status=PlannerStatus.FAILED, message="replanning threshold exceeded")
+                )
+            elif "ConnectionError" in self._planner_info["prompts"][0]:
+                self._on_termination.invoke(
+                    AgentTerminationEvent(status=PlannerStatus.ERROR, message="LLM connection error")
                 )
             else:
                 self._on_termination.invoke(
-                    AgentTerminationEvent(PlannerStatus.SUCCESS)
+                    AgentTerminationEvent(status=PlannerStatus.SUCCESS, message="")
                 )
             self._termination_reported = True
 

From f4fe7de2d18b8d00d08e4082400fbbdb30b61979 Mon Sep 17 00:00:00 2001
From: Mikael Dallaire Cote <110583667+0mdc@users.noreply.github.com>
Date: Tue, 11 Jun 2024 21:20:48 -0400
Subject: [PATCH 87/88] Record episode status.

---
 examples/hitl/rearrange_v2/rearrange_v2.py         | 14 ++++++++------
 examples/hitl/rearrange_v2/session_recorder.py     |  6 +++++-
 .../environment/controllers/llm_controller.py      | 14 +++++++++++---
 3 files changed, 24 insertions(+), 10 deletions(-)

diff --git a/examples/hitl/rearrange_v2/rearrange_v2.py b/examples/hitl/rearrange_v2/rearrange_v2.py
index 595f56399a..e623f3ac0c 100644
--- a/examples/hitl/rearrange_v2/rearrange_v2.py
+++ b/examples/hitl/rearrange_v2/rearrange_v2.py
@@ -42,6 +42,7 @@
     GuiRobotController,
 )
 from habitat_hitl.environment.controllers.llm_controller import (
+    AgentTerminationEvent,
     LLMController,
     PlannerStatus,
 )
@@ -57,7 +58,6 @@ class EpisodeCompletionStatus(Enum):
     PENDING = (0,)
     SUCCESS = (1,)
     FAILURE = (2,)
-    ERROR   = (3,)
 
 
 class FrameRecorder:
@@ -166,15 +166,15 @@ def __init__(
         self.cam_transform = mn.Matrix4.identity_init()
 
         self.episode_completion_status = EpisodeCompletionStatus.PENDING
-        self._episode_completion_message = ''
+        self._episode_completion_message = ""
 
-    def _on_termination_cb(self, _e: Any = None):
+    def _on_termination_cb(self, _e: AgentTerminationEvent = None):
         if _e.status == PlannerStatus.SUCCESS:
             self.episode_completion_status = EpisodeCompletionStatus.SUCCESS
         elif _e.status == PlannerStatus.FAILED:
             self.episode_completion_status = EpisodeCompletionStatus.FAILURE
         elif _e.status == PlannerStatus.ERROR:
-            self.episode_completion_status = EpisodeCompletionStatus.ERROR
+            self.episode_completion_status = EpisodeCompletionStatus.FAILURE
             self._episode_message = _e.message
 
     def update_camera_from_sensor(self) -> None:
@@ -639,7 +639,9 @@ def on_exit(self):
         super().on_exit()
 
         episode_success = self._is_episode_successful()
-        self._session.session_recorder.end_episode(episode_success)
+        self._session.session_recorder.end_episode(
+            episode_success, self._skip_episode_error_message
+        )
 
     def on_environment_reset(self, episode_recorder_dict):
         self._world.reset()
@@ -920,7 +922,7 @@ def _is_episode_successful(self) -> bool:
             for agent_index in range(self._num_agents)
         )
 
-    def _on_termination_cb(self, _e: Any = None):
+    def _on_termination_cb(self, _e: AgentTerminationEvent = None):
         # Trigger episode change sequence when an agent error occurs.
         if _e.status == PlannerStatus.ERROR:
             self._skip_episode_error_message = f"Other participant encountered an error: {_e.message}. Skipping episode."
diff --git a/examples/hitl/rearrange_v2/session_recorder.py b/examples/hitl/rearrange_v2/session_recorder.py
index db7f2344a7..ee39a3a115 100644
--- a/examples/hitl/rearrange_v2/session_recorder.py
+++ b/examples/hitl/rearrange_v2/session_recorder.py
@@ -4,7 +4,7 @@
 # This source code is licensed under the MIT license found in the
 # LICENSE file in the root directory of this source tree.
 
-from typing import Any, Dict, List
+from typing import Any, Dict, List, Optional
 
 from util import timestamp
 
@@ -68,10 +68,14 @@ def start_episode(
     def end_episode(
         self,
         success: bool,
+        episode_failure_error_message: Optional[str],
     ):
         self.data["episodes"][-1]["end_timestamp"] = timestamp()
         self.data["episodes"][-1]["success"] = success
         self.data["episodes"][-1]["completed"] = True
+        self.data["episodes"][-1][
+            "error_message"
+        ] = episode_failure_error_message
 
     def record_frame(
         self,
diff --git a/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py b/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py
index 6a02b4c791..bd587f4e88 100644
--- a/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py
+++ b/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py
@@ -270,15 +270,23 @@ def act(self, observations, debug_obs: bool = False, *args, **kwargs):
                 >= self._planner_info["replanning_threshold"]
             ):
                 self._on_termination.invoke(
-                    AgentTerminationEvent(status=PlannerStatus.FAILED, message="replanning threshold exceeded")
+                    AgentTerminationEvent(
+                        status=PlannerStatus.FAILED,
+                        message="replanning threshold exceeded",
+                    )
                 )
             elif "ConnectionError" in self._planner_info["prompts"][0]:
                 self._on_termination.invoke(
-                    AgentTerminationEvent(status=PlannerStatus.ERROR, message="LLM connection error")
+                    AgentTerminationEvent(
+                        status=PlannerStatus.ERROR,
+                        message="LLM connection error",
+                    )
                 )
             else:
                 self._on_termination.invoke(
-                    AgentTerminationEvent(status=PlannerStatus.SUCCESS, message="")
+                    AgentTerminationEvent(
+                        status=PlannerStatus.SUCCESS, message=""
+                    )
                 )
             self._termination_reported = True
 

From 744dc38c69e0a7530323e9401bb842e2eff2aa0e Mon Sep 17 00:00:00 2001
From: Priyam Parashar <priyam8parashar@gmail.com>
Date: Wed, 12 Jun 2024 17:54:04 +0000
Subject: [PATCH 88/88] guarding against exceptions and absence of handles

---
 .../environment/controllers/llm_controller.py | 21 +++++++++----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py b/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py
index 6a02b4c791..45ad8026e0 100644
--- a/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py
+++ b/habitat-hitl/habitat_hitl/environment/controllers/llm_controller.py
@@ -167,9 +167,6 @@ def _on_place(self, _e: Any = None):
             "object_id": _e.object_id,
             "object_handle": _e.object_handle,
             "receptacle_id": _e.receptacle_id,
-            # "receptacle_name": self.environment_interface.world_graph.get_node_from_sim_handle(
-            #     get_obj_from_id(self.environment_interface.sim, _e.receptacle_id).handle
-            # ),
         }
 
         self._human_action_history.append(action)
@@ -196,12 +193,12 @@ def push_user_actions_to_llm(self):
         # update agent state history
         while self._human_action_history:
             action = self._human_action_history.pop(0)
-            object_name = None
+            object_name = "unknown object"
             try:
                 object_name = self.environment_interface.world_graph.get_node_from_sim_handle(
                     action["object_handle"]
                 ).name
-            except Exception as e:
+            except ValueError as e:
                 self._log.append(e)
                 continue
             if action["action"] == "PICK":
@@ -211,12 +208,14 @@ def push_user_actions_to_llm(self):
             elif action["action"] == "PLACE":
                 furniture_name = "unknown furniture"
                 if action["receptacle_id"] is not None:
-                    receptacle_node = self.environment_interface.world_graph.get_node_from_sim_handle(
-                        get_obj_from_id(
-                            self.environment_interface.sim,
-                            action["receptacle_id"],
-                        ).handle
-                    )
+                    receptacle_object = get_obj_from_id(self.environment_interface.sim, action["receptacle_id"])
+                    receptacle_handle = receptacle_object.handle if hasattr(receptacle_object, "handle") else None
+                    receptacle_node = None
+                    if receptacle_handle is not None:
+                        try:
+                            receptacle_node = self.environment_interface.world_graph.get_node_from_sim_handle(receptacle_handle)
+                        except ValueError as e:
+                            self._log.append(e)
                     if receptacle_node is not None:
                         if isinstance(receptacle_node, Furniture):
                             furniture_name = receptacle_node.name