From b7e0fa4f955dddb1f5b3eabb42f5885ccc22c974 Mon Sep 17 00:00:00 2001
From: youliangtan
Date: Mon, 17 Jun 2024 11:02:14 -0700
Subject: [PATCH 1/2] fix rare duplicated data buffer entry

Signed-off-by: youliangtan
---
 .../async_drq_randomized.py                         |  4 ++--
 examples/async_bin_relocation_fwbw_drq/run_actor.sh |  6 +-----
 .../async_cable_route_drq/async_drq_randomized.py   |  4 ++--
 examples/async_cable_route_drq/run_actor.sh         |  4 ----
 examples/async_drq_sim/async_drq_sim.py             | 13 +++++++++++--
 examples/async_drq_sim/run_actor.sh                 |  4 ----
 examples/async_drq_sim/run_learner.sh               |  2 --
 .../async_pcb_insert_drq/async_drq_randomized.py    |  4 ++--
 examples/async_pcb_insert_drq/run_actor.sh          |  4 ----
 .../async_peg_insert_drq/async_drq_randomized.py    |  4 ++--
 examples/async_peg_insert_drq/run_actor.sh          |  6 ------
 examples/async_sac_state_sim/async_sac_state_sim.py | 13 +++++++++++--
 examples/async_sac_state_sim/run_actor.sh           |  4 ----
 examples/async_sac_state_sim/run_learner.sh         |  2 --
 serl_launcher/serl_launcher/utils/launcher.py       |  1 +
 serl_launcher/setup.py                              |  2 +-
 16 files changed, 33 insertions(+), 44 deletions(-)

diff --git a/examples/async_bin_relocation_fwbw_drq/async_drq_randomized.py b/examples/async_bin_relocation_fwbw_drq/async_drq_randomized.py
index c5fb76e..78a8eee 100644
--- a/examples/async_bin_relocation_fwbw_drq/async_drq_randomized.py
+++ b/examples/async_bin_relocation_fwbw_drq/async_drq_randomized.py
@@ -56,7 +56,7 @@
 
 flags.DEFINE_integer("random_steps", 300, "Sample random actions for this many steps.")
 flags.DEFINE_integer("training_starts", 300, "Training starts after this step.")
-flags.DEFINE_integer("steps_per_update", 50, "Number of steps per update the server.")
+flags.DEFINE_integer("steps_per_update", 30, "Number of steps per update the server.")
 
 flags.DEFINE_integer("log_period", 10, "Logging period.")
 flags.DEFINE_integer("eval_period", 2000, "Evaluation period.")
@@ -537,7 +537,7 @@ def main(_):
     elif FLAGS.actor:
         sampling_rng = jax.device_put(sampling_rng, sharding.replicate())
         data_stores = OrderedDict(
-            {name: QueuedDataStore(50000) for name in id_to_task.values()}
+            {name: QueuedDataStore(2000) for name in id_to_task.values()}
         )
         # actor loop
         print_green("starting actor loop")
diff --git a/examples/async_bin_relocation_fwbw_drq/run_actor.sh b/examples/async_bin_relocation_fwbw_drq/run_actor.sh
index a887cd2..5f25bc1 100644
--- a/examples/async_bin_relocation_fwbw_drq/run_actor.sh
+++ b/examples/async_bin_relocation_fwbw_drq/run_actor.sh
@@ -7,14 +7,10 @@ python async_drq_randomized.py "$@" \
     --exp_name=serl_dev_drq_rlpd20demos_bin_fwbw_resnet_096 \
     --seed 0 \
     --random_steps 200 \
-    --training_starts 200 \
-    --critic_actor_ratio 4 \
-    --batch_size 256 \
-    --eval_period 2000 \
     --encoder_type resnet-pretrained \
     --demo_path fw_bin_2000_demo_2024-01-23_18-49-56.pkl \
     --fw_ckpt_path /home/undergrad/code/serl_dev/examples/async_bin_relocation_fwbw_drq/bin_fw_096 \
     --fw_reward_classifier_ckpt_path "/home/undergrad/code/serl_dev/examples/async_bin_relocation_fwbw_drq/fw_classifier_ckpt" \
     --bw_reward_classifier_ckpt_path "/home/undergrad/code/serl_dev/examples/async_bin_relocation_fwbw_drq/bw_classifier_ckpt" \
-    --eval_checkpoint_step 31000 \
+    --eval_checkpoint_step 100 \
     --eval_n_trajs 100
diff --git a/examples/async_cable_route_drq/async_drq_randomized.py b/examples/async_cable_route_drq/async_drq_randomized.py
index e3a3b1a..90770fc 100644
--- a/examples/async_cable_route_drq/async_drq_randomized.py
+++ b/examples/async_cable_route_drq/async_drq_randomized.py
@@ -56,7 +56,7 @@
 
 flags.DEFINE_integer("random_steps", 300, "Sample random actions for this many steps.")
 flags.DEFINE_integer("training_starts", 300, "Training starts after this step.")
-flags.DEFINE_integer("steps_per_update", 50, "Number of steps per update the server.")
+flags.DEFINE_integer("steps_per_update", 30, "Number of steps per update the server.")
 
 flags.DEFINE_integer("log_period", 10, "Logging period.")
 flags.DEFINE_integer("eval_period", 2000, "Evaluation period.")
@@ -410,7 +410,7 @@ def main(_):
 
     elif FLAGS.actor:
         sampling_rng = jax.device_put(sampling_rng, sharding.replicate())
-        data_store = QueuedDataStore(50000)  # the queue size on the actor
+        data_store = QueuedDataStore(2000)  # the queue size on the actor
         # actor loop
         print_green("starting actor loop")
         actor(agent, data_store, env, sampling_rng)
diff --git a/examples/async_cable_route_drq/run_actor.sh b/examples/async_cable_route_drq/run_actor.sh
index eb47a1c..93a4f8a 100644
--- a/examples/async_cable_route_drq/run_actor.sh
+++ b/examples/async_cable_route_drq/run_actor.sh
@@ -7,10 +7,6 @@ python async_drq_randomized.py "$@" \
     --exp_name=serl_dev_drq_rlpd20demos_cable_random_resnet \
     --seed 0 \
     --random_steps 0 \
-    --training_starts 200 \
-    --critic_actor_ratio 4 \
-    --batch_size 256 \
-    --eval_period 2000 \
     --encoder_type resnet-pretrained \
     --demo_path cable_route_20_demos_2024-01-04_12-10-54.pkl \
     --checkpoint_path /home/undergrad/code/serl_dev/examples/async_cable_route_drq/10x10_30degs_20demos_rand_cable_096 \
diff --git a/examples/async_drq_sim/async_drq_sim.py b/examples/async_drq_sim/async_drq_sim.py
index 29f55ba..a84bc58 100644
--- a/examples/async_drq_sim/async_drq_sim.py
+++ b/examples/async_drq_sim/async_drq_sim.py
@@ -52,7 +52,7 @@
 
 flags.DEFINE_integer("random_steps", 300, "Sample random actions for this many steps.")
 flags.DEFINE_integer("training_starts", 300, "Training starts after this step.")
-flags.DEFINE_integer("steps_per_update", 50, "Number of steps per update the server.")
+flags.DEFINE_integer("steps_per_update", 30, "Number of steps per update the server.")
 
 flags.DEFINE_integer("log_period", 10, "Logging period.")
 flags.DEFINE_integer("eval_period", 2000, "Evaluation period.")
@@ -255,6 +255,14 @@ def stats_callback(type: str, payload: dict) -> dict:
 
     # wait till the replay buffer is filled with enough data
     timer = Timer()
+
+    # show replay buffer progress bar during training
+    pbar = tqdm.tqdm(
+        total=FLAGS.replay_buffer_capacity,
+        initial=len(replay_buffer),
+        desc="replay buffer",
+    )
+
     for step in tqdm.tqdm(range(FLAGS.max_steps), dynamic_ncols=True, desc="learner"):
         # run n-1 critic updates and 1 critic + actor update.
         # This makes training on GPU faster by reducing the large batch transfer time from CPU to GPU
@@ -298,6 +306,7 @@ def stats_callback(type: str, payload: dict) -> dict:
                     FLAGS.checkpoint_path, agent.state, step=update_steps, keep=20
                 )
 
+        pbar.update(len(replay_buffer) - pbar.n)  # update replay buffer bar
         update_steps += 1
 
 
@@ -397,7 +406,7 @@ def preload_data_transform(data, metadata) -> Optional[Dict[str, Any]]:
 
     elif FLAGS.actor:
         sampling_rng = jax.device_put(sampling_rng, sharding.replicate())
-        data_store = QueuedDataStore(50000)  # the queue size on the actor
+        data_store = QueuedDataStore(2000)  # the queue size on the actor
 
         # actor loop
         print_green("starting actor loop")
diff --git a/examples/async_drq_sim/run_actor.sh b/examples/async_drq_sim/run_actor.sh
index 1cf4557..52fcfc4 100644
--- a/examples/async_drq_sim/run_actor.sh
+++ b/examples/async_drq_sim/run_actor.sh
@@ -6,9 +6,5 @@ python async_drq_sim.py "$@" \
     --exp_name=serl_dev_drq_sim_test_resnet \
     --seed 0 \
     --random_steps 1000 \
-    --training_starts 1000 \
-    --critic_actor_ratio 4 \
-    --batch_size 256 \
-    --eval_period 2000 \
     --encoder_type resnet-pretrained \
     --debug
diff --git a/examples/async_drq_sim/run_learner.sh b/examples/async_drq_sim/run_learner.sh
index 4836e6c..3944544 100644
--- a/examples/async_drq_sim/run_learner.sh
+++ b/examples/async_drq_sim/run_learner.sh
@@ -4,10 +4,8 @@ python async_drq_sim.py "$@" \
     --learner \
     --exp_name=serl_dev_drq_sim_test_resnet \
     --seed 0 \
-    --random_steps 1000 \
     --training_starts 1000 \
     --critic_actor_ratio 4 \
-    --eval_period 2000 \
     --encoder_type resnet-pretrained \
     # --demo_path franka_lift_cube_image_20_trajs.pkl \
     --debug # wandb is disabled when debug
diff --git a/examples/async_pcb_insert_drq/async_drq_randomized.py b/examples/async_pcb_insert_drq/async_drq_randomized.py
index 9a1a059..8248379 100644
--- a/examples/async_pcb_insert_drq/async_drq_randomized.py
+++ b/examples/async_pcb_insert_drq/async_drq_randomized.py
@@ -57,7 +57,7 @@
 
 flags.DEFINE_integer("random_steps", 300, "Sample random actions for this many steps.")
 flags.DEFINE_integer("training_starts", 300, "Training starts after this step.")
-flags.DEFINE_integer("steps_per_update", 50, "Number of steps per update the server.")
+flags.DEFINE_integer("steps_per_update", 30, "Number of steps per update the server.")
 
 flags.DEFINE_integer("log_period", 10, "Logging period.")
 flags.DEFINE_integer("eval_period", 2000, "Evaluation period.")
@@ -476,7 +476,7 @@ def main(_):
 
     elif FLAGS.actor:
         sampling_rng = jax.device_put(sampling_rng, sharding.replicate())
-        data_store = QueuedDataStore(50000)  # the queue size on the actor
+        data_store = QueuedDataStore(2000)  # the queue size on the actor
 
         # actor loop
         print_green("starting actor loop")
diff --git a/examples/async_pcb_insert_drq/run_actor.sh b/examples/async_pcb_insert_drq/run_actor.sh
index 41596b0..64153ab 100644
--- a/examples/async_pcb_insert_drq/run_actor.sh
+++ b/examples/async_pcb_insert_drq/run_actor.sh
@@ -7,10 +7,6 @@ python async_drq_randomized.py "$@" \
     --exp_name=serl_dev_drq_rlpd10demos_peg_insert_random_resnet \
     --seed 0 \
     --random_steps 0 \
-    --training_starts 200 \
-    --critic_actor_ratio 4 \
-    --batch_size 256 \
-    --eval_period 2000 \
     --encoder_type resnet-pretrained \
     --demo_path pcb_insert_20_demos_2023-12-27_19-40-50.pkl \
     --checkpoint_path /home/undergrad/code/serl_dev/examples/async_pcb_insert_drq/5x5_20degs_20demos_rand_pcb_insert_096 \
diff --git a/examples/async_peg_insert_drq/async_drq_randomized.py b/examples/async_peg_insert_drq/async_drq_randomized.py
index 81cc4a8..4fd76f0 100644
--- a/examples/async_peg_insert_drq/async_drq_randomized.py
+++ b/examples/async_peg_insert_drq/async_drq_randomized.py
@@ -52,7 +52,7 @@
 
 flags.DEFINE_integer("random_steps", 300, "Sample random actions for this many steps.")
 flags.DEFINE_integer("training_starts", 300, "Training starts after this step.")
-flags.DEFINE_integer("steps_per_update", 50, "Number of steps per update the server.")
+flags.DEFINE_integer("steps_per_update", 30, "Number of steps per update the server.")
 
 flags.DEFINE_integer("log_period", 10, "Logging period.")
 flags.DEFINE_integer("eval_period", 2000, "Evaluation period.")
@@ -383,7 +383,7 @@ def main(_):
 
     elif FLAGS.actor:
         sampling_rng = jax.device_put(sampling_rng, sharding.replicate())
-        data_store = QueuedDataStore(50000)  # the queue size on the actor
+        data_store = QueuedDataStore(2000)  # the queue size on the actor
 
         # actor loop
         print_green("starting actor loop")
diff --git a/examples/async_peg_insert_drq/run_actor.sh b/examples/async_peg_insert_drq/run_actor.sh
index 9b34b2a..a251e75 100644
--- a/examples/async_peg_insert_drq/run_actor.sh
+++ b/examples/async_peg_insert_drq/run_actor.sh
@@ -8,11 +8,5 @@ python async_drq_randomized.py "$@" \
     --seed 0 \
     --random_steps 0 \
     --training_starts 200 \
-    --critic_actor_ratio 4 \
-    --batch_size 256 \
-    --eval_period 2000 \
     --encoder_type resnet-pretrained \
     --demo_path peg_insert_20_demos_2023-12-25_16-13-25.pkl \
-    # --checkpoint_path /home/undergrad/code/serl_dev/examples/async_pcb_insert_drq/5x5_20degs_100demos_rand_pcb_insert_bc \
-    # --eval_checkpoint_step 20000 \
-    # --eval_n_trajs 100 \
diff --git a/examples/async_sac_state_sim/async_sac_state_sim.py b/examples/async_sac_state_sim/async_sac_state_sim.py
index 5994208..90a1acf 100644
--- a/examples/async_sac_state_sim/async_sac_state_sim.py
+++ b/examples/async_sac_state_sim/async_sac_state_sim.py
@@ -43,7 +43,7 @@
 
 flags.DEFINE_integer("random_steps", 300, "Sample random actions for this many steps.")
 flags.DEFINE_integer("training_starts", 300, "Training starts after this step.")
-flags.DEFINE_integer("steps_per_update", 50, "Number of steps per update the server.")
+flags.DEFINE_integer("steps_per_update", 30, "Number of steps per update the server.")
 
 flags.DEFINE_integer("log_period", 10, "Logging period.")
 flags.DEFINE_integer("eval_period", 2000, "Evaluation period.")
@@ -214,6 +214,14 @@ def stats_callback(type: str, payload: dict) -> dict:
 
     # wait till the replay buffer is filled with enough data
     timer = Timer()
+
+    # show replay buffer progress bar during training
+    pbar = tqdm.tqdm(
+        total=FLAGS.replay_buffer_capacity,
+        initial=len(replay_buffer),
+        desc="replay buffer",
+    )
+
     for step in tqdm.tqdm(range(FLAGS.max_steps), dynamic_ncols=True, desc="learner"):
         # Train the networks
         with timer.context("sample_replay_buffer"):
@@ -236,6 +244,7 @@ def stats_callback(type: str, payload: dict) -> dict:
                     FLAGS.checkpoint_path, agent.state, step=update_steps, keep=20
                 )
 
+        pbar.update(len(replay_buffer) - pbar.n)  # update replay buffer bar
         update_steps += 1
 
 
@@ -299,7 +308,7 @@ def main(_):
 
    elif FLAGS.actor:
         sampling_rng = jax.device_put(sampling_rng, sharding.replicate())
-        data_store = QueuedDataStore(50000)  # the queue size on the actor
+        data_store = QueuedDataStore(2000)  # the queue size on the actor
 
         # actor loop
         print_green("starting actor loop")
diff --git a/examples/async_sac_state_sim/run_actor.sh b/examples/async_sac_state_sim/run_actor.sh
index ce4ff87..5767791 100644
--- a/examples/async_sac_state_sim/run_actor.sh
+++ b/examples/async_sac_state_sim/run_actor.sh
@@ -7,8 +7,4 @@ python async_sac_state_sim.py "$@" \
     --exp_name=serl_dev_sim_test \
     --seed 0 \
     --random_steps 1000 \
-    --training_starts 1000 \
-    --critic_actor_ratio 8 \
-    --batch_size 256 \
-    --eval_period 2000 \
     --debug
diff --git a/examples/async_sac_state_sim/run_learner.sh b/examples/async_sac_state_sim/run_learner.sh
index f5a2cb8..10a203c 100644
--- a/examples/async_sac_state_sim/run_learner.sh
+++ b/examples/async_sac_state_sim/run_learner.sh
@@ -5,9 +5,7 @@ python async_sac_state_sim.py "$@" \
     --env PandaPickCube-v0 \
     --exp_name=serl_dev_sim_test \
     --seed 0 \
-    --random_steps 1000 \
     --training_starts 1000 \
     --critic_actor_ratio 8 \
     --batch_size 256 \
-    --eval_period 2000 \
     --debug # wandb is disabled when debug
diff --git a/serl_launcher/serl_launcher/utils/launcher.py b/serl_launcher/serl_launcher/utils/launcher.py
index 39740c5..99d5a61 100644
--- a/serl_launcher/serl_launcher/utils/launcher.py
+++ b/serl_launcher/serl_launcher/utils/launcher.py
@@ -173,6 +173,7 @@ def make_trainer_config(port_number: int = 5488, broadcast_port: int = 5489):
         port_number=port_number,
         broadcast_port=broadcast_port,
         request_types=["send-stats"],
+        # experimental_pipeline_url="tcp://127.0.0.1:5547",  # experimental ds update
     )
 
 
diff --git a/serl_launcher/setup.py b/serl_launcher/setup.py
index 0125bf0..26658d6 100644
--- a/serl_launcher/setup.py
+++ b/serl_launcher/setup.py
@@ -13,7 +13,7 @@
         "typing_extensions",
         "opencv-python",
         "lz4",
-        "agentlace@git+https://github.com/youliangtan/agentlace.git@b9be677d5d20772fca98c8be44777ecb7111bc59",
+        "agentlace@git+https://github.com/youliangtan/agentlace.git@f025024631db0992a90085ee4637d8c0c90da317",
     ],
     packages=find_packages(),
     zip_safe=False,

From 0f962cb785c1de170705ac15d60ca9ef6a0f525e Mon Sep 17 00:00:00 2001
From: youliangtan
Date: Mon, 17 Jun 2024 11:04:52 -0700
Subject: [PATCH 2/2] bump up version

Signed-off-by: youliangtan
---
 serl_launcher/setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/serl_launcher/setup.py b/serl_launcher/setup.py
index 26658d6..fa5dc0a 100644
--- a/serl_launcher/setup.py
+++ b/serl_launcher/setup.py
@@ -2,7 +2,7 @@
 
 setup(
     name="serl_launcher",
-    version="0.1.2",
+    version="0.1.3",
     description="library for rl experiments",
     url="https://github.com/rail-berkeley/serl",
     author="auth",
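
Notes:

The "rare duplicated data buffer entry" fix lands in two places: the
actor-side QueuedDataStore shrinks from 50000 to 2000 entries, and the
agentlace dependency is repinned to f025024, which carries the
transport-level change. A minimal sketch of the general idea follows;
the names (SeqQueuedStore, merge_into_replay, last_seen_id) are
hypothetical and this is not the agentlace API. Each transition is
tagged with a monotonically increasing sequence id, so the learner
inserts a given entry at most once even if a batch is retransmitted:

# Illustrative sketch only; hypothetical names, not the actual
# agentlace implementation.
from collections import deque
from typing import Any, List, Tuple


class SeqQueuedStore:
    """Bounded actor-side queue; every entry carries a unique sequence id."""

    def __init__(self, capacity: int):
        self._queue = deque(maxlen=capacity)  # oldest entries drop when full
        self._next_id = 0

    def insert(self, transition: Any) -> None:
        self._queue.append((self._next_id, transition))
        self._next_id += 1

    def batch(self) -> List[Tuple[int, Any]]:
        return list(self._queue)


def merge_into_replay(batch, replay_buffer: list, last_seen_id: int) -> int:
    """Learner side: keep only ids newer than the last one seen, so a
    batch that arrives twice cannot create duplicate replay entries."""
    for seq_id, transition in batch:
        if seq_id > last_seen_id:
            replay_buffer.append(transition)
            last_seen_id = seq_id
    return last_seen_id

Under this scheme the smaller queue (2000) also bounds how much stale
data an actor can resend after a reconnect, while still comfortably
covering the steps_per_update send interval (now 30 steps).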
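
The learner-side progress bar added to async_drq_sim.py and
async_sac_state_sim.py uses only stock tqdm behaviour: initial seeds
the bar with the buffer's current fill, pbar.n is the bar's running
count, and pbar.update(len(replay_buffer) - pbar.n) jumps the bar to
the true fill level on each iteration. A self-contained version of the
same pattern, with stand-ins for the flags and the real replay buffer:

import tqdm

capacity = 200_000   # stand-in for FLAGS.replay_buffer_capacity
replay_buffer = []   # stand-in for the learner's replay buffer

pbar = tqdm.tqdm(total=capacity, initial=len(replay_buffer), desc="replay buffer")
for step in range(1000):
    replay_buffer.extend(range(3))            # pretend 3 transitions arrived
    pbar.update(len(replay_buffer) - pbar.n)  # advance by the delta since last refresh
pbar.close()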