fix the naming for utd_ratio
jianlanluo committed Jun 13, 2024
1 parent 20e27b4 commit 6f23843
Showing 19 changed files with 25 additions and 26 deletions.
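
For context: the renamed flag controls how many critic updates the learner runs for each actor update; this commit changes only the flag's name, not the behavior. A minimal sketch of the pattern, using placeholder update functions rather than serl's actual agent API:

def update_critic(params, batch):
    return params  # placeholder: one gradient step on the critic

def update_actor(params, batch):
    return params  # placeholder: one gradient step on the actor

def learner_step(params, replay_iterator, critic_actor_ratio=4):
    # n-1 critic-only updates, each on a freshly sampled batch
    for _ in range(critic_actor_ratio - 1):
        params = update_critic(params, next(replay_iterator))
    # one final update that trains both critic and actor
    batch = next(replay_iterator)
    params = update_critic(params, batch)
    params = update_actor(params, batch)
    return params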
4 changes: 2 additions & 2 deletions examples/async_bin_relocation_fwbw_drq/async_drq_randomized.py
@@ -49,7 +49,7 @@
 flags.DEFINE_integer("seed", 42, "Random seed.")
 flags.DEFINE_bool("save_model", False, "Whether to save model.")
 flags.DEFINE_integer("batch_size", 256, "Batch size.")
-flags.DEFINE_integer("utd_ratio", 4, "UTD ratio.")
+flags.DEFINE_integer("critic_actor_ratio", 4, "critic to actor update ratio.")
 
 flags.DEFINE_integer("max_steps", 1000000, "Maximum number of training steps.")
 flags.DEFINE_integer("replay_buffer_capacity", 200000, "Replay buffer capacity.")
@@ -374,7 +374,7 @@ def stats_callback(type: str, payload: dict) -> dict:
             continue
         # run n-1 critic updates and 1 critic + actor update.
         # This makes training on GPU faster by reducing the large batch transfer time from CPU to GPU
-        for critic_step in range(FLAGS.utd_ratio - 1):
+        for critic_step in range(FLAGS.critic_actor_ratio - 1):
             with timer.context("sample_replay_buffer"):
                 batch = next(replay_iterator)
                 demo_batch = next(demo_iterator)
2 changes: 1 addition & 1 deletion examples/async_bin_relocation_fwbw_drq/run_actor.sh
@@ -8,7 +8,7 @@ python async_drq_randomized.py "$@" \
     --seed 0 \
     --random_steps 200 \
     --training_starts 200 \
-    --utd_ratio 4 \
+    --critic_actor_ratio 4 \
     --batch_size 256 \
     --eval_period 2000 \
     --encoder_type resnet-pretrained \
2 changes: 1 addition & 1 deletion examples/async_bin_relocation_fwbw_drq/run_bw_learner.sh
@@ -7,7 +7,7 @@ python async_drq_randomized.py "$@" \
     --seed 0 \
     --random_steps 200 \
     --training_starts 200 \
-    --utd_ratio 4 \
+    --critic_actor_ratio 4 \
     --batch_size 256 \
     --eval_period 2000 \
     --encoder_type resnet-pretrained \
2 changes: 1 addition & 1 deletion examples/async_bin_relocation_fwbw_drq/run_fw_learner.sh
@@ -7,7 +7,7 @@ python async_drq_randomized.py "$@" \
    --seed 0 \
    --random_steps 200 \
    --training_starts 200 \
-    --utd_ratio 4 \
+    --critic_actor_ratio 4 \
    --batch_size 256 \
    --eval_period 2000 \
    --encoder_type resnet-pretrained \
4 changes: 2 additions & 2 deletions examples/async_cable_route_drq/async_drq_randomized.py
@@ -49,7 +49,7 @@
 flags.DEFINE_integer("seed", 42, "Random seed.")
 flags.DEFINE_bool("save_model", False, "Whether to save model.")
 flags.DEFINE_integer("batch_size", 256, "Batch size.")
-flags.DEFINE_integer("utd_ratio", 4, "UTD ratio.")
+flags.DEFINE_integer("critic_actor_ratio", 4, "critic to actor update ratio.")
 
 flags.DEFINE_integer("max_steps", 1000000, "Maximum number of training steps.")
 flags.DEFINE_integer("replay_buffer_capacity", 200000, "Replay buffer capacity.")
@@ -285,7 +285,7 @@ def stats_callback(type: str, payload: dict) -> dict:
     for step in tqdm.tqdm(range(FLAGS.max_steps), dynamic_ncols=True, desc="learner"):
         # run n-1 critic updates and 1 critic + actor update.
         # This makes training on GPU faster by reducing the large batch transfer time from CPU to GPU
-        for critic_step in range(FLAGS.utd_ratio - 1):
+        for critic_step in range(FLAGS.critic_actor_ratio - 1):
             with timer.context("sample_replay_buffer"):
                 batch = next(replay_iterator)
                 demo_batch = next(demo_iterator)
2 changes: 1 addition & 1 deletion examples/async_cable_route_drq/run_actor.sh
@@ -8,7 +8,7 @@ python async_drq_randomized.py "$@" \
    --seed 0 \
    --random_steps 0 \
    --training_starts 200 \
-    --utd_ratio 4 \
+    --critic_actor_ratio 4 \
    --batch_size 256 \
    --eval_period 2000 \
    --encoder_type resnet-pretrained \
2 changes: 1 addition & 1 deletion examples/async_cable_route_drq/run_learner.sh
@@ -7,7 +7,7 @@ python async_drq_randomized.py "$@" \
    --seed 0 \
    --random_steps 600 \
    --training_starts 200 \
-    --utd_ratio 4 \
+    --critic_actor_ratio 4 \
    --batch_size 256 \
    --eval_period 2000 \
    --encoder_type resnet-pretrained \
4 changes: 2 additions & 2 deletions examples/async_drq_sim/async_drq_sim.py
@@ -45,7 +45,7 @@
 flags.DEFINE_integer("seed", 42, "Random seed.")
 flags.DEFINE_bool("save_model", False, "Whether to save model.")
 flags.DEFINE_integer("batch_size", 256, "Batch size.")
-flags.DEFINE_integer("utd_ratio", 4, "UTD ratio.")
+flags.DEFINE_integer("critic_actor_ratio", 4, "critic to actor update ratio.")
 
 flags.DEFINE_integer("max_steps", 1000000, "Maximum number of training steps.")
 flags.DEFINE_integer("replay_buffer_capacity", 200000, "Replay buffer capacity.")
@@ -258,7 +258,7 @@ def stats_callback(type: str, payload: dict) -> dict:
     for step in tqdm.tqdm(range(FLAGS.max_steps), dynamic_ncols=True, desc="learner"):
         # run n-1 critic updates and 1 critic + actor update.
         # This makes training on GPU faster by reducing the large batch transfer time from CPU to GPU
-        for critic_step in range(FLAGS.utd_ratio - 1):
+        for critic_step in range(FLAGS.critic_actor_ratio - 1):
             with timer.context("sample_replay_buffer"):
                 batch = next(replay_iterator)
 
2 changes: 1 addition & 1 deletion examples/async_drq_sim/run_actor.sh
@@ -7,7 +7,7 @@ python async_drq_sim.py "$@" \
    --seed 0 \
    --random_steps 1000 \
    --training_starts 1000 \
-    --utd_ratio 4 \
+    --critic_actor_ratio 4 \
    --batch_size 256 \
    --eval_period 2000 \
    --encoder_type resnet-pretrained \
2 changes: 1 addition & 1 deletion examples/async_drq_sim/run_learner.sh
@@ -6,7 +6,7 @@ python async_drq_sim.py "$@" \
    --seed 0 \
    --random_steps 1000 \
    --training_starts 1000 \
-    --utd_ratio 4 \
+    --critic_actor_ratio 4 \
    --eval_period 2000 \
    --encoder_type resnet-pretrained \
    # --demo_path franka_lift_cube_image_20_trajs.pkl \
4 changes: 2 additions & 2 deletions examples/async_pcb_insert_drq/async_drq_randomized.py
@@ -50,7 +50,7 @@
 flags.DEFINE_integer("seed", 42, "Random seed.")
 flags.DEFINE_bool("save_model", False, "Whether to save model.")
 flags.DEFINE_integer("batch_size", 256, "Batch size.")
-flags.DEFINE_integer("utd_ratio", 4, "UTD ratio.")
+flags.DEFINE_integer("critic_actor_ratio", 4, "critic to actor update ratio.")
 
 flags.DEFINE_integer("max_steps", 1000000, "Maximum number of training steps.")
 flags.DEFINE_integer("replay_buffer_capacity", 200000, "Replay buffer capacity.")
@@ -341,7 +341,7 @@ def stats_callback(type: str, payload: dict) -> dict:
     for step in tqdm.tqdm(range(FLAGS.max_steps), dynamic_ncols=True, desc="learner"):
         # run n-1 critic updates and 1 critic + actor update.
         # This makes training on GPU faster by reducing the large batch transfer time from CPU to GPU
-        for critic_step in range(FLAGS.utd_ratio - 1):
+        for critic_step in range(FLAGS.critic_actor_ratio - 1):
             with timer.context("sample_replay_buffer"):
                 batch = next(replay_iterator)
                 demo_batch = next(demo_iterator)
2 changes: 1 addition & 1 deletion examples/async_pcb_insert_drq/run_actor.sh
@@ -8,7 +8,7 @@ python async_drq_randomized.py "$@" \
    --seed 0 \
    --random_steps 0 \
    --training_starts 200 \
-    --utd_ratio 4 \
+    --critic_actor_ratio 4 \
    --batch_size 256 \
    --eval_period 2000 \
    --encoder_type resnet-pretrained \
2 changes: 1 addition & 1 deletion examples/async_pcb_insert_drq/run_learner.sh
@@ -7,7 +7,7 @@ python async_drq_randomized.py "$@" \
    --seed 0 \
    --random_steps 1000 \
    --training_starts 200 \
-    --utd_ratio 4 \
+    --critic_actor_ratio 4 \
    --batch_size 256 \
    --eval_period 2000 \
    --encoder_type resnet-pretrained \
5 changes: 2 additions & 3 deletions examples/async_peg_insert_drq/async_drq_randomized.py
@@ -45,8 +45,7 @@
 flags.DEFINE_integer("max_traj_length", 100, "Maximum length of trajectory.")
 flags.DEFINE_integer("seed", 42, "Random seed.")
 flags.DEFINE_bool("save_model", False, "Whether to save model.")
-flags.DEFINE_integer("batch_size", 256, "Batch size.")
-flags.DEFINE_integer("utd_ratio", 4, "UTD ratio.")
+flags.DEFINE_integer("critic_actor_ratio", 4, "critic to actor update ratio.")
 
 flags.DEFINE_integer("max_steps", 1000000, "Maximum number of training steps.")
 flags.DEFINE_integer("replay_buffer_capacity", 200000, "Replay buffer capacity.")
@@ -276,7 +275,7 @@ def stats_callback(type: str, payload: dict) -> dict:
     for step in tqdm.tqdm(range(FLAGS.max_steps), dynamic_ncols=True, desc="learner"):
         # run n-1 critic updates and 1 critic + actor update.
         # This makes training on GPU faster by reducing the large batch transfer time from CPU to GPU
-        for critic_step in range(FLAGS.utd_ratio - 1):
+        for critic_step in range(FLAGS.critic_actor_ratio - 1):
             with timer.context("sample_replay_buffer"):
                 batch = next(replay_iterator)
                 demo_batch = next(demo_iterator)
2 changes: 1 addition & 1 deletion examples/async_peg_insert_drq/run_actor.sh
@@ -8,7 +8,7 @@ python async_drq_randomized.py "$@" \
    --seed 0 \
    --random_steps 0 \
    --training_starts 200 \
-    --utd_ratio 4 \
+    --critic_actor_ratio 4 \
    --batch_size 256 \
    --eval_period 2000 \
    --encoder_type resnet-pretrained \
2 changes: 1 addition & 1 deletion examples/async_peg_insert_drq/run_learner.sh
@@ -7,7 +7,7 @@ python async_drq_randomized.py "$@" \
    --seed 0 \
    --random_steps 1000 \
    --training_starts 200 \
-    --utd_ratio 4 \
+    --critic_actor_ratio 4 \
    --batch_size 256 \
    --eval_period 2000 \
    --encoder_type resnet-pretrained \
4 changes: 2 additions & 2 deletions examples/async_sac_state_sim/async_sac_state_sim.py
@@ -36,7 +36,7 @@
 flags.DEFINE_integer("seed", 42, "Random seed.")
 flags.DEFINE_bool("save_model", False, "Whether to save model.")
 flags.DEFINE_integer("batch_size", 256, "Batch size.")
-flags.DEFINE_integer("utd_ratio", 8, "UTD ratio.")
+flags.DEFINE_integer("critic_actor_ratio", 8, "critic to actor update ratio.")
 
 flags.DEFINE_integer("max_steps", 1000000, "Maximum number of training steps.")
 flags.DEFINE_integer("replay_buffer_capacity", 1000000, "Replay buffer capacity.")
@@ -284,7 +284,7 @@ def main(_):
     )
     replay_iterator = replay_buffer.get_iterator(
         sample_args={
-            "batch_size": FLAGS.batch_size * FLAGS.utd_ratio,
+            "batch_size": FLAGS.batch_size * FLAGS.critic_actor_ratio,
         },
         device=sharding.replicate(),
     )
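
Unlike the drq examples above, which call next(replay_iterator) once per critic update, this learner samples a single batch of size batch_size * critic_actor_ratio per step, so the host-to-device transfer happens once rather than critic_actor_ratio times. A rough sketch of the idea, with NumPy standing in for the sharded JAX pipeline and hypothetical on-device slicing (the repo's actual minibatching may differ):

import numpy as np

batch_size, critic_actor_ratio = 256, 8

# stands in for one large sampled batch: a single bulk transfer
big_batch = np.zeros((batch_size * critic_actor_ratio, 4), dtype=np.float32)

for i in range(critic_actor_ratio):
    # slice per-update minibatches out of the already-transferred batch
    minibatch = big_batch[i * batch_size : (i + 1) * batch_size]
    # ... critic update on each slice; actor update on the last slice ...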
2 changes: 1 addition & 1 deletion examples/async_sac_state_sim/run_actor.sh
@@ -8,7 +8,7 @@ python async_sac_state_sim.py "$@" \
    --seed 0 \
    --random_steps 1000 \
    --training_starts 1000 \
-    --utd_ratio 8 \
+    --critic_actor_ratio 8 \
    --batch_size 256 \
    --eval_period 2000 \
    --debug
2 changes: 1 addition & 1 deletion examples/async_sac_state_sim/run_learner.sh
@@ -7,7 +7,7 @@ python async_sac_state_sim.py "$@" \
    --seed 0 \
    --random_steps 1000 \
    --training_starts 1000 \
-    --utd_ratio 8 \
+    --critic_actor_ratio 8 \
    --batch_size 256 \
    --eval_period 2000 \
    --debug # wandb is disabled when debug
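
One practical consequence worth noting: absl-py rejects command-line flags that were never defined, so any out-of-tree launch script still passing --utd_ratio will now fail at startup (unless --undefok=utd_ratio is passed). A minimal sketch of the failure mode, assuming a hypothetical repro.py that defines only the new flag:

from absl import app, flags

flags.DEFINE_integer("critic_actor_ratio", 4, "critic to actor update ratio.")

def main(_):
    print(flags.FLAGS.critic_actor_ratio)

if __name__ == "__main__":
    # `python repro.py --critic_actor_ratio 8` works;
    # `python repro.py --utd_ratio 8` exits with a flags parsing error
    # ("Unknown command line flag 'utd_ratio'").
    app.run(main)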
