fix the naming for utd_ratio
jianlanluo committed Jun 13, 2024
1 parent 20e27b4 commit 6f23843
Showing 19 changed files with 25 additions and 26 deletions.
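
For context: the renamed flag controls how many critic updates the learner runs for each actor update; this commit changes only the flag's name, not the behavior. A minimal sketch of the pattern, using placeholder update functions rather than serl's actual agent API:

def update_critic(params, batch):
    return params  # placeholder: one gradient step on the critic

def update_actor(params, batch):
    return params  # placeholder: one gradient step on the actor

def learner_step(params, replay_iterator, critic_actor_ratio=4):
    # n-1 critic-only updates, each on a freshly sampled batch
    for _ in range(critic_actor_ratio - 1):
        params = update_critic(params, next(replay_iterator))
    # one final update that trains both critic and actor
    batch = next(replay_iterator)
    params = update_critic(params, batch)
    params = update_actor(params, batch)
    return params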
4 changes: 2 additions & 2 deletions examples/async_bin_relocation_fwbw_drq/async_drq_randomized.py
@@ -49,7 +49,7 @@
 flags.DEFINE_integer("seed", 42, "Random seed.")
 flags.DEFINE_bool("save_model", False, "Whether to save model.")
 flags.DEFINE_integer("batch_size", 256, "Batch size.")
-flags.DEFINE_integer("utd_ratio", 4, "UTD ratio.")
+flags.DEFINE_integer("critic_actor_ratio", 4, "critic to actor update ratio.")
 
 flags.DEFINE_integer("max_steps", 1000000, "Maximum number of training steps.")
 flags.DEFINE_integer("replay_buffer_capacity", 200000, "Replay buffer capacity.")
@@ -374,7 +374,7 @@ def stats_callback(type: str, payload: dict) -> dict:
             continue
         # run n-1 critic updates and 1 critic + actor update.
         # This makes training on GPU faster by reducing the large batch transfer time from CPU to GPU
-        for critic_step in range(FLAGS.utd_ratio - 1):
+        for critic_step in range(FLAGS.critic_actor_ratio - 1):
             with timer.context("sample_replay_buffer"):
                 batch = next(replay_iterator)
                 demo_batch = next(demo_iterator)
2 changes: 1 addition & 1 deletion examples/async_bin_relocation_fwbw_drq/run_actor.sh
@@ -8,7 +8,7 @@ python async_drq_randomized.py "$@" \
     --seed 0 \
     --random_steps 200 \
     --training_starts 200 \
-    --utd_ratio 4 \
+    --critic_actor_ratio 4 \
     --batch_size 256 \
     --eval_period 2000 \
     --encoder_type resnet-pretrained \
2 changes: 1 addition & 1 deletion examples/async_bin_relocation_fwbw_drq/run_bw_learner.sh
@@ -7,7 +7,7 @@ python async_drq_randomized.py "$@" \
     --seed 0 \
     --random_steps 200 \
     --training_starts 200 \
-    --utd_ratio 4 \
+    --critic_actor_ratio 4 \
     --batch_size 256 \
     --eval_period 2000 \
     --encoder_type resnet-pretrained \
2 changes: 1 addition & 1 deletion examples/async_bin_relocation_fwbw_drq/run_fw_learner.sh
@@ -7,7 +7,7 @@ python async_drq_randomized.py "$@" \
    --seed 0 \
    --random_steps 200 \
    --training_starts 200 \
-    --utd_ratio 4 \
+    --critic_actor_ratio 4 \
    --batch_size 256 \
    --eval_period 2000 \
    --encoder_type resnet-pretrained \
4 changes: 2 additions & 2 deletions examples/async_cable_route_drq/async_drq_randomized.py
@@ -49,7 +49,7 @@
 flags.DEFINE_integer("seed", 42, "Random seed.")
 flags.DEFINE_bool("save_model", False, "Whether to save model.")
 flags.DEFINE_integer("batch_size", 256, "Batch size.")
-flags.DEFINE_integer("utd_ratio", 4, "UTD ratio.")
+flags.DEFINE_integer("critic_actor_ratio", 4, "critic to actor update ratio.")
 
 flags.DEFINE_integer("max_steps", 1000000, "Maximum number of training steps.")
 flags.DEFINE_integer("replay_buffer_capacity", 200000, "Replay buffer capacity.")
@@ -285,7 +285,7 @@ def stats_callback(type: str, payload: dict) -> dict:
     for step in tqdm.tqdm(range(FLAGS.max_steps), dynamic_ncols=True, desc="learner"):
         # run n-1 critic updates and 1 critic + actor update.
         # This makes training on GPU faster by reducing the large batch transfer time from CPU to GPU
-        for critic_step in range(FLAGS.utd_ratio - 1):
+        for critic_step in range(FLAGS.critic_actor_ratio - 1):
             with timer.context("sample_replay_buffer"):
                 batch = next(replay_iterator)
                 demo_batch = next(demo_iterator)
2 changes: 1 addition & 1 deletion examples/async_cable_route_drq/run_actor.sh
@@ -8,7 +8,7 @@ python async_drq_randomized.py "$@" \
    --seed 0 \
    --random_steps 0 \
    --training_starts 200 \
-    --utd_ratio 4 \
+    --critic_actor_ratio 4 \
    --batch_size 256 \
    --eval_period 2000 \
    --encoder_type resnet-pretrained \
2 changes: 1 addition & 1 deletion examples/async_cable_route_drq/run_learner.sh
@@ -7,7 +7,7 @@ python async_drq_randomized.py "$@" \
    --seed 0 \
    --random_steps 600 \
    --training_starts 200 \
-    --utd_ratio 4 \
+    --critic_actor_ratio 4 \
    --batch_size 256 \
    --eval_period 2000 \
    --encoder_type resnet-pretrained \
4 changes: 2 additions & 2 deletions examples/async_drq_sim/async_drq_sim.py
@@ -45,7 +45,7 @@
 flags.DEFINE_integer("seed", 42, "Random seed.")
 flags.DEFINE_bool("save_model", False, "Whether to save model.")
 flags.DEFINE_integer("batch_size", 256, "Batch size.")
-flags.DEFINE_integer("utd_ratio", 4, "UTD ratio.")
+flags.DEFINE_integer("critic_actor_ratio", 4, "critic to actor update ratio.")
 
 flags.DEFINE_integer("max_steps", 1000000, "Maximum number of training steps.")
 flags.DEFINE_integer("replay_buffer_capacity", 200000, "Replay buffer capacity.")
@@ -258,7 +258,7 @@ def stats_callback(type: str, payload: dict) -> dict:
     for step in tqdm.tqdm(range(FLAGS.max_steps), dynamic_ncols=True, desc="learner"):
         # run n-1 critic updates and 1 critic + actor update.
         # This makes training on GPU faster by reducing the large batch transfer time from CPU to GPU
-        for critic_step in range(FLAGS.utd_ratio - 1):
+        for critic_step in range(FLAGS.critic_actor_ratio - 1):
             with timer.context("sample_replay_buffer"):
                 batch = next(replay_iterator)
 
2 changes: 1 addition & 1 deletion examples/async_drq_sim/run_actor.sh
@@ -7,7 +7,7 @@ python async_drq_sim.py "$@" \
    --seed 0 \
    --random_steps 1000 \
    --training_starts 1000 \
-    --utd_ratio 4 \
+    --critic_actor_ratio 4 \
    --batch_size 256 \
    --eval_period 2000 \
    --encoder_type resnet-pretrained \
2 changes: 1 addition & 1 deletion examples/async_drq_sim/run_learner.sh
@@ -6,7 +6,7 @@ python async_drq_sim.py "$@" \
    --seed 0 \
    --random_steps 1000 \
    --training_starts 1000 \
-    --utd_ratio 4 \
+    --critic_actor_ratio 4 \
    --eval_period 2000 \
    --encoder_type resnet-pretrained \
    # --demo_path franka_lift_cube_image_20_trajs.pkl \
4 changes: 2 additions & 2 deletions examples/async_pcb_insert_drq/async_drq_randomized.py
@@ -50,7 +50,7 @@
 flags.DEFINE_integer("seed", 42, "Random seed.")
 flags.DEFINE_bool("save_model", False, "Whether to save model.")
 flags.DEFINE_integer("batch_size", 256, "Batch size.")
-flags.DEFINE_integer("utd_ratio", 4, "UTD ratio.")
+flags.DEFINE_integer("critic_actor_ratio", 4, "critic to actor update ratio.")
 
 flags.DEFINE_integer("max_steps", 1000000, "Maximum number of training steps.")
 flags.DEFINE_integer("replay_buffer_capacity", 200000, "Replay buffer capacity.")
@@ -341,7 +341,7 @@ def stats_callback(type: str, payload: dict) -> dict:
     for step in tqdm.tqdm(range(FLAGS.max_steps), dynamic_ncols=True, desc="learner"):
         # run n-1 critic updates and 1 critic + actor update.
         # This makes training on GPU faster by reducing the large batch transfer time from CPU to GPU
-        for critic_step in range(FLAGS.utd_ratio - 1):
+        for critic_step in range(FLAGS.critic_actor_ratio - 1):
             with timer.context("sample_replay_buffer"):
                 batch = next(replay_iterator)
                 demo_batch = next(demo_iterator)
2 changes: 1 addition & 1 deletion examples/async_pcb_insert_drq/run_actor.sh
@@ -8,7 +8,7 @@ python async_drq_randomized.py "$@" \
    --seed 0 \
    --random_steps 0 \
    --training_starts 200 \
-    --utd_ratio 4 \
+    --critic_actor_ratio 4 \
    --batch_size 256 \
    --eval_period 2000 \
    --encoder_type resnet-pretrained \
2 changes: 1 addition & 1 deletion examples/async_pcb_insert_drq/run_learner.sh
@@ -7,7 +7,7 @@ python async_drq_randomized.py "$@" \
    --seed 0 \
    --random_steps 1000 \
    --training_starts 200 \
-    --utd_ratio 4 \
+    --critic_actor_ratio 4 \
    --batch_size 256 \
    --eval_period 2000 \
    --encoder_type resnet-pretrained \
5 changes: 2 additions & 3 deletions examples/async_peg_insert_drq/async_drq_randomized.py
@@ -45,8 +45,7 @@
 flags.DEFINE_integer("max_traj_length", 100, "Maximum length of trajectory.")
 flags.DEFINE_integer("seed", 42, "Random seed.")
 flags.DEFINE_bool("save_model", False, "Whether to save model.")
-flags.DEFINE_integer("batch_size", 256, "Batch size.")
-flags.DEFINE_integer("utd_ratio", 4, "UTD ratio.")
+flags.DEFINE_integer("critic_actor_ratio", 4, "critic to actor update ratio.")
 
 flags.DEFINE_integer("max_steps", 1000000, "Maximum number of training steps.")
 flags.DEFINE_integer("replay_buffer_capacity", 200000, "Replay buffer capacity.")
@@ -276,7 +275,7 @@ def stats_callback(type: str, payload: dict) -> dict:
     for step in tqdm.tqdm(range(FLAGS.max_steps), dynamic_ncols=True, desc="learner"):
         # run n-1 critic updates and 1 critic + actor update.
         # This makes training on GPU faster by reducing the large batch transfer time from CPU to GPU
-        for critic_step in range(FLAGS.utd_ratio - 1):
+        for critic_step in range(FLAGS.critic_actor_ratio - 1):
             with timer.context("sample_replay_buffer"):
                 batch = next(replay_iterator)
                 demo_batch = next(demo_iterator)
2 changes: 1 addition & 1 deletion examples/async_peg_insert_drq/run_actor.sh
@@ -8,7 +8,7 @@ python async_drq_randomized.py "$@" \
    --seed 0 \
    --random_steps 0 \
    --training_starts 200 \
-    --utd_ratio 4 \
+    --critic_actor_ratio 4 \
    --batch_size 256 \
    --eval_period 2000 \
    --encoder_type resnet-pretrained \
2 changes: 1 addition & 1 deletion examples/async_peg_insert_drq/run_learner.sh
@@ -7,7 +7,7 @@ python async_drq_randomized.py "$@" \
    --seed 0 \
    --random_steps 1000 \
    --training_starts 200 \
-    --utd_ratio 4 \
+    --critic_actor_ratio 4 \
    --batch_size 256 \
    --eval_period 2000 \
    --encoder_type resnet-pretrained \
4 changes: 2 additions & 2 deletions examples/async_sac_state_sim/async_sac_state_sim.py
@@ -36,7 +36,7 @@
 flags.DEFINE_integer("seed", 42, "Random seed.")
 flags.DEFINE_bool("save_model", False, "Whether to save model.")
 flags.DEFINE_integer("batch_size", 256, "Batch size.")
-flags.DEFINE_integer("utd_ratio", 8, "UTD ratio.")
+flags.DEFINE_integer("critic_actor_ratio", 8, "critic to actor update ratio.")
 
 flags.DEFINE_integer("max_steps", 1000000, "Maximum number of training steps.")
 flags.DEFINE_integer("replay_buffer_capacity", 1000000, "Replay buffer capacity.")
@@ -284,7 +284,7 @@ def main(_):
     )
     replay_iterator = replay_buffer.get_iterator(
         sample_args={
-            "batch_size": FLAGS.batch_size * FLAGS.utd_ratio,
+            "batch_size": FLAGS.batch_size * FLAGS.critic_actor_ratio,
         },
         device=sharding.replicate(),
     )
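
Unlike the drq examples above, which call next(replay_iterator) once per critic update, this learner samples a single batch of size batch_size * critic_actor_ratio per step, so the host-to-device transfer happens once rather than critic_actor_ratio times. A rough sketch of the idea, with NumPy standing in for the sharded JAX pipeline and hypothetical on-device slicing (the repo's actual minibatching may differ):

import numpy as np

batch_size, critic_actor_ratio = 256, 8

# stands in for one large sampled batch: a single bulk transfer
big_batch = np.zeros((batch_size * critic_actor_ratio, 4), dtype=np.float32)

for i in range(critic_actor_ratio):
    # slice per-update minibatches out of the already-transferred batch
    minibatch = big_batch[i * batch_size : (i + 1) * batch_size]
    # ... critic update on each slice; actor update on the last slice ...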
2 changes: 1 addition & 1 deletion examples/async_sac_state_sim/run_actor.sh
@@ -8,7 +8,7 @@ python async_sac_state_sim.py "$@" \
    --seed 0 \
    --random_steps 1000 \
    --training_starts 1000 \
-    --utd_ratio 8 \
+    --critic_actor_ratio 8 \
    --batch_size 256 \
    --eval_period 2000 \
    --debug
2 changes: 1 addition & 1 deletion examples/async_sac_state_sim/run_learner.sh
@@ -7,7 +7,7 @@ python async_sac_state_sim.py "$@" \
    --seed 0 \
    --random_steps 1000 \
    --training_starts 1000 \
-    --utd_ratio 8 \
+    --critic_actor_ratio 8 \
    --batch_size 256 \
    --eval_period 2000 \
    --debug # wandb is disabled when debug
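
One practical consequence worth noting: absl-py rejects command-line flags that were never defined, so any out-of-tree launch script still passing --utd_ratio will now fail at startup (unless --undefok=utd_ratio is passed). A minimal sketch of the failure mode, assuming a hypothetical repro.py that defines only the new flag:

from absl import app, flags

flags.DEFINE_integer("critic_actor_ratio", 4, "critic to actor update ratio.")

def main(_):
    print(flags.FLAGS.critic_actor_ratio)

if __name__ == "__main__":
    # `python repro.py --critic_actor_ratio 8` works;
    # `python repro.py --utd_ratio 8` exits with a flags parsing error
    # ("Unknown command line flag 'utd_ratio'").
    app.run(main)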
