From b7e0fa4f955dddb1f5b3eabb42f5885ccc22c974 Mon Sep 17 00:00:00 2001
From: youliangtan
Date: Mon, 17 Jun 2024 11:02:14 -0700
Subject: [PATCH 1/2] fix rare duplicated data buffer entry

Signed-off-by: youliangtan
---
 .../async_drq_randomized.py                         |  4 ++--
 examples/async_bin_relocation_fwbw_drq/run_actor.sh |  6 +-----
 .../async_cable_route_drq/async_drq_randomized.py   |  4 ++--
 examples/async_cable_route_drq/run_actor.sh         |  4 ----
 examples/async_drq_sim/async_drq_sim.py             | 13 +++++++++++--
 examples/async_drq_sim/run_actor.sh                 |  4 ----
 examples/async_drq_sim/run_learner.sh               |  2 --
 .../async_pcb_insert_drq/async_drq_randomized.py    |  4 ++--
 examples/async_pcb_insert_drq/run_actor.sh          |  4 ----
 .../async_peg_insert_drq/async_drq_randomized.py    |  4 ++--
 examples/async_peg_insert_drq/run_actor.sh          |  6 ------
 examples/async_sac_state_sim/async_sac_state_sim.py | 13 +++++++++++--
 examples/async_sac_state_sim/run_actor.sh           |  4 ----
 examples/async_sac_state_sim/run_learner.sh         |  2 --
 serl_launcher/serl_launcher/utils/launcher.py       |  1 +
 serl_launcher/setup.py                              |  2 +-
 16 files changed, 33 insertions(+), 44 deletions(-)

diff --git a/examples/async_bin_relocation_fwbw_drq/async_drq_randomized.py b/examples/async_bin_relocation_fwbw_drq/async_drq_randomized.py
index c5fb76e..78a8eee 100644
--- a/examples/async_bin_relocation_fwbw_drq/async_drq_randomized.py
+++ b/examples/async_bin_relocation_fwbw_drq/async_drq_randomized.py
@@ -56,7 +56,7 @@
 
 flags.DEFINE_integer("random_steps", 300, "Sample random actions for this many steps.")
 flags.DEFINE_integer("training_starts", 300, "Training starts after this step.")
-flags.DEFINE_integer("steps_per_update", 50, "Number of steps per update the server.")
+flags.DEFINE_integer("steps_per_update", 30, "Number of steps per update the server.")
 
 flags.DEFINE_integer("log_period", 10, "Logging period.")
 flags.DEFINE_integer("eval_period", 2000, "Evaluation period.")
@@ -537,7 +537,7 @@ def main(_):
     elif FLAGS.actor:
         sampling_rng = jax.device_put(sampling_rng, sharding.replicate())
         data_stores = OrderedDict(
-            {name: QueuedDataStore(50000) for name in id_to_task.values()}
+            {name: QueuedDataStore(2000) for name in id_to_task.values()}
         )
         # actor loop
         print_green("starting actor loop")
diff --git a/examples/async_bin_relocation_fwbw_drq/run_actor.sh b/examples/async_bin_relocation_fwbw_drq/run_actor.sh
index a887cd2..5f25bc1 100644
--- a/examples/async_bin_relocation_fwbw_drq/run_actor.sh
+++ b/examples/async_bin_relocation_fwbw_drq/run_actor.sh
@@ -7,14 +7,10 @@ python async_drq_randomized.py "$@" \
     --exp_name=serl_dev_drq_rlpd20demos_bin_fwbw_resnet_096 \
     --seed 0 \
     --random_steps 200 \
-    --training_starts 200 \
-    --critic_actor_ratio 4 \
-    --batch_size 256 \
-    --eval_period 2000 \
     --encoder_type resnet-pretrained \
     --demo_path fw_bin_2000_demo_2024-01-23_18-49-56.pkl \
     --fw_ckpt_path /home/undergrad/code/serl_dev/examples/async_bin_relocation_fwbw_drq/bin_fw_096 \
     --fw_reward_classifier_ckpt_path "/home/undergrad/code/serl_dev/examples/async_bin_relocation_fwbw_drq/fw_classifier_ckpt" \
     --bw_reward_classifier_ckpt_path "/home/undergrad/code/serl_dev/examples/async_bin_relocation_fwbw_drq/bw_classifier_ckpt" \
-    --eval_checkpoint_step 31000 \
+    --eval_checkpoint_step 100 \
     --eval_n_trajs 100
diff --git a/examples/async_cable_route_drq/async_drq_randomized.py b/examples/async_cable_route_drq/async_drq_randomized.py
index e3a3b1a..90770fc 100644
--- a/examples/async_cable_route_drq/async_drq_randomized.py
+++ b/examples/async_cable_route_drq/async_drq_randomized.py
@@ -56,7 +56,7 @@
 
 flags.DEFINE_integer("random_steps", 300, "Sample random actions for this many steps.")
 flags.DEFINE_integer("training_starts", 300, "Training starts after this step.")
-flags.DEFINE_integer("steps_per_update", 50, "Number of steps per update the server.")
+flags.DEFINE_integer("steps_per_update", 30, "Number of steps per update the server.")
 
 flags.DEFINE_integer("log_period", 10, "Logging period.")
 flags.DEFINE_integer("eval_period", 2000, "Evaluation period.")
@@ -410,7 +410,7 @@ def main(_):
 
     elif FLAGS.actor:
         sampling_rng = jax.device_put(sampling_rng, sharding.replicate())
-        data_store = QueuedDataStore(50000)  # the queue size on the actor
+        data_store = QueuedDataStore(2000)  # the queue size on the actor
         # actor loop
         print_green("starting actor loop")
         actor(agent, data_store, env, sampling_rng)
diff --git a/examples/async_cable_route_drq/run_actor.sh b/examples/async_cable_route_drq/run_actor.sh
index eb47a1c..93a4f8a 100644
--- a/examples/async_cable_route_drq/run_actor.sh
+++ b/examples/async_cable_route_drq/run_actor.sh
@@ -7,10 +7,6 @@ python async_drq_randomized.py "$@" \
     --exp_name=serl_dev_drq_rlpd20demos_cable_random_resnet \
     --seed 0 \
     --random_steps 0 \
-    --training_starts 200 \
-    --critic_actor_ratio 4 \
-    --batch_size 256 \
-    --eval_period 2000 \
     --encoder_type resnet-pretrained \
     --demo_path cable_route_20_demos_2024-01-04_12-10-54.pkl \
     --checkpoint_path /home/undergrad/code/serl_dev/examples/async_cable_route_drq/10x10_30degs_20demos_rand_cable_096 \
diff --git a/examples/async_drq_sim/async_drq_sim.py b/examples/async_drq_sim/async_drq_sim.py
index 29f55ba..a84bc58 100644
--- a/examples/async_drq_sim/async_drq_sim.py
+++ b/examples/async_drq_sim/async_drq_sim.py
@@ -52,7 +52,7 @@
 
 flags.DEFINE_integer("random_steps", 300, "Sample random actions for this many steps.")
 flags.DEFINE_integer("training_starts", 300, "Training starts after this step.")
-flags.DEFINE_integer("steps_per_update", 50, "Number of steps per update the server.")
+flags.DEFINE_integer("steps_per_update", 30, "Number of steps per update the server.")
 
 flags.DEFINE_integer("log_period", 10, "Logging period.")
 flags.DEFINE_integer("eval_period", 2000, "Evaluation period.")
@@ -255,6 +255,14 @@ def stats_callback(type: str, payload: dict) -> dict:
 
     # wait till the replay buffer is filled with enough data
     timer = Timer()
+
+    # show replay buffer progress bar during training
+    pbar = tqdm.tqdm(
+        total=FLAGS.replay_buffer_capacity,
+        initial=len(replay_buffer),
+        desc="replay buffer",
+    )
+
     for step in tqdm.tqdm(range(FLAGS.max_steps), dynamic_ncols=True, desc="learner"):
         # run n-1 critic updates and 1 critic + actor update.
         # This makes training on GPU faster by reducing the large batch transfer time from CPU to GPU
@@ -298,6 +306,7 @@ def stats_callback(type: str, payload: dict) -> dict:
                     FLAGS.checkpoint_path, agent.state, step=update_steps, keep=20
                 )
 
+        pbar.update(len(replay_buffer) - pbar.n)  # update replay buffer bar
         update_steps += 1
 
 
@@ -397,7 +406,7 @@ def preload_data_transform(data, metadata) -> Optional[Dict[str, Any]]:
 
     elif FLAGS.actor:
         sampling_rng = jax.device_put(sampling_rng, sharding.replicate())
-        data_store = QueuedDataStore(50000)  # the queue size on the actor
+        data_store = QueuedDataStore(2000)  # the queue size on the actor
 
         # actor loop
         print_green("starting actor loop")
diff --git a/examples/async_drq_sim/run_actor.sh b/examples/async_drq_sim/run_actor.sh
index 1cf4557..52fcfc4 100644
--- a/examples/async_drq_sim/run_actor.sh
+++ b/examples/async_drq_sim/run_actor.sh
@@ -6,9 +6,5 @@ python async_drq_sim.py "$@" \
     --exp_name=serl_dev_drq_sim_test_resnet \
     --seed 0 \
     --random_steps 1000 \
-    --training_starts 1000 \
-    --critic_actor_ratio 4 \
-    --batch_size 256 \
-    --eval_period 2000 \
     --encoder_type resnet-pretrained \
     --debug
diff --git a/examples/async_drq_sim/run_learner.sh b/examples/async_drq_sim/run_learner.sh
index 4836e6c..3944544 100644
--- a/examples/async_drq_sim/run_learner.sh
+++ b/examples/async_drq_sim/run_learner.sh
@@ -4,10 +4,8 @@ python async_drq_sim.py "$@" \
     --learner \
     --exp_name=serl_dev_drq_sim_test_resnet \
     --seed 0 \
-    --random_steps 1000 \
     --training_starts 1000 \
     --critic_actor_ratio 4 \
-    --eval_period 2000 \
     --encoder_type resnet-pretrained \
     # --demo_path franka_lift_cube_image_20_trajs.pkl \
     --debug # wandb is disabled when debug
diff --git a/examples/async_pcb_insert_drq/async_drq_randomized.py b/examples/async_pcb_insert_drq/async_drq_randomized.py
index 9a1a059..8248379 100644
--- a/examples/async_pcb_insert_drq/async_drq_randomized.py
+++ b/examples/async_pcb_insert_drq/async_drq_randomized.py
@@ -57,7 +57,7 @@
 
 flags.DEFINE_integer("random_steps", 300, "Sample random actions for this many steps.")
 flags.DEFINE_integer("training_starts", 300, "Training starts after this step.")
-flags.DEFINE_integer("steps_per_update", 50, "Number of steps per update the server.")
+flags.DEFINE_integer("steps_per_update", 30, "Number of steps per update the server.")
 
 flags.DEFINE_integer("log_period", 10, "Logging period.")
 flags.DEFINE_integer("eval_period", 2000, "Evaluation period.")
@@ -476,7 +476,7 @@ def main(_):
 
     elif FLAGS.actor:
         sampling_rng = jax.device_put(sampling_rng, sharding.replicate())
-        data_store = QueuedDataStore(50000)  # the queue size on the actor
+        data_store = QueuedDataStore(2000)  # the queue size on the actor
 
         # actor loop
         print_green("starting actor loop")
diff --git a/examples/async_pcb_insert_drq/run_actor.sh b/examples/async_pcb_insert_drq/run_actor.sh
index 41596b0..64153ab 100644
--- a/examples/async_pcb_insert_drq/run_actor.sh
+++ b/examples/async_pcb_insert_drq/run_actor.sh
@@ -7,10 +7,6 @@ python async_drq_randomized.py "$@" \
     --exp_name=serl_dev_drq_rlpd10demos_peg_insert_random_resnet \
     --seed 0 \
     --random_steps 0 \
-    --training_starts 200 \
-    --critic_actor_ratio 4 \
-    --batch_size 256 \
-    --eval_period 2000 \
     --encoder_type resnet-pretrained \
     --demo_path pcb_insert_20_demos_2023-12-27_19-40-50.pkl \
     --checkpoint_path /home/undergrad/code/serl_dev/examples/async_pcb_insert_drq/5x5_20degs_20demos_rand_pcb_insert_096 \
diff --git a/examples/async_peg_insert_drq/async_drq_randomized.py b/examples/async_peg_insert_drq/async_drq_randomized.py
index 81cc4a8..4fd76f0 100644
--- a/examples/async_peg_insert_drq/async_drq_randomized.py
+++ b/examples/async_peg_insert_drq/async_drq_randomized.py
@@ -52,7 +52,7 @@
 
 flags.DEFINE_integer("random_steps", 300, "Sample random actions for this many steps.")
 flags.DEFINE_integer("training_starts", 300, "Training starts after this step.")
-flags.DEFINE_integer("steps_per_update", 50, "Number of steps per update the server.")
+flags.DEFINE_integer("steps_per_update", 30, "Number of steps per update the server.")
 
 flags.DEFINE_integer("log_period", 10, "Logging period.")
 flags.DEFINE_integer("eval_period", 2000, "Evaluation period.")
@@ -383,7 +383,7 @@ def main(_):
 
     elif FLAGS.actor:
         sampling_rng = jax.device_put(sampling_rng, sharding.replicate())
-        data_store = QueuedDataStore(50000)  # the queue size on the actor
+        data_store = QueuedDataStore(2000)  # the queue size on the actor
 
         # actor loop
         print_green("starting actor loop")
diff --git a/examples/async_peg_insert_drq/run_actor.sh b/examples/async_peg_insert_drq/run_actor.sh
index 9b34b2a..a251e75 100644
--- a/examples/async_peg_insert_drq/run_actor.sh
+++ b/examples/async_peg_insert_drq/run_actor.sh
@@ -8,11 +8,5 @@ python async_drq_randomized.py "$@" \
     --seed 0 \
     --random_steps 0 \
     --training_starts 200 \
-    --critic_actor_ratio 4 \
-    --batch_size 256 \
-    --eval_period 2000 \
     --encoder_type resnet-pretrained \
     --demo_path peg_insert_20_demos_2023-12-25_16-13-25.pkl \
-    # --checkpoint_path /home/undergrad/code/serl_dev/examples/async_pcb_insert_drq/5x5_20degs_100demos_rand_pcb_insert_bc \
-    # --eval_checkpoint_step 20000 \
-    # --eval_n_trajs 100 \
diff --git a/examples/async_sac_state_sim/async_sac_state_sim.py b/examples/async_sac_state_sim/async_sac_state_sim.py
index 5994208..90a1acf 100644
--- a/examples/async_sac_state_sim/async_sac_state_sim.py
+++ b/examples/async_sac_state_sim/async_sac_state_sim.py
@@ -43,7 +43,7 @@
 
 flags.DEFINE_integer("random_steps", 300, "Sample random actions for this many steps.")
 flags.DEFINE_integer("training_starts", 300, "Training starts after this step.")
-flags.DEFINE_integer("steps_per_update", 50, "Number of steps per update the server.")
+flags.DEFINE_integer("steps_per_update", 30, "Number of steps per update the server.")
 
 flags.DEFINE_integer("log_period", 10, "Logging period.")
 flags.DEFINE_integer("eval_period", 2000, "Evaluation period.")
@@ -214,6 +214,14 @@ def stats_callback(type: str, payload: dict) -> dict:
 
     # wait till the replay buffer is filled with enough data
     timer = Timer()
+
+    # show replay buffer progress bar during training
+    pbar = tqdm.tqdm(
+        total=FLAGS.replay_buffer_capacity,
+        initial=len(replay_buffer),
+        desc="replay buffer",
+    )
+
     for step in tqdm.tqdm(range(FLAGS.max_steps), dynamic_ncols=True, desc="learner"):
         # Train the networks
         with timer.context("sample_replay_buffer"):
@@ -236,6 +244,7 @@ def stats_callback(type: str, payload: dict) -> dict:
                     FLAGS.checkpoint_path, agent.state, step=update_steps, keep=20
                 )
 
+        pbar.update(len(replay_buffer) - pbar.n)  # update replay buffer bar
         update_steps += 1
 
 
@@ -299,7 +308,7 @@ def main(_):
 
    elif FLAGS.actor:
         sampling_rng = jax.device_put(sampling_rng, sharding.replicate())
-        data_store = QueuedDataStore(50000)  # the queue size on the actor
+        data_store = QueuedDataStore(2000)  # the queue size on the actor
 
         # actor loop
         print_green("starting actor loop")
diff --git a/examples/async_sac_state_sim/run_actor.sh b/examples/async_sac_state_sim/run_actor.sh
index ce4ff87..5767791 100644
--- a/examples/async_sac_state_sim/run_actor.sh
+++ b/examples/async_sac_state_sim/run_actor.sh
@@ -7,8 +7,4 @@ python async_sac_state_sim.py "$@" \
     --exp_name=serl_dev_sim_test \
     --seed 0 \
     --random_steps 1000 \
-    --training_starts 1000 \
-    --critic_actor_ratio 8 \
-    --batch_size 256 \
-    --eval_period 2000 \
     --debug
diff --git a/examples/async_sac_state_sim/run_learner.sh b/examples/async_sac_state_sim/run_learner.sh
index f5a2cb8..10a203c 100644
--- a/examples/async_sac_state_sim/run_learner.sh
+++ b/examples/async_sac_state_sim/run_learner.sh
@@ -5,9 +5,7 @@ python async_sac_state_sim.py "$@" \
     --env PandaPickCube-v0 \
     --exp_name=serl_dev_sim_test \
     --seed 0 \
-    --random_steps 1000 \
     --training_starts 1000 \
     --critic_actor_ratio 8 \
     --batch_size 256 \
-    --eval_period 2000 \
     --debug # wandb is disabled when debug
diff --git a/serl_launcher/serl_launcher/utils/launcher.py b/serl_launcher/serl_launcher/utils/launcher.py
index 39740c5..99d5a61 100644
--- a/serl_launcher/serl_launcher/utils/launcher.py
+++ b/serl_launcher/serl_launcher/utils/launcher.py
@@ -173,6 +173,7 @@ def make_trainer_config(port_number: int = 5488, broadcast_port: int = 5489):
         port_number=port_number,
         broadcast_port=broadcast_port,
         request_types=["send-stats"],
+        # experimental_pipeline_url="tcp://127.0.0.1:5547",  # experimental ds update
     )
 
 
diff --git a/serl_launcher/setup.py b/serl_launcher/setup.py
index 0125bf0..26658d6 100644
--- a/serl_launcher/setup.py
+++ b/serl_launcher/setup.py
@@ -13,7 +13,7 @@
         "typing_extensions",
         "opencv-python",
         "lz4",
-        "agentlace@git+https://github.com/youliangtan/agentlace.git@b9be677d5d20772fca98c8be44777ecb7111bc59",
+        "agentlace@git+https://github.com/youliangtan/agentlace.git@f025024631db0992a90085ee4637d8c0c90da317",
     ],
     packages=find_packages(),
     zip_safe=False,

From 0f962cb785c1de170705ac15d60ca9ef6a0f525e Mon Sep 17 00:00:00 2001
From: youliangtan
Date: Mon, 17 Jun 2024 11:04:52 -0700
Subject: [PATCH 2/2] bump up version

Signed-off-by: youliangtan
---
 serl_launcher/setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/serl_launcher/setup.py b/serl_launcher/setup.py
index 26658d6..fa5dc0a 100644
--- a/serl_launcher/setup.py
+++ b/serl_launcher/setup.py
@@ -2,7 +2,7 @@
 
 setup(
     name="serl_launcher",
-    version="0.1.2",
+    version="0.1.3",
     description="library for rl experiments",
     url="https://github.com/rail-berkeley/serl",
     author="auth",
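
Notes:

The "rare duplicated data buffer entry" fix lands in two places: the
actor-side QueuedDataStore shrinks from 50000 to 2000 entries, and the
agentlace dependency is repinned to f025024, which carries the
transport-level change. A minimal sketch of the general idea follows;
the names (SeqQueuedStore, merge_into_replay, last_seen_id) are
hypothetical and this is not the agentlace API. Each transition is
tagged with a monotonically increasing sequence id, so the learner
inserts a given entry at most once even if a batch is retransmitted:

# Illustrative sketch only; hypothetical names, not the actual
# agentlace implementation.
from collections import deque
from typing import Any, List, Tuple


class SeqQueuedStore:
    """Bounded actor-side queue; every entry carries a unique sequence id."""

    def __init__(self, capacity: int):
        self._queue = deque(maxlen=capacity)  # oldest entries drop when full
        self._next_id = 0

    def insert(self, transition: Any) -> None:
        self._queue.append((self._next_id, transition))
        self._next_id += 1

    def batch(self) -> List[Tuple[int, Any]]:
        return list(self._queue)


def merge_into_replay(batch, replay_buffer: list, last_seen_id: int) -> int:
    """Learner side: keep only ids newer than the last one seen, so a
    batch that arrives twice cannot create duplicate replay entries."""
    for seq_id, transition in batch:
        if seq_id > last_seen_id:
            replay_buffer.append(transition)
            last_seen_id = seq_id
    return last_seen_id

Under this scheme the smaller queue (2000) also bounds how much stale
data an actor can resend after a reconnect, while still comfortably
covering the steps_per_update send interval (now 30 steps).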
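
The learner-side progress bar added to async_drq_sim.py and
async_sac_state_sim.py uses only stock tqdm behaviour: initial seeds
the bar with the buffer's current fill, pbar.n is the bar's running
count, and pbar.update(len(replay_buffer) - pbar.n) jumps the bar to
the true fill level on each iteration. A self-contained version of the
same pattern, with stand-ins for the flags and the real replay buffer:

import tqdm

capacity = 200_000   # stand-in for FLAGS.replay_buffer_capacity
replay_buffer = []   # stand-in for the learner's replay buffer

pbar = tqdm.tqdm(total=capacity, initial=len(replay_buffer), desc="replay buffer")
for step in range(1000):
    replay_buffer.extend(range(3))            # pretend 3 transitions arrived
    pbar.update(len(replay_buffer) - pbar.n)  # advance by the delta since last refresh
pbar.close()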