fix rare duplicated data buffer entry #62

Merged: 2 commits, Jun 17, 2024
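This PR shrinks the actor-side QueuedDataStore from 50000 to 2000 entries, lowers steps_per_update from 50 to 30 across the examples, adds a tqdm progress bar on the learner that tracks replay-buffer fill, trims stale flags from the example launch scripts, and pins serl_launcher to a newer agentlace commit (bumping the package version to 0.1.3). The diff does not spell out the duplication mechanism; presumably a transition queued on the actor could occasionally be delivered to the learner's replay buffer twice, with the actual guard landing in the updated agentlace pin.

Below is a minimal sketch of one way to make such a pipeline duplicate-safe, assuming a bounded FIFO store like QueuedDataStore. It is illustrative only, not the agentlace implementation, and the names DedupQueuedDataStore and DedupReceiver are hypothetical:

# Hypothetical sketch (not the agentlace implementation): a bounded FIFO
# data store whose entries carry a monotonically increasing sequence id,
# so the receiving side can discard retransmitted duplicates.
from collections import deque

class DedupQueuedDataStore:
    def __init__(self, capacity: int):
        self._queue = deque(maxlen=capacity)  # oldest entries drop when full
        self._next_seq = 0

    def insert(self, transition: dict):
        self._queue.append((self._next_seq, transition))
        self._next_seq += 1

    def drain(self):
        # Pop everything queued so far, e.g. to ship to the learner.
        batch = list(self._queue)
        self._queue.clear()
        return batch

class DedupReceiver:
    def __init__(self):
        self._last_seq = -1

    def receive(self, batch):
        # Keep only transitions newer than anything already applied.
        fresh = [t for seq, t in batch if seq > self._last_seq]
        if batch:
            self._last_seq = max(seq for seq, _ in batch)
        return fresh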
@@ -56,7 +56,7 @@
 
 flags.DEFINE_integer("random_steps", 300, "Sample random actions for this many steps.")
 flags.DEFINE_integer("training_starts", 300, "Training starts after this step.")
-flags.DEFINE_integer("steps_per_update", 50, "Number of steps per update the server.")
+flags.DEFINE_integer("steps_per_update", 30, "Number of steps per update the server.")
 
 flags.DEFINE_integer("log_period", 10, "Logging period.")
 flags.DEFINE_integer("eval_period", 2000, "Evaluation period.")
@@ -537,7 +537,7 @@ def main(_):
     elif FLAGS.actor:
         sampling_rng = jax.device_put(sampling_rng, sharding.replicate())
         data_stores = OrderedDict(
-            {name: QueuedDataStore(50000) for name in id_to_task.values()}
+            {name: QueuedDataStore(2000) for name in id_to_task.values()}
         )
         # actor loop
         print_green("starting actor loop")
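Note on the size change above: the actor queue only needs to buffer transitions between network flushes. With steps_per_update at 30, a 2000-entry queue still covers about 66 flush intervals (2000 / 30 ≈ 66), so the smaller queue presumably narrows the window in which already-sent entries can linger and be re-sent, without starving the learner.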
6 changes: 1 addition & 5 deletions examples/async_bin_relocation_fwbw_drq/run_actor.sh
@@ -7,15 +7,11 @@ python async_drq_randomized.py "$@" \
     --exp_name=serl_dev_drq_rlpd20demos_bin_fwbw_resnet_096 \
     --seed 0 \
     --random_steps 200 \
-    --training_starts 200 \
-    --critic_actor_ratio 4 \
-    --batch_size 256 \
-    --eval_period 2000 \
     --encoder_type resnet-pretrained \
     --demo_path fw_bin_2000_demo_2024-01-23_18-49-56.pkl \
     --fw_ckpt_path /home/undergrad/code/serl_dev/examples/async_bin_relocation_fwbw_drq/bin_fw_096 \
     --bw_ckpt_path /home/undergrad/code/serl_dev/examples/async_bin_relocation_fwbw_drq/bin_bw_096 \
     --fw_reward_classifier_ckpt_path "/home/undergrad/code/serl_dev/examples/async_bin_relocation_fwbw_drq/fw_classifier_ckpt" \
     --bw_reward_classifier_ckpt_path "/home/undergrad/code/serl_dev/examples/async_bin_relocation_fwbw_drq/bw_classifier_ckpt" \
-    --eval_checkpoint_step 31000 \
     --eval_n_trajs 100
+    --eval_checkpoint_step 100
4 changes: 2 additions & 2 deletions examples/async_cable_route_drq/async_drq_randomized.py
@@ -56,7 +56,7 @@
 
 flags.DEFINE_integer("random_steps", 300, "Sample random actions for this many steps.")
 flags.DEFINE_integer("training_starts", 300, "Training starts after this step.")
-flags.DEFINE_integer("steps_per_update", 50, "Number of steps per update the server.")
+flags.DEFINE_integer("steps_per_update", 30, "Number of steps per update the server.")
 
 flags.DEFINE_integer("log_period", 10, "Logging period.")
 flags.DEFINE_integer("eval_period", 2000, "Evaluation period.")

@@ -410,7 +410,7 @@ def main(_):
 
     elif FLAGS.actor:
         sampling_rng = jax.device_put(sampling_rng, sharding.replicate())
-        data_store = QueuedDataStore(50000) # the queue size on the actor
+        data_store = QueuedDataStore(2000) # the queue size on the actor
         # actor loop
         print_green("starting actor loop")
         actor(agent, data_store, env, sampling_rng)
4 changes: 0 additions & 4 deletions examples/async_cable_route_drq/run_actor.sh
@@ -7,10 +7,6 @@ python async_drq_randomized.py "$@" \
     --exp_name=serl_dev_drq_rlpd20demos_cable_random_resnet \
     --seed 0 \
     --random_steps 0 \
-    --training_starts 200 \
-    --critic_actor_ratio 4 \
-    --batch_size 256 \
-    --eval_period 2000 \
     --encoder_type resnet-pretrained \
     --demo_path cable_route_20_demos_2024-01-04_12-10-54.pkl \
     --checkpoint_path /home/undergrad/code/serl_dev/examples/async_cable_route_drq/10x10_30degs_20demos_rand_cable_096 \
13 changes: 11 additions & 2 deletions examples/async_drq_sim/async_drq_sim.py
@@ -52,7 +52,7 @@
 
 flags.DEFINE_integer("random_steps", 300, "Sample random actions for this many steps.")
 flags.DEFINE_integer("training_starts", 300, "Training starts after this step.")
-flags.DEFINE_integer("steps_per_update", 50, "Number of steps per update the server.")
+flags.DEFINE_integer("steps_per_update", 30, "Number of steps per update the server.")
 
 flags.DEFINE_integer("log_period", 10, "Logging period.")
 flags.DEFINE_integer("eval_period", 2000, "Evaluation period.")

@@ -255,6 +255,14 @@ def stats_callback(type: str, payload: dict) -> dict:
 
     # wait till the replay buffer is filled with enough data
     timer = Timer()
+
+    # show replay buffer progress bar during training
+    pbar = tqdm.tqdm(
+        total=FLAGS.replay_buffer_capacity,
+        initial=len(replay_buffer),
+        desc="replay buffer",
+    )
+
     for step in tqdm.tqdm(range(FLAGS.max_steps), dynamic_ncols=True, desc="learner"):
         # run n-1 critic updates and 1 critic + actor update.
         # This makes training on GPU faster by reducing the large batch transfer time from CPU to GPU
@@ -298,6 +306,7 @@ def stats_callback(type: str, payload: dict) -> dict:
                 FLAGS.checkpoint_path, agent.state, step=update_steps, keep=20
             )
 
+        pbar.update(len(replay_buffer) - pbar.n) # update replay buffer bar
         update_steps += 1
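The pbar.update(len(replay_buffer) - pbar.n) line uses tqdm's manual-update idiom: a bar advances by deltas and pbar.n is its current position, so updating by value - pbar.n pins the bar to an absolute value. A standalone sketch (the fill levels here are made up):

import tqdm

# tqdm advances by deltas; to display an absolute level v, update by v - pbar.n.
pbar = tqdm.tqdm(total=100, initial=0, desc="replay buffer")
for v in (10, 35, 35, 80):  # absolute fill levels; a repeat is a no-op
    pbar.update(v - pbar.n)
pbar.close()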


@@ -397,7 +406,7 @@ def preload_data_transform(data, metadata) -> Optional[Dict[str, Any]]:
 
     elif FLAGS.actor:
         sampling_rng = jax.device_put(sampling_rng, sharding.replicate())
-        data_store = QueuedDataStore(50000) # the queue size on the actor
+        data_store = QueuedDataStore(2000) # the queue size on the actor
 
         # actor loop
         print_green("starting actor loop")
4 changes: 0 additions & 4 deletions examples/async_drq_sim/run_actor.sh
@@ -6,9 +6,5 @@ python async_drq_sim.py "$@" \
     --exp_name=serl_dev_drq_sim_test_resnet \
     --seed 0 \
     --random_steps 1000 \
-    --training_starts 1000 \
-    --critic_actor_ratio 4 \
-    --batch_size 256 \
-    --eval_period 2000 \
     --encoder_type resnet-pretrained \
     --debug
2 changes: 0 additions & 2 deletions examples/async_drq_sim/run_learner.sh
@@ -4,10 +4,8 @@ python async_drq_sim.py "$@" \
     --learner \
     --exp_name=serl_dev_drq_sim_test_resnet \
     --seed 0 \
-    --random_steps 1000 \
     --training_starts 1000 \
     --critic_actor_ratio 4 \
-    --eval_period 2000 \
     --encoder_type resnet-pretrained \
     # --demo_path franka_lift_cube_image_20_trajs.pkl \
     --debug # wandb is disabled when debug
4 changes: 2 additions & 2 deletions examples/async_pcb_insert_drq/async_drq_randomized.py
@@ -57,7 +57,7 @@
 
 flags.DEFINE_integer("random_steps", 300, "Sample random actions for this many steps.")
 flags.DEFINE_integer("training_starts", 300, "Training starts after this step.")
-flags.DEFINE_integer("steps_per_update", 50, "Number of steps per update the server.")
+flags.DEFINE_integer("steps_per_update", 30, "Number of steps per update the server.")
 
 flags.DEFINE_integer("log_period", 10, "Logging period.")
 flags.DEFINE_integer("eval_period", 2000, "Evaluation period.")

@@ -476,7 +476,7 @@ def main(_):
 
     elif FLAGS.actor:
         sampling_rng = jax.device_put(sampling_rng, sharding.replicate())
-        data_store = QueuedDataStore(50000) # the queue size on the actor
+        data_store = QueuedDataStore(2000) # the queue size on the actor
 
         # actor loop
         print_green("starting actor loop")
4 changes: 0 additions & 4 deletions examples/async_pcb_insert_drq/run_actor.sh
@@ -7,10 +7,6 @@ python async_drq_randomized.py "$@" \
     --exp_name=serl_dev_drq_rlpd10demos_peg_insert_random_resnet \
     --seed 0 \
     --random_steps 0 \
-    --training_starts 200 \
-    --critic_actor_ratio 4 \
-    --batch_size 256 \
-    --eval_period 2000 \
     --encoder_type resnet-pretrained \
     --demo_path pcb_insert_20_demos_2023-12-27_19-40-50.pkl \
     --checkpoint_path /home/undergrad/code/serl_dev/examples/async_pcb_insert_drq/5x5_20degs_20demos_rand_pcb_insert_096 \
4 changes: 2 additions & 2 deletions examples/async_peg_insert_drq/async_drq_randomized.py
@@ -52,7 +52,7 @@
 
 flags.DEFINE_integer("random_steps", 300, "Sample random actions for this many steps.")
 flags.DEFINE_integer("training_starts", 300, "Training starts after this step.")
-flags.DEFINE_integer("steps_per_update", 50, "Number of steps per update the server.")
+flags.DEFINE_integer("steps_per_update", 30, "Number of steps per update the server.")
 
 flags.DEFINE_integer("log_period", 10, "Logging period.")
 flags.DEFINE_integer("eval_period", 2000, "Evaluation period.")

@@ -383,7 +383,7 @@ def main(_):
 
     elif FLAGS.actor:
         sampling_rng = jax.device_put(sampling_rng, sharding.replicate())
-        data_store = QueuedDataStore(50000) # the queue size on the actor
+        data_store = QueuedDataStore(2000) # the queue size on the actor
 
         # actor loop
         print_green("starting actor loop")
6 changes: 0 additions & 6 deletions examples/async_peg_insert_drq/run_actor.sh
@@ -8,11 +8,5 @@ python async_drq_randomized.py "$@" \
     --seed 0 \
     --random_steps 0 \
     --training_starts 200 \
-    --critic_actor_ratio 4 \
-    --batch_size 256 \
-    --eval_period 2000 \
     --encoder_type resnet-pretrained \
     --demo_path peg_insert_20_demos_2023-12-25_16-13-25.pkl \
-    # --checkpoint_path /home/undergrad/code/serl_dev/examples/async_pcb_insert_drq/5x5_20degs_100demos_rand_pcb_insert_bc \
-    # --eval_checkpoint_step 20000 \
-    # --eval_n_trajs 100 \
13 changes: 11 additions & 2 deletions examples/async_sac_state_sim/async_sac_state_sim.py
@@ -43,7 +43,7 @@
 
 flags.DEFINE_integer("random_steps", 300, "Sample random actions for this many steps.")
 flags.DEFINE_integer("training_starts", 300, "Training starts after this step.")
-flags.DEFINE_integer("steps_per_update", 50, "Number of steps per update the server.")
+flags.DEFINE_integer("steps_per_update", 30, "Number of steps per update the server.")
 
 flags.DEFINE_integer("log_period", 10, "Logging period.")
 flags.DEFINE_integer("eval_period", 2000, "Evaluation period.")

@@ -214,6 +214,14 @@ def stats_callback(type: str, payload: dict) -> dict:
 
     # wait till the replay buffer is filled with enough data
     timer = Timer()
+
+    # show replay buffer progress bar during training
+    pbar = tqdm.tqdm(
+        total=FLAGS.replay_buffer_capacity,
+        initial=len(replay_buffer),
+        desc="replay buffer",
+    )
+
     for step in tqdm.tqdm(range(FLAGS.max_steps), dynamic_ncols=True, desc="learner"):
         # Train the networks
         with timer.context("sample_replay_buffer"):

@@ -236,6 +244,7 @@ def stats_callback(type: str, payload: dict) -> dict:
                 FLAGS.checkpoint_path, agent.state, step=update_steps, keep=20
             )
 
+        pbar.update(len(replay_buffer) - pbar.n) # update replay buffer bar
         update_steps += 1

@@ -299,7 +308,7 @@ def main(_):
 
     elif FLAGS.actor:
         sampling_rng = jax.device_put(sampling_rng, sharding.replicate())
-        data_store = QueuedDataStore(50000) # the queue size on the actor
+        data_store = QueuedDataStore(2000) # the queue size on the actor
 
         # actor loop
         print_green("starting actor loop")
4 changes: 0 additions & 4 deletions examples/async_sac_state_sim/run_actor.sh
@@ -7,8 +7,4 @@ python async_sac_state_sim.py "$@" \
     --exp_name=serl_dev_sim_test \
     --seed 0 \
     --random_steps 1000 \
-    --training_starts 1000 \
-    --critic_actor_ratio 8 \
-    --batch_size 256 \
-    --eval_period 2000 \
     --debug
2 changes: 0 additions & 2 deletions examples/async_sac_state_sim/run_learner.sh
@@ -5,10 +5,8 @@ python async_sac_state_sim.py "$@" \
     --env PandaPickCube-v0 \
     --exp_name=serl_dev_sim_test \
     --seed 0 \
-    --random_steps 1000 \
     --training_starts 1000 \
     --critic_actor_ratio 8 \
     --batch_size 256 \
-    --eval_period 2000 \
     --debug # wandb is disabled when debug
1 change: 1 addition & 0 deletions serl_launcher/serl_launcher/utils/launcher.py
@@ -173,6 +173,7 @@ def make_trainer_config(port_number: int = 5488, broadcast_port: int = 5489):
         port_number=port_number,
         broadcast_port=broadcast_port,
         request_types=["send-stats"],
+        # experimental_pipeline_url="tcp://127.0.0.1:5547", # experimental ds update
     )
4 changes: 2 additions & 2 deletions serl_launcher/setup.py
@@ -2,7 +2,7 @@
 
 setup(
     name="serl_launcher",
-    version="0.1.2",
+    version="0.1.3",
     description="library for rl experiments",
     url="https://github.com/rail-berkeley/serl",
     author="auth",

@@ -13,7 +13,7 @@
         "typing_extensions",
         "opencv-python",
         "lz4",
-        "agentlace@git+https://github.com/youliangtan/agentlace.git@b9be677d5d20772fca98c8be44777ecb7111bc59",
+        "agentlace@git+https://github.com/youliangtan/agentlace.git@f025024631db0992a90085ee4637d8c0c90da317",
     ],
     packages=find_packages(),
     zip_safe=False,
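Note: the agentlace pin moves from b9be677 to f025024. Presumably this pulls in the upstream data-store fix that gives the PR its title, and the 0.1.2 to 0.1.3 bump records the dependency change.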