Skip to content

Add ReBRAC configs and source #59

Merged
merged 20 commits into from
Jul 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
193 changes: 97 additions & 96 deletions README.md

Large diffs are not rendered by default.

760 changes: 760 additions & 0 deletions algorithms/offline/rebrac.py

Large diffs are not rendered by default.

28 changes: 28 additions & 0 deletions configs/offline/rebrac/antmaze/large_diverse_v2.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
actor_bc_coef: 0.002
actor_learning_rate: 0.0003
actor_ln: false
actor_n_hiddens: 3
batch_size: 256
critic_bc_coef: 0.002
critic_learning_rate: 0.00005
critic_ln: true
critic_n_hiddens: 3
dataset_name: antmaze-large-diverse-v2
eval_episodes: 100
eval_every: 50
eval_seed: 42
gamma: 0.999
group: rebrac-antmaze-large-diverse-v2
hidden_dim: 256
name: rebrac
noise_clip: 0.5
normalize_q: true
normalize_reward: true
normalize_states: false
num_epochs: 1000
num_updates_on_epoch: 1000
policy_freq: 2
policy_noise: 0.2
project: ReBRAC
tau: 0.005
train_seed: 0
28 changes: 28 additions & 0 deletions configs/offline/rebrac/antmaze/large_play_v2.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
actor_bc_coef: 0.002
actor_learning_rate: 0.0003
actor_ln: false
actor_n_hiddens: 3
batch_size: 256
critic_bc_coef: 0.001
critic_learning_rate: 0.00005
critic_ln: true
critic_n_hiddens: 3
dataset_name: antmaze-large-play-v2
eval_episodes: 100
eval_every: 50
eval_seed: 42
gamma: 0.999
group: rebrac-antmaze-large-play-v2
hidden_dim: 256
name: rebrac
noise_clip: 0.5
normalize_q: true
normalize_reward: true
normalize_states: false
num_epochs: 1000
num_updates_on_epoch: 1000
policy_freq: 2
policy_noise: 0.2
project: ReBRAC
tau: 0.005
train_seed: 0
28 changes: 28 additions & 0 deletions configs/offline/rebrac/antmaze/medium_diverse_v2.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
actor_bc_coef: 0.001
actor_learning_rate: 0.0003
actor_ln: false
actor_n_hiddens: 3
batch_size: 256
critic_bc_coef: 0.0
critic_learning_rate: 0.00005
critic_ln: true
critic_n_hiddens: 3
dataset_name: antmaze-medium-diverse-v2
eval_episodes: 100
eval_every: 50
eval_seed: 42
gamma: 0.999
group: rebrac-antmaze-medium-diverse-v2
hidden_dim: 256
name: rebrac
noise_clip: 0.5
normalize_q: true
normalize_reward: true
normalize_states: false
num_epochs: 1000
num_updates_on_epoch: 1000
policy_freq: 2
policy_noise: 0.2
project: ReBRAC
tau: 0.005
train_seed: 0
28 changes: 28 additions & 0 deletions configs/offline/rebrac/antmaze/medium_play_v2.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
actor_bc_coef: 0.001
actor_learning_rate: 0.0003
actor_ln: false
actor_n_hiddens: 3
batch_size: 256
critic_bc_coef: 0.0005
critic_learning_rate: 0.00005
critic_ln: true
critic_n_hiddens: 3
dataset_name: antmaze-medium-play-v2
eval_episodes: 100
eval_every: 50
eval_seed: 42
gamma: 0.999
group: rebrac-antmaze-medium-play-v2
hidden_dim: 256
name: rebrac
noise_clip: 0.5
normalize_q: true
normalize_reward: true
normalize_states: false
num_epochs: 1000
num_updates_on_epoch: 1000
policy_freq: 2
policy_noise: 0.2
project: ReBRAC
tau: 0.005
train_seed: 0
28 changes: 28 additions & 0 deletions configs/offline/rebrac/antmaze/umaze_diverse_v2.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
actor_bc_coef: 0.003
actor_learning_rate: 0.0003
actor_ln: false
actor_n_hiddens: 3
batch_size: 256
critic_bc_coef: 0.001
critic_learning_rate: 0.00005
critic_ln: true
critic_n_hiddens: 3
dataset_name: antmaze-umaze-diverse-v2
eval_episodes: 100
eval_every: 50
eval_seed: 42
gamma: 0.999
group: rebrac-antmaze-umaze-diverse-v2
hidden_dim: 256
name: rebrac
noise_clip: 0.5
normalize_q: true
normalize_reward: true
normalize_states: false
num_epochs: 1000
num_updates_on_epoch: 1000
policy_freq: 2
policy_noise: 0.2
project: ReBRAC
tau: 0.005
train_seed: 0
28 changes: 28 additions & 0 deletions configs/offline/rebrac/antmaze/umaze_v2.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
actor_bc_coef: 0.003
actor_learning_rate: 0.0003
actor_ln: false
actor_n_hiddens: 3
batch_size: 256
critic_bc_coef: 0.002
critic_learning_rate: 0.00005
critic_ln: true
critic_n_hiddens: 3
dataset_name: antmaze-umaze-v2
eval_episodes: 100
eval_every: 50
eval_seed: 42
gamma: 0.999
group: rebrac-antmaze-umaze-v2
hidden_dim: 256
name: rebrac
noise_clip: 0.5
normalize_q: true
normalize_reward: true
normalize_states: false
num_epochs: 1000
num_updates_on_epoch: 1000
policy_freq: 2
policy_noise: 0.2
project: ReBRAC
tau: 0.005
train_seed: 0
28 changes: 28 additions & 0 deletions configs/offline/rebrac/door/cloned_v1.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
actor_bc_coef: 0.01
actor_learning_rate: 0.0003
actor_ln: false
actor_n_hiddens: 3
batch_size: 256
critic_bc_coef: 0.1
critic_learning_rate: 0.0003
critic_ln: true
critic_n_hiddens: 3
dataset_name: door-cloned-v1
eval_episodes: 10
eval_every: 5
eval_seed: 42
gamma: 0.99
group: rebrac-door-cloned-v1
hidden_dim: 256
name: rebrac
noise_clip: 0.5
normalize_q: true
normalize_reward: false
normalize_states: false
num_epochs: 1000
num_updates_on_epoch: 1000
policy_freq: 2
policy_noise: 0.2
project: ReBRAC
tau: 0.005
train_seed: 0
28 changes: 28 additions & 0 deletions configs/offline/rebrac/door/expert_v1.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
actor_bc_coef: 0.05
actor_learning_rate: 0.0003
actor_ln: false
actor_n_hiddens: 3
batch_size: 256
critic_bc_coef: 0.01
critic_learning_rate: 0.0003
critic_ln: true
critic_n_hiddens: 3
dataset_name: door-expert-v1
eval_episodes: 10
eval_every: 5
eval_seed: 42
gamma: 0.99
group: rebrac-door-expert-v1
hidden_dim: 256
name: rebrac
noise_clip: 0.5
normalize_q: true
normalize_reward: false
normalize_states: false
num_epochs: 1000
num_updates_on_epoch: 1000
policy_freq: 2
policy_noise: 0.2
project: ReBRAC
tau: 0.005
train_seed: 0
28 changes: 28 additions & 0 deletions configs/offline/rebrac/door/human_v1.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
actor_bc_coef: 0.1
actor_learning_rate: 0.0003
actor_ln: false
actor_n_hiddens: 3
batch_size: 256
critic_bc_coef: 0.1
critic_learning_rate: 0.0003
critic_ln: true
critic_n_hiddens: 3
dataset_name: door-human-v1
eval_episodes: 10
eval_every: 5
eval_seed: 42
gamma: 0.99
group: rebrac-door-human-v1
hidden_dim: 256
name: rebrac
noise_clip: 0.5
normalize_q: true
normalize_reward: false
normalize_states: false
num_epochs: 1000
num_updates_on_epoch: 1000
policy_freq: 2
policy_noise: 0.2
project: ReBRAC
tau: 0.005
train_seed: 0
28 changes: 28 additions & 0 deletions configs/offline/rebrac/halfcheetah/expert_v2.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
actor_bc_coef: 0.01
actor_learning_rate: 0.001
actor_ln: false
actor_n_hiddens: 3
batch_size: 1024
critic_bc_coef: 0.01
critic_learning_rate: 0.001
critic_ln: true
critic_n_hiddens: 3
dataset_name: halfcheetah-expert-v2
eval_episodes: 10
eval_every: 5
eval_seed: 42
gamma: 0.99
group: rebrac-halfcheetah-expert-v2
hidden_dim: 256
name: rebrac
noise_clip: 0.5
normalize_q: true
normalize_reward: false
normalize_states: false
num_epochs: 1000
num_updates_on_epoch: 1000
policy_freq: 2
policy_noise: 0.2
project: ReBRAC
tau: 0.005
train_seed: 0
28 changes: 28 additions & 0 deletions configs/offline/rebrac/halfcheetah/full_replay_v2.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
actor_bc_coef: 0.001
actor_learning_rate: 0.001
actor_ln: false
actor_n_hiddens: 3
batch_size: 1024
critic_bc_coef: 0.1
critic_learning_rate: 0.001
critic_ln: true
critic_n_hiddens: 3
dataset_name: halfcheetah-full-replay-v2
eval_episodes: 10
eval_every: 5
eval_seed: 42
gamma: 0.99
group: rebrac-halfcheetah-full-replay-v2
hidden_dim: 256
name: rebrac
noise_clip: 0.5
normalize_q: true
normalize_reward: false
normalize_states: false
num_epochs: 1000
num_updates_on_epoch: 1000
policy_freq: 2
policy_noise: 0.2
project: ReBRAC
tau: 0.005
train_seed: 0
28 changes: 28 additions & 0 deletions configs/offline/rebrac/halfcheetah/medium_expert_v2.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
actor_bc_coef: 0.01
actor_learning_rate: 0.001
actor_ln: false
actor_n_hiddens: 3
batch_size: 1024
critic_bc_coef: 0.1
critic_learning_rate: 0.001
critic_ln: true
critic_n_hiddens: 3
dataset_name: halfcheetah-medium-expert-v2
eval_episodes: 10
eval_every: 5
eval_seed: 42
gamma: 0.99
group: rebrac-halfcheetah-medium-expert-v2
hidden_dim: 256
name: rebrac
noise_clip: 0.5
normalize_q: true
normalize_reward: false
normalize_states: false
num_epochs: 1000
num_updates_on_epoch: 1000
policy_freq: 2
policy_noise: 0.2
project: ReBRAC
tau: 0.005
train_seed: 0
28 changes: 28 additions & 0 deletions configs/offline/rebrac/halfcheetah/medium_replay_v2.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
actor_bc_coef: 0.01
actor_learning_rate: 0.001
actor_ln: false
actor_n_hiddens: 3
batch_size: 1024
critic_bc_coef: 0.001
critic_learning_rate: 0.001
critic_ln: true
critic_n_hiddens: 3
dataset_name: halfcheetah-medium-replay-v2
eval_episodes: 10
eval_every: 5
eval_seed: 42
gamma: 0.99
group: rebrac-halfcheetah-medium-replay-v2
hidden_dim: 256
name: rebrac
noise_clip: 0.5
normalize_q: true
normalize_reward: false
normalize_states: false
num_epochs: 1000
num_updates_on_epoch: 1000
policy_freq: 2
policy_noise: 0.2
project: ReBRAC
tau: 0.005
train_seed: 0
Loading