# config.ini
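# Hyperparameter configuration for the imitation learning algorithms (AIRL,
# VAIRL, GAIL, VAIL, EAIRL, SQIL) and the reinforcement learning algorithms
# (PPO, SAC) below. Values such as torch.tanh are stored as strings and are
# presumably resolved to the corresponding callables by the loading code.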
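
# Paths to the expert demonstrations for the Hopper environment, stored as
# NumPy .npy arrays of states, actions, next states, and done flags.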
[demonstrations_location]
expert_state_location = ./expert_data/hopper_expert_states.npy
expert_action_location = ./expert_data/hopper_expert_actions.npy
expert_next_state_location = ./expert_data/hopper_expert_next_states.npy
expert_done_location = ./expert_data/hopper_expert_done.npy
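
# Adversarial Inverse Reinforcement Learning (AIRL, Fu et al. 2018).
# state_only = True makes the learned reward a function of the state alone;
# lr, layer_num, hidden_dim, and activation_function presumably configure the
# discriminator/reward network, and gamma is the discount factor.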
[airl]
state_only = True
lr = 0.0003
is_airl = True
layer_num = 3
activation_function = torch.tanh
last_activation = None
batch_size = 512
hidden_dim = 64
gamma = 0.99
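
# Variational AIRL: AIRL with a variational discriminator bottleneck (VDB,
# Peng et al. 2019). beta is the initial dual variable, updated with step size
# dual_stepsize so the mutual information between discriminator inputs and the
# z_dim-dimensional latent code stays near mutual_info_constraint (I_c);
# epoch is presumably the number of discriminator updates per training cycle.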
[vairl]
state_only = True
lr = 0.0003
is_airl = True
epoch = 3
beta = 0
dual_stepsize = 1e-5
mutual_info_constraint = 0.5
batch_size = 512
hidden_dim = 64
z_dim = 4
gamma = 0.99
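
# Generative Adversarial Imitation Learning (GAIL, Ho & Ermon 2016).
# last_activation = torch.sigmoid makes the discriminator output a
# probability; is_airl = False selects the GAIL-style discriminator objective.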
[gail]
is_airl = False
lr = 0.0003
layer_num = 3
activation_function = torch.tanh
last_activation = torch.sigmoid
batch_size = 512
hidden_dim = 64
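
# Variational Adversarial Imitation Learning (VAIL, Peng et al. 2019):
# GAIL with the same variational bottleneck as [vairl] (beta, dual_stepsize,
# mutual_info_constraint, z_dim).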
[vail]
is_airl = False
lr = 0.0003
epoch = 3
dual_stepsize = 1e-5
mutual_info_constraint = 0.5
batch_size = 512
hidden_dim = 64
z_dim = 4
beta = 0
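
# Empowerment-regularized AIRL (EAIRL, Qureshi et al. 2019). i_lambda is
# presumably the weight of the empowerment regularizer and update_cycle the
# interval for target-network updates; both readings are guesses from the key
# names rather than documented semantics.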
[eairl]
hidden_dim = 64
lr = 0.0003
beta = 1
gamma = 0.99
state_only = False
layer_num = 3
activation_function = torch.tanh
last_activation = None
is_airl = True
batch_size = 512
update_cycle = 5
i_lambda = 0.001
trainable_std = False
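
# Soft Q Imitation Learning (SQIL, Reddy et al. 2019): off-policy learning
# with reward 1 on demonstration transitions and 0 on the agent's own.
# lambda_ presumably weights the agent samples against the demonstrations.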
[sqil]
lambda_ = 1
is_airl = False
batch_size = 64
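
# Proximal Policy Optimization (PPO, Schulman et al. 2017), presumably the
# on-policy learner paired with the adversarial rewards above. max_clip is
# the clipping range, lambda_ the GAE(lambda) parameter, traj_length the
# rollout length per update, and train_epoch the number of optimization
# epochs per rollout.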
[ppo]
entropy_coef = 1e-2
critic_coef = 0.5
max_grad_norm = 0.5
actor_lr = 0.0003
critic_lr = 0.0003
gamma = 0.99
lambda_ = 0.95
max_clip = 0.2
train_epoch = 10
hidden_dim = 64
batch_size = 64
layer_num = 3
traj_length = 2048
activation_function = torch.tanh
last_activation = None
trainable_std = False
on_policy = True
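
# Soft Actor-Critic (SAC, Haarnoja et al. 2018). alpha_init is the initial
# entropy temperature and alpha_lr its learning rate, suggesting automatic
# temperature tuning; soft_update_rate is the Polyak coefficient (tau) for
# the target critics, memory_size the replay buffer capacity, and
# learn_start_size the number of transitions collected before updates begin.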
[sac]
alpha_init = 0.2
gamma = 0.99
q_lr = 3e-4
actor_lr = 3e-4
alpha_lr = 3e-4
soft_update_rate = 0.005
hidden_dim = 256
learn_start_size = 1000
memory_size = 1e+6
batch_size = 64
layer_num = 3
activation_function = torch.relu
last_activation = None
trainable_std = True
on_policy = False
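
# A minimal sketch (illustrative assumption, not part of this repository) of
# reading these sections with Python's standard configparser; only the
# section and key names come from this file:
#
#   import configparser
#
#   cfg = configparser.ConfigParser()
#   cfg.read("config.ini")
#
#   ppo = cfg["ppo"]
#   gamma = ppo.getfloat("gamma")            # 0.99
#   traj_length = ppo.getint("traj_length")  # 2048
#   on_policy = ppo.getboolean("on_policy")  # True
#   # "torch.tanh" is read back as a plain string; mapping it to the actual
#   # callable has to happen in the training code.
#   activation_name = ppo["activation_function"]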