Commit 7ec346a: 2s3z
wenzhangliu committed Oct 25, 2023
1 parent e2ef0a1 commit 7ec346a
Showing 7 changed files with 382 additions and 0 deletions.
57 changes: 57 additions & 0 deletions xuance/configs/coma/sc2/2s3z.yaml
@@ -0,0 +1,57 @@
agent: "COMA" # the learning algorithms_marl
env_name: "StarCraft2"
env_id: "2s3z"
fps: 15
policy: "Categorical_COMA_Policy"
representation: "Basic_RNN"
vectorize: "Dummy_StarCraft2"
runner: "StarCraft2_Runner"

use_recurrent: True
rnn: "GRU"
recurrent_layer_N: 1
fc_hidden_sizes: [64, ]
recurrent_hidden_size: 64
N_recurrent_layers: 1
dropout: 0
normalize: "LayerNorm"
initialize: "orthogonal"
gain: 0.01

actor_hidden_size: [64, ]
critic_hidden_size: [128, 128]
activation: "ReLU"

seed: 1
parallels: 1
n_size: 128
n_epoch: 15
n_minibatch: 1
learning_rate_actor: 0.0007
learning_rate_critic: 0.0007

clip_grad: 10
clip_type: 1 # Gradient clip for Mindspore: 0: ms.ops.clip_by_value; 1: ms.nn.ClipByNorm()
gamma: 0.95 # discount factor
td_lambda: 0.1

start_greedy: 0.5
end_greedy: 0.01
decay_step_greedy: 2500000
sync_frequency: 200

use_global_state: True # whether to use the global state to replace the merged observations
use_advnorm: True
use_gae: True
gae_lambda: 0.95

start_training: 1
running_steps: 2000000
train_per_step: True
training_frequency: 1

test_steps: 10000
eval_interval: 10000
test_episode: 10
log_dir: "./logs/coma/"
model_dir: "./models/coma/"
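
Note: a config like the one above is normally consumed through xuance's runner entry point rather than loaded by hand. The sketch below assumes the library's get_runner(method, env, env_id, is_test) API and the "coma"/"sc2" keys, which are inferred from the config path and are not stated in this commit.

import xuance

# Assumed entry point: get_runner resolves configs/coma/sc2/2s3z.yaml from
# the method/env/env_id arguments (the exact keys are an assumption here).
runner = xuance.get_runner(method="coma", env="sc2", env_id="2s3z", is_test=False)
runner.run()
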
63 changes: 63 additions & 0 deletions xuance/configs/ippo/sc2/2s3z.yaml
@@ -0,0 +1,63 @@
agent: "IPPO"
env_name: "StarCraft2"
env_id: "2s3z"
fps: 15
policy: "Categorical_MAAC_Policy"
representation: "Basic_RNN"
vectorize: "Dummy_StarCraft2"
runner: "StarCraft2_Runner"

# recurrent settings for Basic_RNN representation
use_recurrent: True
rnn: "GRU"
recurrent_layer_N: 1
fc_hidden_sizes: [64, 64, 64]
recurrent_hidden_size: 64
N_recurrent_layers: 1
dropout: 0
normalize: "LayerNorm"
initialize: "orthogonal"
gain: 0.01

actor_hidden_size: []
critic_hidden_size: []
activation: "ReLU"

seed: 1
parallels: 1
n_size: 128
n_epoch: 15
n_minibatch: 1
learning_rate: 0.0007 # 7e-4
weight_decay: 0

vf_coef: 1.0
ent_coef: 0.01
target_kl: 0.25
clip_range: 0.2
clip_type: 1 # Gradient clip for Mindspore: 0: ms.ops.clip_by_value; 1: ms.nn.ClipByNorm()
gamma: 0.99 # discount factor

# tricks
use_linear_lr_decay: False # whether to use linear learning rate decay
end_factor_lr_decay: 0.5
use_global_state: False # whether to use the global state to replace the joint observations
use_grad_norm: True # gradient normalization
max_grad_norm: 10.0
use_value_clip: True # limit the value range
value_clip_range: 0.2
use_value_norm: True # use running mean and std to normalize rewards.
use_huber_loss: True # True: use huber loss; False: use MSE loss.
huber_delta: 10.0
use_advnorm: True # use advantage normalization.
use_gae: True # use GAE trick to calculate returns.
gae_lambda: 0.95

start_training: 1
running_steps: 2000000
training_frequency: 1

eval_interval: 10000
test_episode: 10
log_dir: "./logs/ippo/"
model_dir: "./models/ippo/"
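
With use_gae: True and gae_lambda: 0.95, advantages are computed with generalized advantage estimation. A generic sketch of that recursion (illustrative only, not xuance's implementation), showing how the gamma and gae_lambda entries combine:

import numpy as np

def gae_advantages(rewards, values, dones, gamma=0.99, gae_lambda=0.95):
    # Standard GAE recursion; values has length T+1 (bootstrap value last).
    T = len(rewards)
    advantages = np.zeros(T, dtype=np.float32)
    last = 0.0
    for t in reversed(range(T)):
        nonterminal = 1.0 - dones[t]
        delta = rewards[t] + gamma * values[t + 1] * nonterminal - values[t]
        last = delta + gamma * gae_lambda * nonterminal * last
        advantages[t] = last
    returns = advantages + values[:-1]  # value targets for the critic
    return advantages, returns
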
47 changes: 47 additions & 0 deletions xuance/configs/iql/sc2/2s3z.yaml
@@ -0,0 +1,47 @@
agent: "IQL" # the learning algorithms_marl
env_name: "StarCraft2"
env_id: "2s3z"
fps: 15
policy: "Basic_Q_network_marl"
representation: "Basic_RNN"
vectorize: "Dummy_StarCraft2"
runner: "StarCraft2_Runner"
on_policy: False

# recurrent settings for Basic_RNN representation
use_recurrent: True
rnn: "GRU"
recurrent_layer_N: 1
fc_hidden_sizes: [64, ]
recurrent_hidden_size: 64
N_recurrent_layers: 1
dropout: 0

representation_hidden_size: [64, ]
q_hidden_size: [64, ] # the units for each hidden layer
activation: "ReLU"

seed: 1
parallels: 1
buffer_size: 5000
batch_size: 32
learning_rate: 0.0007
gamma: 0.99 # discount factor
double_q: True # use double q learning

start_greedy: 1.0
end_greedy: 0.05
decay_step_greedy: 50000
start_training: 1000 # start training after n episodes
running_steps: 2000000 # 2M
train_per_step: False # True: train model per step; False: train model per episode.
training_frequency: 1
sync_frequency: 200

use_grad_clip: False
grad_clip_norm: 0.5

eval_interval: 10000
test_episode: 10
log_dir: "./logs/iql/"
model_dir: "./models/iql/"
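
The start_greedy / end_greedy / decay_step_greedy entries describe the exploration schedule: the exploration rate falls from 1.0 to 0.05 over the first 50,000 steps. A small illustrative helper, assuming a linear decay shape (the shape is an assumption, not read from the repo):

def exploration_rate(step, start=1.0, end=0.05, decay_steps=50_000):
    # Linear interpolation from start_greedy to end_greedy over
    # decay_step_greedy steps, then held at end_greedy (assumed schedule).
    frac = min(step / decay_steps, 1.0)
    return start + frac * (end - start)
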
63 changes: 63 additions & 0 deletions xuance/configs/mappo/sc2/2s3z.yaml
@@ -0,0 +1,63 @@
agent: "MAPPO"
env_name: "StarCraft2"
env_id: "2s3z"
fps: 15
policy: "Categorical_MAAC_Policy"
representation: "Basic_RNN"
vectorize: "Dummy_StarCraft2"
runner: "StarCraft2_Runner"

# recurrent settings for Basic_RNN representation
use_recurrent: True
rnn: "GRU"
recurrent_layer_N: 1
fc_hidden_sizes: [64, 64, 64]
recurrent_hidden_size: 64
N_recurrent_layers: 1
dropout: 0
normalize: "LayerNorm"
initialize: "orthogonal"
gain: 0.01

actor_hidden_size: []
critic_hidden_size: []
activation: "ReLU"

seed: 1
parallels: 1
n_size: 128
n_epoch: 15
n_minibatch: 1
learning_rate: 0.0007 # 7e-4
weight_decay: 0

vf_coef: 1.0
ent_coef: 0.01
target_kl: 0.25
clip_range: 0.2
clip_type: 1 # Gradient clip for Mindspore: 0: ms.ops.clip_by_value; 1: ms.nn.ClipByNorm()
gamma: 0.99 # discount factor

# tricks
use_linear_lr_decay: False # whether to use linear learning rate decay
end_factor_lr_decay: 0.5
use_global_state: False # whether to use the global state to replace the joint observations
use_grad_norm: True # gradient normalization
max_grad_norm: 10.0
use_value_clip: True # limit the value range
value_clip_range: 0.2
use_value_norm: True # use running mean and std to normalize rewards.
use_huber_loss: True # True: use huber loss; False: use MSE loss.
huber_delta: 10.0
use_advnorm: True # use advantage normalization.
use_gae: True # use GAE trick to calculate returns.
gae_lambda: 0.95

start_training: 1
running_steps: 2000000
training_frequency: 1

eval_interval: 10000
test_episode: 10
log_dir: "./logs/mappo/"
model_dir: "./models/mappo/"
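
The use_value_clip, value_clip_range, use_huber_loss and huber_delta entries govern the critic loss. A generic PyTorch sketch of how such a clipped Huber value loss is usually assembled (illustrative only; not xuance's code):

import torch
import torch.nn.functional as F

def clipped_value_loss(values, old_values, returns, clip_range=0.2, huber_delta=10.0):
    # Keep the new value prediction within clip_range of the old one and
    # take the larger of the clipped / unclipped Huber losses (PPO-style).
    values_clipped = old_values + (values - old_values).clamp(-clip_range, clip_range)
    loss_unclipped = F.huber_loss(values, returns, delta=huber_delta, reduction="none")
    loss_clipped = F.huber_loss(values_clipped, returns, delta=huber_delta, reduction="none")
    return torch.max(loss_unclipped, loss_clipped).mean()
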
51 changes: 51 additions & 0 deletions xuance/configs/qmix/sc2/2s3z.yaml
@@ -0,0 +1,51 @@
agent: "QMIX" # the learning algorithms_marl
global_state: True
env_name: "StarCraft2"
env_id: "2s3z"
fps: 15
policy: "Mixing_Q_network"
representation: "Basic_RNN"
vectorize: "Dummy_StarCraft2"
runner: "StarCraft2_Runner"
on_policy: False

# recurrent settings for Basic_RNN representation
use_recurrent: True
rnn: "GRU"
recurrent_layer_N: 1
fc_hidden_sizes: [64, ]
recurrent_hidden_size: 64
N_recurrent_layers: 1
dropout: 0

representation_hidden_size: [64, ]
q_hidden_size: [64, ] # the units for each hidden layer
activation: "ReLU"

hidden_dim_mixing_net: 32 # hidden units of mixing network
hidden_dim_hyper_net: 32 # hidden units of hyper network

seed: 1
parallels: 1
buffer_size: 5000
batch_size: 32
learning_rate: 0.0007
gamma: 0.99 # discount factor
double_q: True # use double q learning

start_greedy: 1.0
end_greedy: 0.05
decay_step_greedy: 50000
start_training: 1000 # start training after n episodes
running_steps: 2000000 # 2M
train_per_step: False # True: train model per step; False: train model per episode.
training_frequency: 1
sync_frequency: 200

use_grad_clip: False
grad_clip_norm: 0.5

eval_interval: 10000
test_episode: 10
log_dir: "./logs/qmix/"
model_dir: "./models/qmix/"
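
hidden_dim_mixing_net and hidden_dim_hyper_net size QMIX's state-conditioned mixer: hypernetworks emit non-negative mixing weights, so Q_tot stays monotonic in every agent's Q-value. A compact illustrative mixer (a generic QMIX sketch with made-up names mix_dim / hyper_dim, not xuance's implementation):

import torch
import torch.nn as nn
import torch.nn.functional as F

class TinyQMIXMixer(nn.Module):
    def __init__(self, n_agents, state_dim, mix_dim=32, hyper_dim=32):
        super().__init__()
        self.n_agents, self.mix_dim = n_agents, mix_dim
        # Hypernetworks: map the global state to the mixer's weights/biases.
        self.hyper_w1 = nn.Sequential(nn.Linear(state_dim, hyper_dim), nn.ReLU(),
                                      nn.Linear(hyper_dim, n_agents * mix_dim))
        self.hyper_b1 = nn.Linear(state_dim, mix_dim)
        self.hyper_w2 = nn.Sequential(nn.Linear(state_dim, hyper_dim), nn.ReLU(),
                                      nn.Linear(hyper_dim, mix_dim))
        self.hyper_b2 = nn.Sequential(nn.Linear(state_dim, mix_dim), nn.ReLU(),
                                      nn.Linear(mix_dim, 1))

    def forward(self, agent_qs, state):
        # agent_qs: [batch, n_agents]; abs() keeps the weights non-negative,
        # which is what enforces monotonicity of Q_tot in each agent's Q.
        w1 = torch.abs(self.hyper_w1(state)).view(-1, self.n_agents, self.mix_dim)
        b1 = self.hyper_b1(state).view(-1, 1, self.mix_dim)
        hidden = F.elu(torch.bmm(agent_qs.unsqueeze(1), w1) + b1)
        w2 = torch.abs(self.hyper_w2(state)).view(-1, self.mix_dim, 1)
        b2 = self.hyper_b2(state).view(-1, 1, 1)
        return (torch.bmm(hidden, w2) + b2).view(-1, 1)  # Q_tot: [batch, 1]
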
48 changes: 48 additions & 0 deletions xuance/configs/vdn/sc2/2s3z.yaml
@@ -0,0 +1,48 @@
agent: "VDN" # the learning algorithms_marl
global_state: False
env_name: "StarCraft2"
env_id: "2s3z"
fps: 15
policy: "Mixing_Q_network"
representation: "Basic_RNN"
vectorize: "Dummy_StarCraft2"
runner: "StarCraft2_Runner"
on_policy: False

# recurrent settings for Basic_RNN representation
use_recurrent: True
rnn: "GRU"
recurrent_layer_N: 1
fc_hidden_sizes: [64, ]
recurrent_hidden_size: 64
N_recurrent_layers: 1
dropout: 0

representation_hidden_size: [64, ]
q_hidden_size: [64, ] # the units for each hidden layer
activation: "ReLU"

seed: 1
parallels: 1
buffer_size: 5000
batch_size: 32
learning_rate: 0.0007
gamma: 0.99 # discount factor
double_q: True # use double q learning

start_greedy: 1.0
end_greedy: 0.05
decay_step_greedy: 50000
start_training: 1000 # start training after n episodes
running_steps: 2000000 # 2M
train_per_step: False # True: train model per step; False: train model per episode.
training_frequency: 1
sync_frequency: 200

use_grad_clip: False
grad_clip_norm: 0.5

eval_interval: 10000
test_episode: 10
log_dir: "./logs/vdn/"
model_dir: "./models/vdn/"
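
Unlike the QMIX config, this file has no mixer dimensions: VDN factorizes the joint action-value as a plain sum of the per-agent Q-values, so no state-conditioned mixing network is needed. A one-line illustration (generic, not repo code):

import torch

def vdn_total_q(agent_qs: torch.Tensor) -> torch.Tensor:
    # agent_qs: [batch, n_agents] chosen-action Q-values; VDN's Q_tot is their sum.
    return agent_qs.sum(dim=-1, keepdim=True)
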
53 changes: 53 additions & 0 deletions xuance/configs/wqmix/sc2/2s3z.yaml
@@ -0,0 +1,53 @@
agent: "OWQMIX" # choice: CWQMIX, OWQMIX
env_name: "StarCraft2"
env_id: "2s3z"
fps: 15
policy: "Weighted_Mixing_Q_network"
representation: "Basic_RNN"
vectorize: "Dummy_StarCraft2"
runner: "StarCraft2_Runner"
on_policy: False

# recurrent settings for Basic_RNN representation
use_recurrent: True
rnn: "GRU"
recurrent_layer_N: 1
fc_hidden_sizes: [64, ]
recurrent_hidden_size: 64
N_recurrent_layers: 1
dropout: 0

representation_hidden_size: [64, ]
q_hidden_size: [64, ] # the units for each hidden layer
activation: "ReLU"
alpha: 0.1

hidden_dim_mixing_net: 32 # hidden units of mixing network
hidden_dim_hyper_net: 64 # hidden units of hyper network

hidden_dim_ff_mix_net: 256 # hidden units of mixing network

seed: 1
parallels: 1
buffer_size: 5000
batch_size: 32
learning_rate: 0.0007
gamma: 0.99 # discount factor
double_q: True # use double q learning

start_greedy: 1.0
end_greedy: 0.05
decay_step_greedy: 50000
start_training: 1000 # start training after n episodes
running_steps: 2000000 # 2M
train_per_step: False # True: train model per step; False: train model per episode.
training_frequency: 1
sync_frequency: 200

use_grad_clip: False
grad_clip_norm: 0.5

eval_interval: 10000
test_episode: 10
log_dir: "./logs/wqmix/"
model_dir: "./models/wqmix/"
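
The extra alpha: 0.1 entry is the weighting used by Weighted QMIX. In the OW-QMIX variant selected here, samples whose joint value is underestimated (target above Q_tot) keep full weight, while the rest are scaled down by alpha. A generic sketch of that weighted TD loss (illustrative only, not the repo's learner code):

import torch

def ow_qmix_loss(q_tot, target, alpha=0.1):
    # Optimistic weighting: weight 1 where the target exceeds q_tot
    # (underestimation), alpha elsewhere (overestimation).
    td_error = target - q_tot
    weight = torch.where(td_error > 0.0,
                         torch.ones_like(td_error),
                         torch.full_like(td_error, alpha))
    return (weight * td_error.pow(2)).mean()
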
