Commit 7ec346a: 2s3z
wenzhangliu committed Oct 25, 2023
1 parent e2ef0a1 commit 7ec346a
Showing 7 changed files with 382 additions and 0 deletions.
57 changes: 57 additions & 0 deletions xuance/configs/coma/sc2/2s3z.yaml
@@ -0,0 +1,57 @@
agent: "COMA" # the learning algorithms_marl
env_name: "StarCraft2"
env_id: "2s3z"
fps: 15
policy: "Categorical_COMA_Policy"
representation: "Basic_RNN"
vectorize: "Dummy_StarCraft2"
runner: "StarCraft2_Runner"

use_recurrent: True
rnn: "GRU"
recurrent_layer_N: 1
fc_hidden_sizes: [64, ]
recurrent_hidden_size: 64
N_recurrent_layers: 1
dropout: 0
normalize: "LayerNorm"
initialize: "orthogonal"
gain: 0.01

actor_hidden_size: [64, ]
critic_hidden_size: [128, 128]
activation: "ReLU"

seed: 1
parallels: 1
n_size: 128
n_epoch: 15
n_minibatch: 1
learning_rate_actor: 0.0007
learning_rate_critic: 0.0007

clip_grad: 10
clip_type: 1 # Gradient clip for Mindspore: 0: ms.ops.clip_by_value; 1: ms.nn.ClipByNorm()
gamma: 0.95 # discount factor
td_lambda: 0.1

start_greedy: 0.5
end_greedy: 0.01
decay_step_greedy: 2500000
sync_frequency: 200

use_global_state: True # whether to use the global state to replace the merged observations
use_advnorm: True
use_gae: True
gae_lambda: 0.95

start_training: 1
running_steps: 2000000
train_per_step: True
training_frequency: 1

test_steps: 10000
eval_interval: 10000
test_episode: 10
log_dir: "./logs/coma/"
model_dir: "./models/coma/"
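
Note: a config like the one above is normally consumed through xuance's runner entry point rather than loaded by hand. The sketch below assumes the library's get_runner(method, env, env_id, is_test) API and the "coma"/"sc2" keys, which are inferred from the config path and are not stated in this commit.

import xuance

# Assumed entry point: get_runner resolves configs/coma/sc2/2s3z.yaml from
# the method/env/env_id arguments (the exact keys are an assumption here).
runner = xuance.get_runner(method="coma", env="sc2", env_id="2s3z", is_test=False)
runner.run()
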
63 changes: 63 additions & 0 deletions xuance/configs/ippo/sc2/2s3z.yaml
@@ -0,0 +1,63 @@
agent: "IPPO"
env_name: "StarCraft2"
env_id: "2s3z"
fps: 15
policy: "Categorical_MAAC_Policy"
representation: "Basic_RNN"
vectorize: "Dummy_StarCraft2"
runner: "StarCraft2_Runner"

# recurrent settings for Basic_RNN representation
use_recurrent: True
rnn: "GRU"
recurrent_layer_N: 1
fc_hidden_sizes: [64, 64, 64]
recurrent_hidden_size: 64
N_recurrent_layers: 1
dropout: 0
normalize: "LayerNorm"
initialize: "orthogonal"
gain: 0.01

actor_hidden_size: []
critic_hidden_size: []
activation: "ReLU"

seed: 1
parallels: 1
n_size: 128
n_epoch: 15
n_minibatch: 1
learning_rate: 0.0007 # 7e-4
weight_decay: 0

vf_coef: 1.0
ent_coef: 0.01
target_kl: 0.25
clip_range: 0.2
clip_type: 1 # Gradient clip for Mindspore: 0: ms.ops.clip_by_value; 1: ms.nn.ClipByNorm()
gamma: 0.99 # discount factor

# tricks
use_linear_lr_decay: False # whether to use linear learning rate decay
end_factor_lr_decay: 0.5
use_global_state: False # whether to use the global state to replace the joint observations
use_grad_norm: True # gradient normalization
max_grad_norm: 10.0
use_value_clip: True # limit the value range
value_clip_range: 0.2
use_value_norm: True # use running mean and std to normalize rewards.
use_huber_loss: True # True: use huber loss; False: use MSE loss.
huber_delta: 10.0
use_advnorm: True # use advantage normalization.
use_gae: True # use GAE trick to calculate returns.
gae_lambda: 0.95

start_training: 1
running_steps: 2000000
training_frequency: 1

eval_interval: 10000
test_episode: 10
log_dir: "./logs/ippo/"
model_dir: "./models/ippo/"
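
With use_gae: True and gae_lambda: 0.95, advantages are computed with generalized advantage estimation. A generic sketch of that recursion (illustrative only, not xuance's implementation), showing how the gamma and gae_lambda entries combine:

import numpy as np

def gae_advantages(rewards, values, dones, gamma=0.99, gae_lambda=0.95):
    # Standard GAE recursion; values has length T+1 (bootstrap value last).
    T = len(rewards)
    advantages = np.zeros(T, dtype=np.float32)
    last = 0.0
    for t in reversed(range(T)):
        nonterminal = 1.0 - dones[t]
        delta = rewards[t] + gamma * values[t + 1] * nonterminal - values[t]
        last = delta + gamma * gae_lambda * nonterminal * last
        advantages[t] = last
    returns = advantages + values[:-1]  # value targets for the critic
    return advantages, returns
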
47 changes: 47 additions & 0 deletions xuance/configs/iql/sc2/2s3z.yaml
@@ -0,0 +1,47 @@
agent: "IQL" # the learning algorithms_marl
env_name: "StarCraft2"
env_id: "2s3z"
fps: 15
policy: "Basic_Q_network_marl"
representation: "Basic_RNN"
vectorize: "Dummy_StarCraft2"
runner: "StarCraft2_Runner"
on_policy: False

# recurrent settings for Basic_RNN representation
use_recurrent: True
rnn: "GRU"
recurrent_layer_N: 1
fc_hidden_sizes: [64, ]
recurrent_hidden_size: 64
N_recurrent_layers: 1
dropout: 0

representation_hidden_size: [64, ]
q_hidden_size: [64, ] # the units for each hidden layer
activation: "ReLU"

seed: 1
parallels: 1
buffer_size: 5000
batch_size: 32
learning_rate: 0.0007
gamma: 0.99 # discount factor
double_q: True # use double q learning

start_greedy: 1.0
end_greedy: 0.05
decay_step_greedy: 50000
start_training: 1000 # start training after n episodes
running_steps: 2000000 # 2M
train_per_step: False # True: train model per step; False: train model per episode.
training_frequency: 1
sync_frequency: 200

use_grad_clip: False
grad_clip_norm: 0.5

eval_interval: 10000
test_episode: 10
log_dir: "./logs/iql/"
model_dir: "./models/iql/"
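
The start_greedy / end_greedy / decay_step_greedy entries describe the exploration schedule: the exploration rate falls from 1.0 to 0.05 over the first 50,000 steps. A small illustrative helper, assuming a linear decay shape (the shape is an assumption, not read from the repo):

def exploration_rate(step, start=1.0, end=0.05, decay_steps=50_000):
    # Linear interpolation from start_greedy to end_greedy over
    # decay_step_greedy steps, then held at end_greedy (assumed schedule).
    frac = min(step / decay_steps, 1.0)
    return start + frac * (end - start)
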
63 changes: 63 additions & 0 deletions xuance/configs/mappo/sc2/2s3z.yaml
@@ -0,0 +1,63 @@
agent: "MAPPO"
env_name: "StarCraft2"
env_id: "2s3z"
fps: 15
policy: "Categorical_MAAC_Policy"
representation: "Basic_RNN"
vectorize: "Dummy_StarCraft2"
runner: "StarCraft2_Runner"

# recurrent settings for Basic_RNN representation
use_recurrent: True
rnn: "GRU"
recurrent_layer_N: 1
fc_hidden_sizes: [64, 64, 64]
recurrent_hidden_size: 64
N_recurrent_layers: 1
dropout: 0
normalize: "LayerNorm"
initialize: "orthogonal"
gain: 0.01

actor_hidden_size: []
critic_hidden_size: []
activation: "ReLU"

seed: 1
parallels: 1
n_size: 128
n_epoch: 15
n_minibatch: 1
learning_rate: 0.0007 # 7e-4
weight_decay: 0

vf_coef: 1.0
ent_coef: 0.01
target_kl: 0.25
clip_range: 0.2
clip_type: 1 # Gradient clip for Mindspore: 0: ms.ops.clip_by_value; 1: ms.nn.ClipByNorm()
gamma: 0.99 # discount factor

# tricks
use_linear_lr_decay: False # whether to use linear learning rate decay
end_factor_lr_decay: 0.5
use_global_state: False # whether to use the global state to replace the joint observations
use_grad_norm: True # gradient normalization
max_grad_norm: 10.0
use_value_clip: True # limit the value range
value_clip_range: 0.2
use_value_norm: True # use running mean and std to normalize rewards.
use_huber_loss: True # True: use huber loss; False: use MSE loss.
huber_delta: 10.0
use_advnorm: True # use advantage normalization.
use_gae: True # use GAE trick to calculate returns.
gae_lambda: 0.95

start_training: 1
running_steps: 2000000
training_frequency: 1

eval_interval: 10000
test_episode: 10
log_dir: "./logs/mappo/"
model_dir: "./models/mappo/"
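
The use_value_clip, value_clip_range, use_huber_loss and huber_delta entries govern the critic loss. A generic PyTorch sketch of how such a clipped Huber value loss is usually assembled (illustrative only; not xuance's code):

import torch
import torch.nn.functional as F

def clipped_value_loss(values, old_values, returns, clip_range=0.2, huber_delta=10.0):
    # Keep the new value prediction within clip_range of the old one and
    # take the larger of the clipped / unclipped Huber losses (PPO-style).
    values_clipped = old_values + (values - old_values).clamp(-clip_range, clip_range)
    loss_unclipped = F.huber_loss(values, returns, delta=huber_delta, reduction="none")
    loss_clipped = F.huber_loss(values_clipped, returns, delta=huber_delta, reduction="none")
    return torch.max(loss_unclipped, loss_clipped).mean()
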
51 changes: 51 additions & 0 deletions xuance/configs/qmix/sc2/2s3z.yaml
@@ -0,0 +1,51 @@
agent: "QMIX" # the learning algorithms_marl
global_state: True
env_name: "StarCraft2"
env_id: "2s3z"
fps: 15
policy: "Mixing_Q_network"
representation: "Basic_RNN"
vectorize: "Dummy_StarCraft2"
runner: "StarCraft2_Runner"
on_policy: False

# recurrent settings for Basic_RNN representation
use_recurrent: True
rnn: "GRU"
recurrent_layer_N: 1
fc_hidden_sizes: [64, ]
recurrent_hidden_size: 64
N_recurrent_layers: 1
dropout: 0

representation_hidden_size: [64, ]
q_hidden_size: [64, ] # the units for each hidden layer
activation: "ReLU"

hidden_dim_mixing_net: 32 # hidden units of mixing network
hidden_dim_hyper_net: 32 # hidden units of hyper network

seed: 1
parallels: 1
buffer_size: 5000
batch_size: 32
learning_rate: 0.0007
gamma: 0.99 # discount factor
double_q: True # use double q learning

start_greedy: 1.0
end_greedy: 0.05
decay_step_greedy: 50000
start_training: 1000 # start training after n episodes
running_steps: 2000000 # 2M
train_per_step: False # True: train model per step; False: train model per episode.
training_frequency: 1
sync_frequency: 200

use_grad_clip: False
grad_clip_norm: 0.5

eval_interval: 10000
test_episode: 10
log_dir: "./logs/qmix/"
model_dir: "./models/qmix/"
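
hidden_dim_mixing_net and hidden_dim_hyper_net size QMIX's state-conditioned mixer: hypernetworks emit non-negative mixing weights, so Q_tot stays monotonic in every agent's Q-value. A compact illustrative mixer (a generic QMIX sketch with made-up names mix_dim / hyper_dim, not xuance's implementation):

import torch
import torch.nn as nn
import torch.nn.functional as F

class TinyQMIXMixer(nn.Module):
    def __init__(self, n_agents, state_dim, mix_dim=32, hyper_dim=32):
        super().__init__()
        self.n_agents, self.mix_dim = n_agents, mix_dim
        # Hypernetworks: map the global state to the mixer's weights/biases.
        self.hyper_w1 = nn.Sequential(nn.Linear(state_dim, hyper_dim), nn.ReLU(),
                                      nn.Linear(hyper_dim, n_agents * mix_dim))
        self.hyper_b1 = nn.Linear(state_dim, mix_dim)
        self.hyper_w2 = nn.Sequential(nn.Linear(state_dim, hyper_dim), nn.ReLU(),
                                      nn.Linear(hyper_dim, mix_dim))
        self.hyper_b2 = nn.Sequential(nn.Linear(state_dim, mix_dim), nn.ReLU(),
                                      nn.Linear(mix_dim, 1))

    def forward(self, agent_qs, state):
        # agent_qs: [batch, n_agents]; abs() keeps the weights non-negative,
        # which is what enforces monotonicity of Q_tot in each agent's Q.
        w1 = torch.abs(self.hyper_w1(state)).view(-1, self.n_agents, self.mix_dim)
        b1 = self.hyper_b1(state).view(-1, 1, self.mix_dim)
        hidden = F.elu(torch.bmm(agent_qs.unsqueeze(1), w1) + b1)
        w2 = torch.abs(self.hyper_w2(state)).view(-1, self.mix_dim, 1)
        b2 = self.hyper_b2(state).view(-1, 1, 1)
        return (torch.bmm(hidden, w2) + b2).view(-1, 1)  # Q_tot: [batch, 1]
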
48 changes: 48 additions & 0 deletions xuance/configs/vdn/sc2/2s3z.yaml
@@ -0,0 +1,48 @@
agent: "VDN" # the learning algorithms_marl
global_state: False
env_name: "StarCraft2"
env_id: "2s3z"
fps: 15
policy: "Mixing_Q_network"
representation: "Basic_RNN"
vectorize: "Dummy_StarCraft2"
runner: "StarCraft2_Runner"
on_policy: False

# recurrent settings for Basic_RNN representation
use_recurrent: True
rnn: "GRU"
recurrent_layer_N: 1
fc_hidden_sizes: [64, ]
recurrent_hidden_size: 64
N_recurrent_layers: 1
dropout: 0

representation_hidden_size: [64, ]
q_hidden_size: [64, ] # the units for each hidden layer
activation: "ReLU"

seed: 1
parallels: 1
buffer_size: 5000
batch_size: 32
learning_rate: 0.0007
gamma: 0.99 # discount factor
double_q: True # use double q learning

start_greedy: 1.0
end_greedy: 0.05
decay_step_greedy: 50000
start_training: 1000 # start training after n episodes
running_steps: 2000000 # 2M
train_per_step: False # True: train model per step; False: train model per episode.
training_frequency: 1
sync_frequency: 200

use_grad_clip: False
grad_clip_norm: 0.5

eval_interval: 10000
test_episode: 10
log_dir: "./logs/vdn/"
model_dir: "./models/vdn/"
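
Unlike the QMIX config, this file has no mixer dimensions: VDN factorizes the joint action-value as a plain sum of the per-agent Q-values, so no state-conditioned mixing network is needed. A one-line illustration (generic, not repo code):

import torch

def vdn_total_q(agent_qs: torch.Tensor) -> torch.Tensor:
    # agent_qs: [batch, n_agents] chosen-action Q-values; VDN's Q_tot is their sum.
    return agent_qs.sum(dim=-1, keepdim=True)
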
53 changes: 53 additions & 0 deletions xuance/configs/wqmix/sc2/2s3z.yaml
@@ -0,0 +1,53 @@
agent: "OWQMIX" # choice: CWQMIX, OWQMIX
env_name: "StarCraft2"
env_id: "2s3z"
fps: 15
policy: "Weighted_Mixing_Q_network"
representation: "Basic_RNN"
vectorize: "Dummy_StarCraft2"
runner: "StarCraft2_Runner"
on_policy: False

# recurrent settings for Basic_RNN representation
use_recurrent: True
rnn: "GRU"
recurrent_layer_N: 1
fc_hidden_sizes: [64, ]
recurrent_hidden_size: 64
N_recurrent_layers: 1
dropout: 0

representation_hidden_size: [64, ]
q_hidden_size: [64, ] # the units for each hidden layer
activation: "ReLU"
alpha: 0.1

hidden_dim_mixing_net: 32 # hidden units of mixing network
hidden_dim_hyper_net: 64 # hidden units of hyper network

hidden_dim_ff_mix_net: 256 # hidden units of mixing network

seed: 1
parallels: 1
buffer_size: 5000
batch_size: 32
learning_rate: 0.0007
gamma: 0.99 # discount factor
double_q: True # use double q learning

start_greedy: 1.0
end_greedy: 0.05
decay_step_greedy: 50000
start_training: 1000 # start training after n episodes
running_steps: 2000000 # 2M
train_per_step: False # True: train model per step; False: train model per episode.
training_frequency: 1
sync_frequency: 200

use_grad_clip: False
grad_clip_norm: 0.5

eval_interval: 10000
test_episode: 10
log_dir: "./logs/wqmix/"
model_dir: "./models/wqmix/"
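
The extra alpha: 0.1 entry is the weighting used by Weighted QMIX. In the OW-QMIX variant selected here, samples whose joint value is underestimated (target above Q_tot) keep full weight, while the rest are scaled down by alpha. A generic sketch of that weighted TD loss (illustrative only, not the repo's learner code):

import torch

def ow_qmix_loss(q_tot, target, alpha=0.1):
    # Optimistic weighting: weight 1 where the target exceeds q_tot
    # (underestimation), alpha elsewhere (overestimation).
    td_error = target - q_tot
    weight = torch.where(td_error > 0.0,
                         torch.ones_like(td_error),
                         torch.full_like(td_error, alpha))
    return (weight * td_error.pow(2)).mean()
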
