2s3z

agi-brain · Oct 25, 2023 · 3b62c30 · 3b62c30
1 parent 7ec346a
commit 3b62c30
Show file tree

Hide file tree

Showing 9 changed files with 121 additions and 7 deletions.
diff --git a/xuance/configs/coma/sc2/2s3z.yaml b/xuance/configs/coma/sc2/2s3z.yaml
@@ -1,6 +1,6 @@
 agent: "COMA"  # the learning algorithms_marl
 env_name: "StarCraft2"
-env_id: "1c3s5z"
+env_id: "2s3z"
 fps: 15
 policy: "Categorical_COMA_Policy"
 representation: "Basic_RNN"

diff --git a/xuance/configs/dcg/sc2/1c3s5z.yaml b/xuance/configs/dcg/sc2/1c3s5z.yaml
@@ -0,0 +1,57 @@
+agent: "DCG"  # Options: DCG, DCG_S
+env_name: "StarCraft2"
+env_id: "1c3s5z"  # same identifier as this config file's name (1c3s5z.yaml)
+fps: 15
+policy: "DCG_policy"
+representation: "Basic_RNN"
+vectorize: "Dummy_StarCraft2"
+runner: "StarCraft2_Runner"
+on_policy: False
+
+# recurrent settings for Basic_RNN representation
+use_recurrent: True
+rnn: "GRU"
+recurrent_layer_N: 1  # NOTE(review): looks redundant with N_recurrent_layers below; confirm which key is read
+fc_hidden_sizes: [64, ]
+recurrent_hidden_size: 64
+N_recurrent_layers: 1  # NOTE(review): looks redundant with recurrent_layer_N above; confirm which key is read
+dropout: 0
+
+representation_hidden_size: [64, ]
+q_hidden_size: [64, ]  # the units for each hidden layer
+hidden_utility_dim: 64  # hidden units of the utility function
+hidden_payoff_dim: 64  # hidden units of the payoff function
+bias_net: "Basic_MLP"
+hidden_bias_dim: [64, ]  # hidden units of the bias network with global states as input
+activation: "ReLU"
+
+low_rank_payoff: False  # low-rank approximation of payoff function
+payoff_rank: 5  # the rank K in the paper; presumably only used when low_rank_payoff is True (TODO confirm)
+graph_type: "FULL"  # specific type of the coordination graph
+n_msg_iterations: 8  # number of iterations for message passing during belief propagation
+msg_normalized: True  # Message normalization during greedy action selection (Kok and Vlassis, 2006)
+
+seed: 1
+parallels: 1
+buffer_size: 5000
+batch_size: 32
+learning_rate: 0.0007
+gamma: 0.99  # discount factor
+double_q: True  # use double q learning
+
+start_greedy: 1.0
+end_greedy: 0.05
+decay_step_greedy: 50000
+start_training: 1000  # start training after n episodes
+running_steps: 2000000  # 2M
+train_per_step: False  # True: train model per step; False: train model per episode.
+training_frequency: 1
+sync_frequency: 200
+
+use_grad_clip: False
+grad_clip_norm: 0.5  # presumably ignored while use_grad_clip is False (TODO confirm)
+
+eval_interval: 10000
+test_episode: 10
+log_dir: "./logs/dcg/"  # relative path
+model_dir: "./models/dcg/"  # relative path
diff --git a/xuance/configs/dcg/sc2/2s3z.yaml b/xuance/configs/dcg/sc2/2s3z.yaml
@@ -0,0 +1,57 @@
+agent: "DCG"  # Options: DCG, DCG_S
+env_name: "StarCraft2"
+env_id: "2s3z"  # same identifier as this config file's name (2s3z.yaml)
+fps: 15
+policy: "DCG_policy"
+representation: "Basic_RNN"
+vectorize: "Dummy_StarCraft2"
+runner: "StarCraft2_Runner"
+on_policy: False
+
+# recurrent settings for Basic_RNN representation
+use_recurrent: True
+rnn: "GRU"
+recurrent_layer_N: 1  # NOTE(review): looks redundant with N_recurrent_layers below; confirm which key is read
+fc_hidden_sizes: [64, ]
+recurrent_hidden_size: 64
+N_recurrent_layers: 1  # NOTE(review): looks redundant with recurrent_layer_N above; confirm which key is read
+dropout: 0
+
+representation_hidden_size: [64, ]
+q_hidden_size: [64, ]  # the units for each hidden layer
+hidden_utility_dim: 64  # hidden units of the utility function
+hidden_payoff_dim: 64  # hidden units of the payoff function
+bias_net: "Basic_MLP"
+hidden_bias_dim: [64, ]  # hidden units of the bias network with global states as input
+activation: "ReLU"
+
+low_rank_payoff: False  # low-rank approximation of payoff function
+payoff_rank: 5  # the rank K in the paper; presumably only used when low_rank_payoff is True (TODO confirm)
+graph_type: "FULL"  # specific type of the coordination graph
+n_msg_iterations: 8  # number of iterations for message passing during belief propagation
+msg_normalized: True  # Message normalization during greedy action selection (Kok and Vlassis, 2006)
+
+seed: 1
+parallels: 1
+buffer_size: 5000
+batch_size: 32
+learning_rate: 0.0007
+gamma: 0.99  # discount factor
+double_q: True  # use double q learning
+
+start_greedy: 1.0
+end_greedy: 0.05
+decay_step_greedy: 50000
+start_training: 1000  # start training after n episodes
+running_steps: 2000000  # 2M
+train_per_step: False  # True: train model per step; False: train model per episode.
+training_frequency: 1
+sync_frequency: 200
+
+use_grad_clip: False
+grad_clip_norm: 0.5  # presumably ignored while use_grad_clip is False (TODO confirm)
+
+eval_interval: 10000
+test_episode: 10
+log_dir: "./logs/dcg/"  # relative path
+model_dir: "./models/dcg/"  # relative path
diff --git a/xuance/configs/ippo/sc2/2s3z.yaml b/xuance/configs/ippo/sc2/2s3z.yaml
@@ -1,6 +1,6 @@
 agent: "IPPO"
 env_name: "StarCraft2"
-env_id: "1c3s5z"
+env_id: "2s3z"
 fps: 15
 policy: "Categorical_MAAC_Policy"
 representation: "Basic_RNN"

diff --git a/xuance/configs/iql/sc2/2s3z.yaml b/xuance/configs/iql/sc2/2s3z.yaml
@@ -1,6 +1,6 @@
 agent: "IQL"  # the learning algorithms_marl
 env_name: "StarCraft2"
-env_id: "1c3s5z"
+env_id: "2s3z"
 fps: 15
 policy: "Basic_Q_network_marl"
 representation: "Basic_RNN"

diff --git a/xuance/configs/mappo/sc2/2s3z.yaml b/xuance/configs/mappo/sc2/2s3z.yaml
@@ -1,6 +1,6 @@
 agent: "MAPPO"
 env_name: "StarCraft2"
-env_id: "1c3s5z"
+env_id: "2s3z"
 fps: 15
 policy: "Categorical_MAAC_Policy"
 representation: "Basic_RNN"

diff --git a/xuance/configs/qmix/sc2/2s3z.yaml b/xuance/configs/qmix/sc2/2s3z.yaml
@@ -1,7 +1,7 @@
 agent: "QMIX"  # the learning algorithms_marl
 global_state: True
 env_name: "StarCraft2"
-env_id: "1c3s5z"
+env_id: "2s3z"
 fps: 15
 policy: "Mixing_Q_network"
 representation: "Basic_RNN"

diff --git a/xuance/configs/vdn/sc2/2s3z.yaml b/xuance/configs/vdn/sc2/2s3z.yaml
@@ -1,7 +1,7 @@
 agent: "VDN"  # the learning algorithms_marl
 global_state: False
 env_name: "StarCraft2"
-env_id: "1c3s5z"
+env_id: "2s3z"
 fps: 15
 policy: "Mixing_Q_network"
 representation: "Basic_RNN"

diff --git a/xuance/configs/wqmix/sc2/2s3z.yaml b/xuance/configs/wqmix/sc2/2s3z.yaml
@@ -1,6 +1,6 @@
 agent: "OWQMIX"  # choice: CWQMIX, OWQMIX
 env_name: "StarCraft2"
-env_id: "1c3s5z"
+env_id: "2s3z"
 fps: 15
 policy: "Weighted_Mixing_Q_network"
 representation: "Basic_RNN"