Skip to content

Commit

Permalink
gfootball
Browse files Browse the repository at this point in the history
  • Loading branch information
wenzhangliu committed Nov 9, 2023
1 parent 4716c37 commit ff05395
Show file tree
Hide file tree
Showing 8 changed files with 100 additions and 16 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
.vscode
.DS_Store
logs/
videos/
dist/
.eggs/
xuance.egg-info/
Expand Down
6 changes: 3 additions & 3 deletions benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@

def parse_args():
parser = argparse.ArgumentParser("Run benchmark results.")
parser.add_argument("--method", type=str, default="sac")
parser.add_argument("--env", type=str, default="classic_control")
parser.add_argument("--env-id", type=str, default="CartPole-v1")
parser.add_argument("--method", type=str, default="dqn")
parser.add_argument("--env", type=str, default="box2d")
parser.add_argument("--env-id", type=str, default="LunarLander-v2")
parser.add_argument("--seed", type=int, default=1069)
parser.add_argument("--n-steps", type=int, default=16)
parser.add_argument("--test", type=int, default=0)
Expand Down
6 changes: 3 additions & 3 deletions benchmark_marl.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,14 @@

def parse_args():
parser = argparse.ArgumentParser("Run an MARL demo.")
parser.add_argument("--method", type=str, default="iql")
parser.add_argument("--method", type=str, default="mappo")
parser.add_argument("--env", type=str, default="football")
parser.add_argument("--env-id", type=str, default="3v1")
parser.add_argument("--seed", type=int, default=10)
parser.add_argument("--seed", type=int, default=2)
parser.add_argument("--test", type=int, default=0)
parser.add_argument("--device", type=str, default="cuda:0")

parser.add_argument("--mixer", type=str, default="VDN")
# parser.add_argument("--mixer", type=str, default="VDN")
return parser.parse_args()


Expand Down
7 changes: 4 additions & 3 deletions demo_marl.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,10 @@
def parse_args():
parser = argparse.ArgumentParser("Run an MARL demo.")
parser.add_argument("--method", type=str, default="mappo")
parser.add_argument("--env", type=str, default="sc2")
parser.add_argument("--env-id", type=str, default="3m")
parser.add_argument("--test", type=int, default=0)
parser.add_argument("--env", type=str, default="football")
parser.add_argument("--env-id", type=str, default="3v1")
parser.add_argument("--test", type=int, default=1)
parser.add_argument("--seed", type=int, default=10)
parser.add_argument("--device", type=str, default="cuda:0")
return parser.parse_args()

Expand Down
72 changes: 72 additions & 0 deletions xuance/configs/mappo/football/3v1.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
agent: "MAPPO"  # the multi-agent RL algorithm to run
global_state: True  # expose a global state to the (centralized) critic
# environment settings (Google Research Football)
env_name: "Football"
scenario: "academy_3_vs_1_with_keeper"  # GRF academy scenario: 3 attackers vs 1 defender + keeper
use_stacked_frames: False  # Whether to use stacked_frames as observations
num_agent: 3  # number of controlled (left-team) players
num_adversary: 0  # number of controlled right-team players
obs_type: "simple115v2"  # representation used to build the observation, choices: ["simple115v2", "extracted", "pixels_gray", "pixels"]
rewards_type: "scoring,checkpoints"  # comma separated list of rewards to be added
smm_width: 96  # width of super minimap
smm_height: 72  # height of super minimap
fps: 15  # frames per second for rendered/dumped videos
policy: "Categorical_MAAC_Policy"  # categorical actor + centralized critic (discrete actions)
representation: "Basic_RNN"  # recurrent feature extractor shared by actor/critic
vectorize: "Subproc_Football"  # subprocess-based vectorized env wrapper
runner: "Football_Runner"

# recurrent settings for Basic_RNN representation
use_recurrent: True
rnn: "GRU"  # recurrent cell type
recurrent_layer_N: 1  # NOTE(review): appears redundant with N_recurrent_layers below — confirm which one the code reads
fc_hidden_sizes: [128, 128, 128]  # MLP layers before the recurrent cell
recurrent_hidden_size: 128
N_recurrent_layers: 1
dropout: 0
normalize: "LayerNorm"  # layer normalization inside the representation
initialize: "orthogonal"  # weight initialization scheme
gain: 0.01  # gain for the (output-layer) initialization

actor_hidden_size: []   # empty: actor head maps representation output directly to logits
critic_hidden_size: []  # empty: critic head maps representation output directly to value
activation: "ReLU"

seed: 1
parallels: 50  # number of parallel environment instances
n_size: 50  # presumably the rollout buffer length per env — TODO confirm against buffer code
n_epoch: 15  # PPO epochs per update
n_minibatch: 2  # minibatches per epoch
learning_rate: 0.0007  # 7e-4
weight_decay: 0

vf_coef: 1.0  # value-loss coefficient
ent_coef: 0.01  # entropy bonus coefficient
clip_range: 0.2  # PPO policy clip range
clip_type: 1  # Gradient clip for Mindspore: 0: ms.ops.clip_by_value; 1: ms.nn.ClipByNorm()
gamma: 0.99  # discount factor

# tricks
use_linear_lr_decay: False  # if use linear learning rate decay
end_factor_lr_decay: 0.5  # final LR factor when linear decay is enabled
use_global_state: True  # if use global state to calculate values
use_grad_norm: True  # gradient normalization
max_grad_norm: 10.0
use_value_clip: True  # limit the value range
value_clip_range: 0.2
use_value_norm: True  # use running mean and std to normalize rewards.
use_huber_loss: True  # True: use huber loss; False: use MSE loss.
huber_delta: 10.0
use_advnorm: True  # use advantage normalization.
use_gae: True  # use GAE trick to calculate returns.
gae_lambda: 0.95

start_training: 1  # env step at which training begins
running_steps: 25000000  # total environment steps (25M)
training_frequency: 1

eval_interval: 250000  # evaluate every N steps
test_episode: 50  # episodes per evaluation
log_dir: "./logs/mappo/"
model_dir: "./models/mappo/"
videos_dir: "./videos/mappo/"
2 changes: 1 addition & 1 deletion xuance/environment/football/gfootball_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def close(self):
self.env.close()

def render(self):
return self.env.render()
return self.env.get_frame()

def reset(self):
obs, info = self.env.reset()
Expand Down
15 changes: 11 additions & 4 deletions xuance/environment/football/raw_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,13 @@ def __init__(self, args):
extra_players = None
other_config_options = {}
self.env_id = GFOOTBALL_ENV_ID[args.env_id]
if args.render:
if args.test:
write_full_episode_dumps = True
render = True
self.render = True
write_video = True
else:
write_full_episode_dumps = False
render = False
self.render = False
write_video = False

self.env = football_env.create_environment(
Expand All @@ -29,7 +29,7 @@ def __init__(self, args):
rewards=args.rewards_type,
write_goal_dumps=write_goal_dumps,
write_full_episode_dumps=write_full_episode_dumps,
render=render,
render=self.render,
write_video=write_video,
dump_frequency=dump_frequency,
logdir=args.videos_dir,
Expand Down Expand Up @@ -73,6 +73,11 @@ def step(self, action):
truncated = False
return obs, reward, terminated, truncated, info

def get_frame(self):
    """Return the current raw pixel frame of the underlying football env.

    Reads the wrapped environment's internal observation dict (presumably
    the gfootball raw observation — TODO confirm it always carries a
    "frame" key when rendering is on) and returns its "frame" entry.
    When rendering is disabled an empty list is returned instead, so
    callers always get a sequence-like value.
    """
    raw_observation = self.env._env._observation
    if self.render:
        return raw_observation["frame"]
    return []

def state(self):
def do_flatten(obj):
"""Run flatten on either python list or numpy array."""
Expand All @@ -97,6 +102,8 @@ def do_flatten(obj):
game_mode = [0] * 7
game_mode[v] = 1
state.extend(game_mode)
elif k == "frame":
pass
else:
state.extend(do_flatten(v))
return state
7 changes: 5 additions & 2 deletions xuance/torch/runners/runner_football.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,11 @@
class Football_Runner(SC2_Runner):
def __init__(self, args):
self.num_agents, self.num_adversaries = 0, 0
args.render = False
if args.test:
args.parallels = 1
args.render = True
else:
args.render = False
super(Football_Runner, self).__init__(args)

def get_agent_num(self):
Expand Down Expand Up @@ -41,7 +45,6 @@ def run_episodes(self, test_mode=False):
actions_dict = self.get_actions(obs_n, available_actions, rnn_hidden, rnn_hidden_critic,
state=state, test_mode=test_mode)
next_obs_n, next_state, rewards, terminated, truncated, info = self.envs.step(actions_dict['actions_n'])
# self.envs.render(self.args.render_mode)
envs_done = self.envs.buf_done
rnn_hidden, rnn_hidden_critic = actions_dict['rnn_hidden'], actions_dict['rnn_hidden_critic']

Expand Down

0 comments on commit ff05395

Please sign in to comment.