submission.py
import yaml
import numpy as np
import torch

from football_ikki import Environment, Action, feature_from_states, OBS_TEMPLATE, INFO_TEMPLATE
from handyrl_core.model import load_model

model_path = 'models/1679.pth'

with open('config.yaml') as f:
    config = yaml.safe_load(f)

env = Environment(config['env_args'])
model = load_model(env.net()(env), model_path)
model.eval()

x0 = feature_from_states([[{'observation': OBS_TEMPLATE, 'action': [0]}, None]], INFO_TEMPLATE, 0)
p, v, _, _ = model.inference(x0, None)
print(p)
print(v)
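# The two prints above show the raw policy logits and the value-head output
# for the template observation: a quick sanity check that the checkpoint
# loaded and a forward pass runs before the first real step.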
def output_think(env, obs, actions, p, v, r):
    # Push the logits of illegal actions to a huge negative value so that
    # softmax assigns them (effectively) zero probability.
    pmask = np.ones_like(p)
    pmask[actions] = 0
    p = p - pmask * 1e32

    def softmax(x):
        # Numerically stable softmax: subtract the max before exponentiating.
        x = np.exp(x - np.max(x, axis=-1, keepdims=True))
        return x / x.sum(axis=-1, keepdims=True)

    sticky_actions = obs['players_raw'][0]['sticky_actions']
    print(sticky_actions)
    print(actions)
    print((softmax(p) * 1000).astype(int))  # action probabilities in permille
    print(v)
    print(r)
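# A tiny illustration of the masking trick above (hypothetical numbers, not
# from this repository): with logits p = [1.0, 2.0, 3.0, 4.0] and legal
# actions [0, 2], the mask leaves p[0] and p[2] untouched and drives p[1]
# and p[3] to about -1e32, so softmax(p) puts essentially all probability
# mass on actions 0 and 2.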
prev_action = 0
reserved_action = None


def agent(obs):
    global prev_action, reserved_action

    # Replay the previously returned action into the internal environment so
    # its state stays in sync with the live match.
    info = [{'observation': obs, 'action': [prev_action]}, None]
    env.play_info(info)
    print('step %d' % len(env.states))

    x = env.observation(0)
    p, v, r, _ = model.inference(x, None)
    actions = env.legal_actions(0)
    output_think(env, obs, actions, p, v, r)

    # Greedy policy: take the legal action with the highest logit.
    ap_list = sorted([(a, p[a]) for a in actions], key=lambda x: -x[1])
    # You need to return a list containing your single action (an int in [1, 18]).
    # Beware that the model output might be a float, so make sure to return an int.
    action = ap_list[0][0]

    if reserved_action is not None:
        # The second half of a split special action was reserved last step.
        prev_action = reserved_action
        reserved_action = None
        print('###RESERVED###')
    else:
        # Split a special (composite) action into the primitive action to play
        # now and one reserved for the next step.
        prev_action, reserved_action = env.special_to_actions(action)

    return [prev_action]
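
# --- Hypothetical local test harness, not part of the original submission.
# A minimal sketch, assuming the kaggle-environments package with football
# support (and the gfootball engine) is installed; the scenario name and the
# built-in 'do_nothing' opponent are assumptions, not taken from this repo.
if __name__ == '__main__':
    from kaggle_environments import make

    kaggle_env = make('football', configuration={'scenario_name': '11_vs_11_kaggle'})
    # Pit this agent against the do-nothing baseline for one episode.
    steps = kaggle_env.run([agent, 'do_nothing'])
    print('episode finished after %d steps' % len(steps))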