-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathreward_driven_client.py
170 lines (135 loc) · 6.23 KB
/
reward_driven_client.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
import numpy as np
import subprocess
import operator
from time import time
from core.communication import GameClient
from core.logic import GameLogicDelegate
from core.util import FieldState, PlayerColor, Direction, Move
from core.state import GameSettings, GameResult, GameResultCause, GameState
class RewardDrivenClient(GameLogicDelegate):
def __init__(self, host, port, reservation, autoplay=False):
# members from GameLogicDelegate
super(GameLogicDelegate).__init__()
self.reset_callback = None
self.result = None
self.cause = None
self.autoplay = autoplay
self.game_client = None
self.host = host
self.port = port
self.reservation = reservation
self.opponents_executable = "java -jar ../piranhas-not-so-simple-client-19.2.1.jar --host {host} --port {port}"
# numpy random object
self.np_random = None
def make_new_game(self, start_opponent=False):
self.game_client = GameClient(self.host, self.port, self)
self.game_client.start()
self.game_client.join(self.reservation)
if start_opponent or self.autoplay:
# start opponent
subprocess.Popen(
self.opponents_executable.format(host=self.host, port=self.port),
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
shell=True
)
self.game_client.wait_until_stopped()
# Overridden methods inherited from GameLogicDelegate
def onGameStateUpdate(self, game_state):
super().onGameStateUpdate(game_state)
self.currentGameState = GameState.copy(game_state)
def onMoveRequest(self):
super().onMoveRequest()
start_time = time()
if self.currentGameState is None:
print('[env] there is no field')
return None
our_fishes = self.currentGameState.get_fishes(GameSettings.ourColor)
estimated_rewards = {} # dict of (move, reward)
for our_fish in our_fishes:
for our_dir in Direction:
our_move = Move(our_fish[0], our_fish[1], our_dir)
# game_state = self.currentGameState.apply(our_move)
reward, done, game_state = self.currentGameState.estimate_reward(our_move, opponent_did_move=False)
if game_state is None:
# this move was invalid
continue
estimated_rewards[our_move] = (reward, done, game_state)
estimated_rewards_sorted = sorted(estimated_rewards.items(), key=lambda x: -x[1][0])
estimated_rewards_opponent = {}
time_exceeded = False
for move, (reward, done, game_state) in estimated_rewards_sorted:
possible_rewards = []
their_fishes = game_state.get_fishes(GameSettings.ourColor.otherColor())
for their_fish in their_fishes:
for their_dir in Direction:
their_move = Move(their_fish[0], their_fish[1], their_dir)
next_game_state = game_state.apply(their_move)
if next_game_state is None:
# this move was invalid
continue
reward, done, _ = self.currentGameState.estimate_reward(next_game_state)
possible_rewards.append(reward)
if time() - start_time > 1.4:
time_exceeded = True
break
if time_exceeded:
break
if time_exceeded:
break
possible_rewards = np.array(possible_rewards)
estimated_rewards_opponent[move] = (possible_rewards.mean(), possible_rewards.max(), possible_rewards.min())
if len(estimated_rewards_opponent) >= 3:
estimated_rewards = estimated_rewards_opponent
else:
for move in estimated_rewards:
reward = estimated_rewards[move][0]
estimated_rewards[move] = (reward, reward, reward)
worst_case_move = None
highest_worst_case_reward = None
best_case_move = None
highest_best_case_reward = None
typical_move = None
highest_typical_reward = None
for move, (typical_reward, best_case_reward, worst_case_reward) in estimated_rewards.items():
if highest_typical_reward is None or typical_reward > highest_typical_reward:
typical_move = move
highest_typical_reward = typical_reward
if highest_best_case_reward is None or best_case_reward > highest_best_case_reward:
best_case_move = move
highest_best_case_reward = best_case_reward
if highest_worst_case_reward is None or worst_case_reward > highest_worst_case_reward:
worst_case_move = move
highest_worst_case_reward = worst_case_reward
print(
'''[env] Sending move after {:.3f} seconds. Expected Reward:
Typical: {:10.2f} {}
Best Case: {:10.2f} {}
-> Worst Case: {:10.2f} {}
'''.format(
time()-start_time,
highest_typical_reward, typical_move,
highest_best_case_reward, best_case_move,
highest_worst_case_reward, worst_case_move
))
return worst_case_move
def onGameResult(self, result, cause, description):
print("[env] Received gameResult '({}, {})'".format(result, cause))
self.result = result
self.cause = cause
if self.autoplay:
self.make_new_game()
return True
def calc_reward(self, previous_game_state):
if self.result == GameResult.WON and \
self.cause == GameResultCause.REGULAR:
return 100.0, True
elif self.result == GameResult.LOST and \
(self.cause == GameResultCause.REGULAR or
self.cause == GameResultCause.RULE_VIOLATION):
return -100.0, True
estimated_reward, done = previous_game_state.estimate_reward(self.currentGameState)
return estimated_reward, done or self.result is not None
if __name__ == "__main__":
client = RewardDrivenClient('127.0.0.1', 13050, None, True)
client.make_new_game()