-
Notifications
You must be signed in to change notification settings - Fork 0
/
two_player_game2.py
65 lines (58 loc) · 2.02 KB
/
two_player_game2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import numpy as np
class two_player_game2():
    """Two-player round-based game environment with a limited energy budget.

    The environment state is the pair ``[d, B]``:

    * ``d`` -- rounds won by the player minus rounds won by the opponent.
    * ``B`` -- the player's remaining energy, kept within ``[0, max_B]``.

    Each call to :meth:`step` plays one round.  Action ``0`` is the
    high-effort action (win probability ``ph``, costs ``ch`` energy) and
    action ``1`` is the low-effort action (win probability ``pl``, free).
    An episode ends as soon as ``d`` reaches ``max_d`` (the player wins and
    ``R`` is added to the reward) or ``-max_d`` (the player loses).
    """

    max_d = 3   # Lead, in rounds, at which an episode terminates (+/-).
    max_B = 10  # Energy cap; also the energy the player starts with.

    def __init__(self, ph=0.55, pl=0.45, ch=1, R=1000, a=2, p=0.2):
        """Create a new game in its initial state.

        Args:
            ph: Probability of winning a round when action ``0`` is taken.
            pl: Probability of winning a round when action ``1`` is taken.
            ch: Energy deducted from ``B`` when action ``0`` is taken.
            R: Reward added when the player reaches a lead of ``max_d``.
            a: Energy added back to ``B`` when a recharge occurs.
            p: Probability that a recharge occurs at the end of a round.
        """
        ### Parameters that can be changed while initializing ###
        self.ph = ph
        self.pl = pl
        self.ch = ch
        self.R = R
        self.a = a
        self.p = p
        # Other attributes to track (d, B define state of the environment)
        self.d = 0
        self.B = self.max_B
        self.reward = 0.0
        self.is_terminal = False

    def step(self, action):
        """Play one round with ``action`` and advance the environment.

        Args:
            action: ``0`` for the high-effort action or ``1`` for the
                low-effort action.

        Returns:
            A tuple ``([d, B], reward, is_terminal)`` describing the state
            after the round, the accumulated reward and whether the episode
            has finished.

        Raises:
            AssertionError: If ``action`` is neither ``0`` nor ``1``.  The
                state is left untouched in that case.
        """
        # Determine whether or not the player wins depending on the action.
        if action == 1:
            win = np.random.binomial(1, self.pl)
        elif action == 0:
            win = np.random.binomial(1, self.ph)
            # High effort costs energy; never let it drop below zero.
            # NOTE(review): high effort is still granted when B is already
            # 0 -- confirm callers are expected to mask that action.
            self.B = max(self.B - self.ch, 0)
        else:
            # Raised explicitly (instead of `assert False`) so the check is
            # not stripped when Python runs with -O.
            raise AssertionError("Actions should be in the range of (0,2)")

        # Add the round to the winning player's tally.
        if win:
            self.d += 1
        else:
            self.d -= 1

        # Add back 'a' energy with probability 'p', capped at max_B.
        if np.random.binomial(1, self.p):
            self.B = min(self.B + self.a, self.max_B)

        # Determine whether either side is now max_d rounds ahead.
        if self.d == self.max_d:
            self.is_terminal = True
            self.reward += self.R
        elif self.d == -self.max_d:
            self.is_terminal = True

        # Transition to next state
        return [self.d, self.B], self.reward, self.is_terminal

    def reset(self):
        """Return the environment to its initial state.

        Returns:
            The initial state ``[d, B]``, i.e. ``[0, max_B]``.
        """
        # Float, to match the value assigned in __init__.
        self.reward = 0.0
        self.d = 0
        self.B = self.max_B
        self.is_terminal = False
        return [self.d, self.B]

    def get_ch(self):
        """Getter method for the cost of high effort."""
        return self.ch