-
Notifications
You must be signed in to change notification settings - Fork 0
/
MountainCar.py
73 lines (55 loc) · 2.12 KB
/
MountainCar.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import numpy as np
import math
class MountainCar(object):
    """Classic mountain-car control problem.

    The car starts in a valley and must build momentum to reach the goal
    position on the right hill.  Actions are discrete:
    0 = push left, 1 = no push, 2 = push right.
    """

    def __init__(self, start, goal, Xrange, Vrange):
        # start:  (position, velocity) pair used by reset()
        # goal:   position threshold that ends the episode with reward 100
        # Xrange: (min, max) position bounds; only the left bound is enforced
        #         as a hard wall (the goal lies before the right bound)
        # Vrange: (min, max) velocity bounds
        self.start = start
        self.goal = goal
        self.Xrange = Xrange
        self.Vrange = Vrange
        self.num_actions = 3
        # Initialize state immediately so act()/observe() are safe even if
        # the caller forgets to call reset() first.
        self.reset()

    def _DoAction(self, action):
        """Apply one discrete action and advance the physics one step.

        action: 0, 1 or 2, mapped to a force of -0.001, 0 or +0.001.
        Updates self.state in place with the new (position, velocity).
        """
        position = self.state[0]
        speed = self.state[1]
        bpleft = self.Xrange[0]  # hard wall on the left
        bsleft = self.Vrange[0]
        bsright = self.Vrange[1]
        # Standard mountain-car dynamics: applied force plus gravity term.
        speedt1 = speed + (0.001 * (action - 1)) + (-0.0025 * math.cos(3.0 * position))
        speedt1 = speedt1 * 0.999  # friction: slight velocity decay each step
        # Clamp velocity to its allowed range.
        if speedt1 < bsleft:
            speedt1 = bsleft
        elif speedt1 > bsright:
            speedt1 = bsright
        post1 = position + speedt1
        # Inelastic collision with the left wall: pin position, zero velocity.
        if post1 <= bpleft:
            post1 = bpleft
            speedt1 = 0.0
        self.state = np.array([post1, speedt1])

    def _GetReward(self):
        """Return (reward, done) for the current state.

        reward is -1 per step, 100 on reaching the goal position;
        done is True once position >= goal.
        """
        position = self.state[0]
        if position >= self.goal:
            return 100, True
        return -1, False

    def act(self, action):
        """Take one step: apply `action`, then return (observation, reward, done)."""
        self._DoAction(action)
        reward, game_over = self._GetReward()
        return self.observe(), reward, game_over

    def observe(self):
        """Return the current state as a (1, 2) array: [[position, velocity]]."""
        return self.state.reshape((1, -1))

    def reset(self):
        """Reset the car to the start state and return the initial observation."""
        self.state = np.array([self.start[0], self.start[1]])
        return self.observe()