import numpy as np
import gym

# Note: this script targets the classic Gym API (env.reset() returns only the
# observation and env.step() returns a 4-tuple), as in Gym releases before 0.26.


def single_relu(x):
    """ReLU activation for a single scalar: max(0, x)."""
    if x < 0:
        return 0
    else:
        return x


# Element-wise ReLU that can be applied to numpy arrays.
relu = np.vectorize(single_relu)


class Agent:
    """A CartPole agent whose policy is a small two-layer neural network."""

    def __init__(self, std=0.5, verbose=False):
        self.verbose = verbose
        self.std = std
        self.reward = 0
        # Layer 1 maps the 4 observation values to 4 hidden units;
        # layer 2 maps the hidden units to scores for the 2 actions.
        self.weights1 = np.random.normal(0, std, [4, 4])
        self.bias1 = .1 * np.ones(4)
        self.weights2 = np.random.normal(0, std, [2, 4])
        self.bias2 = .1 * np.ones(2)

    def print(self, text=''):
        """Print debug output only when the agent is verbose."""
        if self.verbose:
            print(text)

    def mutate(self, std=None):
        """Return copies of the weights and biases perturbed by Gaussian noise of scale std."""
        if std is None:
            std = self.std
        weights1 = self.weights1 + np.random.normal(0, std, [4, 4])
        weights2 = self.weights2 + np.random.normal(0, std, [2, 4])
        bias1 = self.bias1 + np.random.normal(0, std, 4)
        bias2 = self.bias2 + np.random.normal(0, std, 2)
        return weights1, bias1, weights2, bias2

    def offspring(self, std=None):
        """Create a new Agent carrying a mutated copy of this agent's parameters."""
        if std is None:
            std = self.std
        weights1, bias1, weights2, bias2 = self.mutate(std)
        agent = Agent(std)
        agent.weights1 = weights1
        agent.weights2 = weights2
        agent.bias1 = bias1
        agent.bias2 = bias2
        return agent

    def action(self, observation):
        """
        The agent's "brain" consists of a two-layer neural network, which is fed the
        environment's observation.
        :param observation: the environment's observation output.
        :return: the action with the highest score.
        """
        result = np.dot(self.weights1, observation)
        self.print(result)
        result += self.bias1
        self.print(result)
        result = relu(result)
        self.print(result)
        result = np.dot(self.weights2, result)
        self.print(result)
        result += self.bias2
        self.print(result)
        return np.argmax(result)

    def add_reward(self, reward):
        self.reward += reward

    def reset(self):
        self.reward = 0
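

# Hypothetical helper, not part of the original script: a quick sanity check that the
# policy network maps a length-4 CartPole observation to a valid action index (0 or 1).
def _sanity_check_agent():
    agent = Agent(std=0.5)
    assert agent.action(np.zeros(4)) in (0, 1)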


class Generation:
    """A population of agents evolved by reward-proportional selection and mutation."""

    def __init__(self, n=10, std=.1, env=None, verbose=False):
        self.generation = 0
        self.std = std
        self.n = n
        self.agents = [Agent(std=std, verbose=verbose) for _ in range(n)]
        if env is None:
            self.env = gym.make('CartPole-v0')
        else:
            self.env = env

    def reset(self):
        """Reset the accumulated reward of every agent."""
        for agent in self.agents:
            agent.reset()

    def select(self, proportion=.3, std=None):
        """Keep a reward-weighted sample of the population and refill it with mutated offspring."""
        # Sample survivors without replacement, with probability proportional to reward.
        p = np.array([agent.reward for agent in self.agents], dtype=float)
        p /= p.sum()
        agents = np.random.choice(self.agents, size=int(proportion * self.n), replace=False, p=p)
        # Re-normalize over the survivors to pick the parents of the offspring.
        p = np.array([agent.reward for agent in agents], dtype=float)
        p /= p.sum()
        n = self.n - int(proportion * self.n)
        if std is None:
            std = self.std
        self.agents = [agent for agent in agents] + \
                      [agent.offspring(std) for agent in np.random.choice(agents, n, p=p)]

    def simulation_step(self, n=100):
        """Run each agent for up to n environment steps and accumulate its reward."""
        for agent in self.agents:
            observation = self.env.reset()
            for t in range(n):
                observation, reward, done, info = self.env.step(agent.action(observation))
                agent.add_reward(reward)
                if done:
                    break
        self.generation += 1
        return np.array([agent.reward for agent in self.agents])

    def simulation(self, n):
        """Evolve the population for n generations."""
        for i in range(n):
            self.reset()
            self.simulation_step()
            self.select()
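

# Example usage (a sketch, not part of the original script), relying only on the classes
# defined above and the classic Gym API the script already uses. Each loop iteration
# mirrors one pass of Generation.simulation, with the per-generation rewards printed.
if __name__ == '__main__':
    generation = Generation(n=10, std=.1)
    for _ in range(20):
        generation.reset()
        rewards = generation.simulation_step()
        print('generation %d: mean reward %.1f, best reward %.1f'
              % (generation.generation, rewards.mean(), rewards.max()))
        generation.select()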