-
Notifications
You must be signed in to change notification settings - Fork 2
/
testMultiRobotROS.cpp
148 lines (123 loc) · 4.38 KB
/
testMultiRobotROS.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
#include <iostream>
#include <stdlib.h>
#include <vector>
#include <random>
#include <algorithm>
#include <chrono>
#include <Eigen/Eigen>
#include <float.h>
#include "multirobot_stage/NeuroEvo.h"
#include "multirobot_stage/NeuralNet.h"
#include <ros/ros.h>
using std::vector ;
using std::shuffle ;
using namespace Eigen ;
int main(int argc, char **argv)
{
ros::init(argc, argv, "multirobot_learning") ;
std::cout << "Program to run neuro-evolution on a pair of collaboratively exploring robots\n" ;
// Initialise learning domain
// Domain contains 2 NeuroEvo agents, each agent maintains a population of policies which are encoded as neural networks
// Neural networks are initialised according to size of input and output vectors, number of nodes in the hidden layer, and the activation function
size_t nRob = 2 ; // number of robots
size_t nIn = 18 ; // input state vector size
size_t nOut = 10 ; // output action vector size
size_t nHidden = 20 ; // number of hidden neurons
size_t nPop = 15 ; // population size
vector<NeuroEvo *> robotTeam ; // default activation is TANH
for (size_t n = 0; n < nRob; n++){
NeuroEvo * NE = new NeuroEvo(nIn, nOut, nHidden, nPop) ;
robotTeam.push_back(NE) ;
}
// Logging
vector<double> rewardLog ;
double maxR = -DBL_MAX ;
size_t maxTeam = 0 ;
// Container for storing population indices
vector<size_t> tt ;
for (size_t p = 0; p < nPop*2; p++){
tt.push_back(p) ;
}
// Container to store rewards
vector<double> rr(nPop*2,0.0) ;
// Initialise learning loop
size_t nEps = 100 ;
for (size_t i = 0; i < nEps; i++){ // For each learning epoch
// Housekeeping for managing teams
unsigned seed = std::chrono::system_clock::now().time_since_epoch().count() ;
vector< vector<size_t> > teams ;
vector< vector<double> > rewards ;
// Mutate populations (doubles population size)
for (size_t n = 0; n < nRob; n++){
robotTeam[n]->MutatePopulation() ;
// Create randomised teams for this epoch
shuffle (tt.begin(), tt.end(), std::default_random_engine(seed)) ;
teams.push_back(tt) ;
// Initialise reward container for this epoch
rewards.push_back(rr) ;
}
// Evaluate each team
for (size_t j = 0; j < nPop*2; j++){
// Get policies for this team
for (size_t n = 0; n < nRob; n++){
NeuralNet * curNN = robotTeam[n]->GetNNIndex(teams[n][j]) ;
// Reshape weight matrices to single vector
MatrixXd A = curNN->GetWeightsA() ;
vector<double> AA ;
for (int ii = 0; ii < A.rows(); ii++){
for (int jj = 0; jj < A.cols(); jj++){
AA.push_back(A(ii,jj)) ;
}
}
MatrixXd B = curNN->GetWeightsB() ;
vector<double> BB ;
for (int ii = 0; ii < B.rows(); ii++){
for (int jj = 0; jj < B.cols(); jj++){
BB.push_back(B(ii,jj)) ;
}
}
// Write NN policies to rosparams
char buffer[50] ;
sprintf(buffer,"/robot_%lu/A",n) ;
ros::param::set(buffer,AA) ;
sprintf(buffer,"/robot_%lu/B",n) ;
ros::param::set(buffer,BB) ;
}
// Run stage simulation
// Simulation nodes must initialise robots in stage
// Nodes must also read in ROS params for robot control policies
// This implementation expects the reward to be written to a rosparam
// Simulation timer will automatically terminal the episode
system("rosrun multirobot_stage run-multi-robot-explore") ;
// Read out reward
double r ;
ros::param::get("/reward",r) ;
for (size_t n = 0; n < nRob; n++){
rewards[n][teams[n][j]] = r ;
}
if (r > maxR){
maxR = r ;
maxTeam = j ;
}
}
// Record/Evaluate champion team
rewardLog.push_back(maxR) ;
char strA[50] ;
char strB[50] ;
for (size_t n = 0; n < nRob; n++){
sprintf(strA,"weightsA%lu.txt",n) ;
sprintf(strB,"weightsB%lu.txt",n) ;
NeuralNet * bestNN = robotTeam[n]->GetNNIndex(teams[n][maxTeam]) ;
bestNN->OutputNN(strA,strB) ;
}
// Compete (halves population size)
for (size_t n = 0; n < nRob; n++){
robotTeam[n]->EvolvePopulation(rewards[n]) ;
}
}
for (size_t n = 0; n < nRob; n++){
delete robotTeam[n] ;
robotTeam[n] = 0 ;
}
return 0 ;
}