calcReward.m
function newReward = calcReward(state, action, real)
%CALCREWARD calculates the new reward for update in the scoreboard and for
%use in selectAction
%
% Rewritten to return reward based on MDP state and action.
% Calculates the sum of rewards from the relative state of each intruder,
% then adds (or subtracts) a reward for accelerating.
%
% inputs:
% state: (intruders+1) x 3 matrix
% row 1 = [ position, lane, speed ]
% rows 2:end = [ deltaPosition, deltaLane, deltaSpeed ]
% delta* = agent* - obstacle*
% action: 1 x 2 array
% [ changeLane, changeSpeed ]
% real: flag; when 1, an intruder row whose lane entry is -99 marks the end
% of the real intruders and the remaining rows are ignored
%
% Contributors: John, Ramon, Molly
%
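% Example (illustrative only; the numbers below are hypothetical, not
% project data):
%   state  = [ 50  2  20 ;   % agent: position 50, lane 2, speed 20
%              10  0  -8 ];  % one intruder: 10 behind the agent, same lane, 8 faster
%   action = [ 0  0 ];       % keep lane, keep speed
%   r = calcReward(state, action, 1);
%   % the intruder closes the 10-unit gap within the 2-unit action period,
%   % so the crash penalty applies and r is -100 with the constants below
%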
%% constants and initializations
intruders = size(state,1) - 1; % first row of state is for the agent state
newReward = 0; % initialize
actPeriod = 2; % should we pass this in?
crashReward = -100; % dominating cost for crashing
changeLaneReward = -1; % minor cost to change lane
accelerateReward = 2; % incentive to accelerate
decelerateReward = -1; % -1 per deceleration unit
carLength = 6;
%% loop over each intruder to find crashes
for i = 1:intruders
if state(i+1,2) == -99 && real == 1
break; % no longer dealing with real intruders
end
% if deltaPos before (dx1) and after (dx2) the action period have opposite
% signs (or either is within a car length), and deltaLane + laneChangeAction
% == 0, the agent and intruder crashed
% constant-acceleration projection: x_new = x_old + v*t + (1/2)*a*t^2
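% e.g. (hypothetical numbers) dx1 = 10, deltaSpeed = -8, a = 0, actPeriod = 2:
%   dx2 = 10 + 2*(-8) + 0 = -6, so dx1*dx2 <= 0 and the intruder passes the
%   agent during the action period -> crash penalty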
dx1 = state(i+1,1);
dx2 = state(i+1,1) + actPeriod*state(i+1,3) + .5 * action(2) * actPeriod^2;
sameLane = state(i+1,2) + action(1) == 0;
if sameLane && (dx1*dx2 <= 0 || abs(dx1) < carLength || abs(dx2) < carLength)
newReward = newReward + crashReward;
end
end
%% consider change lane action
if action(1) ~= 0 % changed lane
newReward = newReward + changeLaneReward;
end
%% consider accelerate action
if action(2) > 0 % accelerating
newReward = newReward + action(2)*accelerateReward;
elseif action(2) < 0 % decelerating
newReward = newReward + abs(action(2))*decelerateReward;
end
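% e.g. with the constants above (hypothetical illustration): action(2) = 2
% adds 2*2 = +4, while action(2) = -1 adds 1*(-1) = -1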
%% old: calculated reward for accelerating / decelerating
% switch action(2) % only operate on the acceleration for now
% case 1
% newReward = 1;
% case -1
% newReward = -.5;
% otherwise
% newReward = 0;
end