-
Notifications
You must be signed in to change notification settings - Fork 0
/
imarkov-grid-maxlikelihood-POMDPs.jl
78 lines (60 loc) · 2.41 KB
/
imarkov-grid-maxlikelihood-POMDPs.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
#################################
#MARKOV DECISION PROCESS: GRID
#NOTE: EXPERIMENTAL use of POMDPs.jl to better understand this problem/solution
#################################
# APPROACH: define the world by extending MDP{}, and solve by implementing POMDPs.* methods
include("imarkov-grid-maxlikelihood.jl");
########
# WORLD TUPLE: (STATE, ACTION, TRANSITION, REWARD)
# NOTE: can be run in two modes
# a) Probabilistic mode: given Transition and Rewards probability functions. See Decisions Under Uncertainty 4.2.5
# b) MAX LIKELIHOOD mode: calculated Transition and Reward probability functions. See Decisions Under Uncertainty 5.2
########
using POMDPs
importall POMDPs
########
# GRID: MAX LIKELIHOOD
########
### STATE: DONE
# implement POMDPs.isterminal
# Terminal-state query for the POMDPs.jl interface.
# Forwards `mdp` and `state` to `isMaxlikelihoodTerminal` and returns its result unchanged.
POMDPs.isterminal(mdp::MaxlikelihoodGrid, state::MaxlikelihoodState) =
    isMaxlikelihoodTerminal(mdp, state)
### STATE: define
# implement POMDPs.states
# State-space accessor for the POMDPs.jl interface.
# Returns whatever `getMaxlikelihoodStateSpace` produces for `mdp`.
POMDPs.states(mdp::MaxlikelihoodGrid) = getMaxlikelihoodStateSpace(mdp)
### STATE: index
# implement POMDPs.state_index
# State-to-index mapping for the POMDPs.jl interface.
# Delegates to `getMaxlikelihoodStateIndex` with the same `mdp` and `state`.
POMDPs.state_index(mdp::MaxlikelihoodGrid, state::MaxlikelihoodState) =
    getMaxlikelihoodStateIndex(mdp, state)
### STATE: count
# implement POMDPs.n_states
# State count for the POMDPs.jl interface.
# Returns the value reported by `getMaxlikelihoodNumberOfStates` for `mdp`.
function POMDPs.n_states(mdp::MaxlikelihoodGrid)
    return getMaxlikelihoodNumberOfStates(mdp)
end
### DISCOUNT (Maxlikelihood):
# implement POMDPs.discount
# Discount factor for the POMDPs.jl interface.
# Returns the value reported by `getMaxlikelihoodDiscountFactor` for `mdp`.
function POMDPs.discount(mdp::MaxlikelihoodGrid)
    return getMaxlikelihoodDiscountFactor(mdp)
end
### ACTION: index
# POMDPs.action_index
# Action-to-index mapping for the POMDPs.jl interface.
# Looks `act` up in the collection returned by `getMaxlikelihoodActionsIndexDict()`;
# `mdp` is used only for dispatch and is not read.
POMDPs.action_index(mdp::MaxlikelihoodGrid, act::Symbol) =
    getMaxlikelihoodActionsIndexDict()[act]
### ACTIONS: itemize
# implement POMDPs.actions
# Action space for the POMDPs.jl interface.
# `gridMaxlikelihoodActions` is called here (as `POMDPs.n_actions` in this file
# already does) so callers receive the action collection it produces rather than
# the function object itself. `mdp` is used only for dispatch.
POMDPs.actions(mdp::MaxlikelihoodGrid) = gridMaxlikelihoodActions()
### total actions
# implement POMDPs.n_actions
# Action count for the POMDPs.jl interface.
# Returns the length of the collection produced by `gridMaxlikelihoodActions()`;
# `mdp` is used only for dispatch.
function POMDPs.n_actions(mdp::MaxlikelihoodGrid)
    availableActions = gridMaxlikelihoodActions()
    return length(availableActions)
end
### TRANSITION: configure the transition model
# implement POMDPs.transition
# Transition model for the POMDPs.jl interface.
# Returns the result of `MaxlikelihoodTransitionModel(mdp, state, action)` for the given
# state/action pair.
POMDPs.transition(mdp::MaxlikelihoodGrid, state::MaxlikelihoodState, action::Symbol) =
    MaxlikelihoodTransitionModel(mdp, state, action)
### REWARD: utility of a (state,action,statePrime) datapoint in a world
# implement POMDPs.reward
# Reward lookup for the POMDPs.jl interface.
# Passes `mdp.reward_states`, `mdp.reward_values` and `state` to `getMaxlikelihoodReward`.
# `action` and `statePrime` are accepted to satisfy the signature but are not read here.
function POMDPs.reward(mdp::MaxlikelihoodGrid, state::MaxlikelihoodState, action::Symbol, statePrime::MaxlikelihoodState)
    rewardStates = mdp.reward_states
    rewardValues = mdp.reward_values
    return getMaxlikelihoodReward(rewardStates, rewardValues, state)
end