-
Notifications
You must be signed in to change notification settings - Fork 0
/
imarkov-mystery.jl
77 lines (65 loc) · 2.61 KB
/
imarkov-mystery.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
#################################
#MARKOV DECISION PROCESS: MYSTERY!
#################################
using POMDPs
# importall POMDPs
using TabularTDLearning
using POMDPModels
using RDatasets, DataFrames
### STATE: class. Tuple (position, velocity)
# State measurements are given by integers with 500 possible position values and 100 possible velocity values (50,000 possible state measurements).
# 1+pos+500*vel gives the integer corresponding to a state with position pos and velocity vel.
# END: when the goal (the flag) is reached at the top of the hill
struct MysteryState
index::Int64 # 1+pos+500*vel
#position::Int64 # position
#velocity::Int64 # velocity
done::Bool # are we in a terminal state?
end
### ACTION: class
# There are 7 actions that represent different amounts of acceleration
# getMysteryActions = [1,2,3,4,5,6,7]
getMysteryActions = 1:125
getNumCarActions = 7
# n_actions(mdp::MysteryWorld) = 7
### ACTION: index
# 125 actions
#function getMysteryActionIndex(mdp::MysteryWorld, action::Int64)
# return action
#end
### WORLD (MYSTERY): class
type MysteryWorld <: MDP{MysteryState, Symbol} # MDP{StateType, ActionType} is parametrized by the State and the Action
jackpot::Float64 # reached the flag the top of the hill
data::DataFrames.DataFrame # the dataset to solve for
stateSpace::Vector{MysteryState} # the states
discount::Float64
end
### WORLD (MYSTERY): factory
# class constructor with default values
function MysteryWorld(;jackpot::Float64=0.0, # have not hit jackpot by default
data::DataFrames.DataFrame=dataframe, # from file
stateSpace::Vector{MysteryState}=stateSpace, # MysteryState[]
discount::Float64=1.0) # no discount
return MysteryWorld(jackpot, data, stateSpace, discount)
end
### STATE: is terminal?
function isMysteryTerminal(mdp::MysteryWorld, state::MysteryState) # vs s::Int64
return state.done
end
### STATE: get states
function getMysteryStates(mdp::MysteryWorld)
return mdp.stateSpace
end
### STATE: get states
# TODO: get from length of stateSpace
#function getNumMysteryStates(mdp::MysteryWorld)
# dataset = mdp.data
# columnNames = names(dataset)
# stateKey = columnNames[1]
# numRows = length(dataset[stateKey])
# return numRows
#end
### STATE: get state index
function getMysteryStateIndex(mdp::MysteryWorld, state::MysteryState)
return state.index
end