From 7c2562e28ee1edbabae29483ef98f2d3e8009414 Mon Sep 17 00:00:00 2001
From: Dylan Asmar
Date: Wed, 24 Jan 2024 01:20:01 -0800
Subject: [PATCH] Added examples to documentation (followed outline of POMDPExamples.jl)

---
 docs/Project.toml                          |   6 +
 docs/make.jl                               |  10 +
 docs/src/example_defining_problems.md      | 314 ++++++++++++++++++++++
 docs/src/example_gridworld_mdp.md          |   2 +
 docs/src/example_simulations.md            | 174 ++++++++++++
 docs/src/example_solvers.md                | 108 ++++++++
 docs/src/examples.md                       |  12 +
 docs/src/examples/crying_baby_examples.jl  | 230 ++++++++++++++++
 docs/src/examples/crying_baby_solvers.jl   |  24 ++
 docs/src/gallery.md                        |  17 ++
 docs/src/index.md                          |   6 +
 11 files changed, 903 insertions(+)
 create mode 100644 docs/src/example_defining_problems.md
 create mode 100644 docs/src/example_gridworld_mdp.md
 create mode 100644 docs/src/example_simulations.md
 create mode 100644 docs/src/example_solvers.md
 create mode 100644 docs/src/examples.md
 create mode 100644 docs/src/examples/crying_baby_examples.jl
 create mode 100644 docs/src/examples/crying_baby_solvers.jl
 create mode 100644 docs/src/gallery.md

diff --git a/docs/Project.toml b/docs/Project.toml
index f30f6ffe..7121be41 100644
--- a/docs/Project.toml
+++ b/docs/Project.toml
@@ -1,12 +1,18 @@
 [deps]
+BasicPOMCP = "d721219e-3fc6-5570-a8ef-e5402f47c49e"
+DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
 Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
 Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
 LightGraphs = "093fc24a-ae57-5d10-9952-331d41423f4d"
 NamedTupleTools = "d9ec5142-1e00-5aa0-9d6a-321866360f50"
+NativeSARSOP = "a07c76ea-660d-4c9a-8028-2e6dbd212cb8"
+POMDPLinter = "f3bd98c0-eb40-45e2-9eb1-f2763262d755"
 POMDPModels = "355abbd5-f08e-5560-ac9e-8b5f2592a0ca"
 POMDPTools = "7588e00f-9cae-40de-98dc-e0c70c48cdd7"
 POMDPs = "a93abf59-7444-517b-a68a-c42f96afdd7d"
+QMDP = "3aa3ecc9-5a5d-57c8-8188-3e47bd8068d2"
 QuickPOMDPs = "8af83fb2-a731-493c-9049-9e19dbce6165"
+StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
 
 [compat]
 Documenter = "1"
diff --git a/docs/make.jl b/docs/make.jl
index 6c1e2571..3f5e2a52 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -37,6 +37,15 @@ makedocs(
             "run_simulation.md",
             "policy_interaction.md"
         ],
+
+        "Examples and Gallery" => [
+            "examples.md",
+            "example_defining_problems.md",
+            "example_solvers.md",
+            "example_simulations.md",
+            "example_gridworld_mdp.md",
+            "gallery.md"
+        ],
 
         "POMDPTools" => [
             "POMDPTools/index.md",
@@ -59,4 +68,5 @@ makedocs(
 
 deploydocs(
     repo = "github.com/JuliaPOMDP/POMDPs.jl.git",
+    push_preview=true
 )
diff --git a/docs/src/example_defining_problems.md b/docs/src/example_defining_problems.md
new file mode 100644
index 00000000..23fba8d2
--- /dev/null
+++ b/docs/src/example_defining_problems.md
@@ -0,0 +1,314 @@
+# Defining a POMDP
+As mentioned in the [Defining POMDPs and MDPs](@ref defining_pomdps) section, there are various ways to define a POMDP using POMDPs.jl. In this section, we provide more examples of how to define a POMDP using the different interfaces.
+
+There is a large variety of problems that can be expressed as MDPs and POMDPs, and different solvers require different components of the POMDPs.jl interface to be defined. Therefore, these examples are not intended to cover all possible use cases. When developing a problem, if you already have an idea of what solver(s) you would like to use, it is recommended to use [POMDPLinter](https://github.com/JuliaPOMDP/POMDPLinter.jl) to help determine which components of the POMDPs.jl interface need to be defined.
+Reference the [Checking Requirements](@ref) section for an example of using POMDPLinter.
+
+## CryingBaby Problem Definition
+For the examples, we will use the CryingBaby problem from [Algorithms for Decision Making](https://algorithmsbook.com/) by Mykel J. Kochenderfer, Tim A. Wheeler, and Kyle H. Wray.
+
+!!! note
+    This crying baby problem follows the description in Algorithms for Decision Making and is different from `BabyPOMDP` defined in [POMDPModels.jl](https://github.com/JuliaPOMDP/POMDPModels.jl).
+
+From [Appendix F](https://algorithmsbook.com/files/appendix-f.pdf) of Algorithms for Decision Making:
+> The crying baby problem is a simple POMDP with two states, three actions, and two observations. Our goal is to care for a baby, and we do so by choosing at each time step whether to feed the baby, sing to the baby, or ignore the baby.
+>
+> The baby becomes hungry over time. We do not directly observe whether the baby is hungry; instead, we receive a noisy observation in the form of whether the baby is crying. The state, action, and observation spaces are as follows:
+> ```math
+> \begin{align*}
+> \mathcal{S} &= \{\text{sated}, \text{hungry} \}\\
+> \mathcal{A} &= \{\text{feed}, \text{sing}, \text{ignore} \} \\
+> \mathcal{O} &= \{\text{crying}, \text{quiet} \}
+> \end{align*}
+> ```
+>
+> Feeding will always sate the baby. Ignoring the baby risks a sated baby becoming hungry, and ensures that a hungry baby remains hungry. Singing to the baby is an information-gathering action with the same transition dynamics as ignoring, but without the potential for crying when sated (not hungry) and with an increased chance of crying when hungry.
+>
+> The transition dynamics are as follows:
+> ```math
+> \begin{align*}
+> & T(\text{sated} \mid \text{hungry}, \text{feed}) = 100\% \\
+> & T(\text{hungry} \mid \text{hungry}, \text{sing}) = 100\% \\
+> & T(\text{hungry} \mid \text{hungry}, \text{ignore}) = 100\% \\
+> & T(\text{sated} \mid \text{sated}, \text{feed}) = 100\% \\
+> & T(\text{hungry} \mid \text{sated}, \text{sing}) = 10\% \\
+> & T(\text{hungry} \mid \text{sated}, \text{ignore}) = 10\%
+> \end{align*}
+> ```
+>
+> The observation dynamics are as follows:
+> ```math
+> \begin{align*}
+> & O(\text{crying} \mid \text{feed}, \text{hungry}) = 80\% \\
+> & O(\text{crying} \mid \text{sing}, \text{hungry}) = 90\% \\
+> & O(\text{crying} \mid \text{ignore}, \text{hungry}) = 80\% \\
+> & O(\text{crying} \mid \text{feed}, \text{sated}) = 10\% \\
+> & O(\text{crying} \mid \text{sing}, \text{sated}) = 0\% \\
+> & O(\text{crying} \mid \text{ignore}, \text{sated}) = 10\%
+> \end{align*}
+> ```
+>
+> The reward function assigns ``−10`` reward if the baby is hungry, independent of the action taken. The effort of feeding the baby adds a further ``−5`` reward, whereas singing adds ``−0.5`` reward. As baby caregivers, we seek the optimal infinite-horizon policy with discount factor ``\gamma = 0.9``.
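+
+As a worked example of how these dynamics combine (this calculation is not part of the original problem statement), suppose the baby starts sated, we ignore it, and we then hear crying. A Bayesian belief update with the numbers above gives
+```math
+\begin{align*}
+P(\text{hungry} \mid \text{ignore}, \text{crying}) &= \frac{O(\text{crying} \mid \text{ignore}, \text{hungry}) \, T(\text{hungry} \mid \text{sated}, \text{ignore})}{\sum_{s'} O(\text{crying} \mid \text{ignore}, s') \, T(s' \mid \text{sated}, \text{ignore})} \\
+&= \frac{0.8 \times 0.1}{0.8 \times 0.1 + 0.1 \times 0.9} \approx 0.47.
+\end{align*}
+```
+This is the same update that the `DiscreteUpdater` performs in the simulation examples later in this part of the documentation.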
+
+## [QuickPOMDP Interface](@id quick_crying)
+```julia
+using POMDPs
+using POMDPTools
+using QuickPOMDPs
+
+quick_crying_baby_pomdp = QuickPOMDP(
+    states = [:sated, :hungry],
+    actions = [:feed, :sing, :ignore],
+    observations = [:quiet, :crying],
+    initialstate = Deterministic(:sated),
+    discount = 0.9,
+    transition = function (s, a)
+        if a == :feed
+            return Deterministic(:sated)
+        elseif s == :sated # :sated and a != :feed
+            return SparseCat([:sated, :hungry], [0.9, 0.1])
+        else # s == :hungry and a != :feed
+            return Deterministic(:hungry)
+        end
+    end,
+    observation = function (a, sp)
+        if sp == :hungry
+            if a == :sing
+                return SparseCat([:crying, :quiet], [0.9, 0.1])
+            else # a == :ignore || a == :feed
+                return SparseCat([:crying, :quiet], [0.8, 0.2])
+            end
+        else # sp == :sated
+            if a == :sing
+                return Deterministic(:quiet)
+            else # a == :ignore || a == :feed
+                return SparseCat([:crying, :quiet], [0.1, 0.9])
+            end
+        end
+    end,
+    reward = function (s, a)
+        r = 0.0
+        if s == :hungry
+            r += -10.0
+        end
+        if a == :feed
+            r += -5.0
+        elseif a == :sing
+            r += -0.5
+        end
+        return r
+    end
+)
+```
+
+## [Explicit Interface](@id explicit_crying)
+```julia
+using POMDPs
+using POMDPTools
+
+struct CryingBabyState
+    hungry::Bool
+end
+
+struct CryingBabyPOMDP <: POMDP{CryingBabyState, Symbol, Symbol}
+    p_sated_to_hungry::Float64
+    p_cry_feed_hungry::Float64
+    p_cry_sing_hungry::Float64
+    p_cry_ignore_hungry::Float64
+    p_cry_feed_sated::Float64
+    p_cry_sing_sated::Float64
+    p_cry_ignore_sated::Float64
+    reward_hungry::Float64
+    reward_feed::Float64
+    reward_sing::Float64
+    discount_factor::Float64
+end
+
+function CryingBabyPOMDP(;
+    p_sated_to_hungry=0.1,
+    p_cry_feed_hungry=0.8,
+    p_cry_sing_hungry=0.9,
+    p_cry_ignore_hungry=0.8,
+    p_cry_feed_sated=0.1,
+    p_cry_sing_sated=0.0,
+    p_cry_ignore_sated=0.1,
+    reward_hungry=-10.0,
+    reward_feed=-5.0,
+    reward_sing=-0.5,
+    discount_factor=0.9
+)
+    return CryingBabyPOMDP(p_sated_to_hungry, p_cry_feed_hungry,
+        p_cry_sing_hungry, p_cry_ignore_hungry, p_cry_feed_sated,
+        p_cry_sing_sated, p_cry_ignore_sated, reward_hungry,
+        reward_feed, reward_sing, discount_factor)
+end
+
+POMDPs.actions(::CryingBabyPOMDP) = [:feed, :sing, :ignore]
+POMDPs.states(::CryingBabyPOMDP) = [CryingBabyState(false), CryingBabyState(true)]
+POMDPs.observations(::CryingBabyPOMDP) = [:crying, :quiet]
+POMDPs.stateindex(::CryingBabyPOMDP, s::CryingBabyState) = s.hungry ? 2 : 1
+POMDPs.obsindex(::CryingBabyPOMDP, o::Symbol) = o == :crying ? 1 : 2
+POMDPs.actionindex(::CryingBabyPOMDP, a::Symbol) = a == :feed ? 1 : a == :sing ? 2 : 3
+
+function POMDPs.transition(pomdp::CryingBabyPOMDP, s::CryingBabyState, a::Symbol)
+    if a == :feed
+        return Deterministic(CryingBabyState(false))
+    elseif !s.hungry # sated and a != :feed
+        return SparseCat([CryingBabyState(false), CryingBabyState(true)], [1 - pomdp.p_sated_to_hungry, pomdp.p_sated_to_hungry])
+    else # hungry and a != :feed
+        return Deterministic(CryingBabyState(true))
+    end
+end
+
+function POMDPs.observation(pomdp::CryingBabyPOMDP, a::Symbol, sp::CryingBabyState)
+    if sp.hungry
+        if a == :sing
+            return SparseCat([:crying, :quiet], [pomdp.p_cry_sing_hungry, 1 - pomdp.p_cry_sing_hungry])
+        elseif a == :ignore
+            return SparseCat([:crying, :quiet], [pomdp.p_cry_ignore_hungry, 1 - pomdp.p_cry_ignore_hungry])
+        else # a == :feed
+            return SparseCat([:crying, :quiet], [pomdp.p_cry_feed_hungry, 1 - pomdp.p_cry_feed_hungry])
+        end
+    else # sated
+        if a == :sing
+            return SparseCat([:crying, :quiet], [pomdp.p_cry_sing_sated, 1 - pomdp.p_cry_sing_sated])
+        elseif a == :ignore
+            return SparseCat([:crying, :quiet], [pomdp.p_cry_ignore_sated, 1 - pomdp.p_cry_ignore_sated])
+        else # a == :feed
+            return SparseCat([:crying, :quiet], [pomdp.p_cry_feed_sated, 1 - pomdp.p_cry_feed_sated])
+        end
+    end
+end
+
+function POMDPs.reward(pomdp::CryingBabyPOMDP, s::CryingBabyState, a::Symbol)
+    r = 0.0
+    if s.hungry
+        r += pomdp.reward_hungry
+    end
+    if a == :feed
+        r += pomdp.reward_feed
+    elseif a == :sing
+        r += pomdp.reward_sing
+    end
+    return r
+end
+
+POMDPs.discount(pomdp::CryingBabyPOMDP) = pomdp.discount_factor
+
+POMDPs.initialstate(::CryingBabyPOMDP) = Deterministic(CryingBabyState(false))
+
+explicit_crying_baby_pomdp = CryingBabyPOMDP()
+```
+
+## [Generative Interface](@id gen_crying)
+This crying baby problem is small and discrete, so it does not need to be implemented with the generative interface. However, this example is provided for pedagogical purposes.
+
+```julia
+using POMDPs
+using POMDPTools
+using Random
+
+struct GenCryingBabyState
+    hungry::Bool
+end
+
+struct GenCryingBabyPOMDP <: POMDP{GenCryingBabyState, Symbol, Symbol}
+    p_sated_to_hungry::Float64
+    p_cry_feed_hungry::Float64
+    p_cry_sing_hungry::Float64
+    p_cry_ignore_hungry::Float64
+    p_cry_feed_sated::Float64
+    p_cry_sing_sated::Float64
+    p_cry_ignore_sated::Float64
+    reward_hungry::Float64
+    reward_feed::Float64
+    reward_sing::Float64
+    discount_factor::Float64
+
+    GenCryingBabyPOMDP() = new(0.1, 0.8, 0.9, 0.8, 0.1, 0.0, 0.1, -10.0, -5.0, -0.5, 0.9)
+end
+
+function POMDPs.gen(pomdp::GenCryingBabyPOMDP, s::GenCryingBabyState, a::Symbol, rng::AbstractRNG)
+
+    if a == :feed
+        sp = GenCryingBabyState(false)
+    elseif s.hungry # a hungry baby stays hungry unless fed
+        sp = GenCryingBabyState(true)
+    else
+        sp = rand(rng) < pomdp.p_sated_to_hungry ? GenCryingBabyState(true) : GenCryingBabyState(false)
+    end
+
+    if sp.hungry
+        if a == :sing
+            o = rand(rng) < pomdp.p_cry_sing_hungry ? :crying : :quiet
+        elseif a == :ignore
+            o = rand(rng) < pomdp.p_cry_ignore_hungry ? :crying : :quiet
+        else # a == :feed
+            o = rand(rng) < pomdp.p_cry_feed_hungry ? :crying : :quiet
+        end
+    else # sated
+        if a == :sing
+            o = rand(rng) < pomdp.p_cry_sing_sated ? :crying : :quiet
+        elseif a == :ignore
+            o = rand(rng) < pomdp.p_cry_ignore_sated ? :crying : :quiet
+        else # a == :feed
+            o = rand(rng) < pomdp.p_cry_feed_sated ? :crying : :quiet
+        end
+    end
+
+    r = 0.0
+    if sp.hungry
+        r += pomdp.reward_hungry
+    end
+    if a == :feed
+        r += pomdp.reward_feed
+    elseif a == :sing
+        r += pomdp.reward_sing
+    end
+
+    return (sp=sp, o=o, r=r)
+end
+
+POMDPs.initialstate(::GenCryingBabyPOMDP) = Deterministic(GenCryingBabyState(false))
+
+gen_crying_baby_pomdp = GenCryingBabyPOMDP()
+```
+
+## [Probability Tables](@id tab_crying)
+For this implementation we will use the following indices:
+- States
+  - `:sated` = 1
+  - `:hungry` = 2
+- Actions
+  - `:feed` = 1
+  - `:sing` = 2
+  - `:ignore` = 3
+- Observations
+  - `:crying` = 1
+  - `:quiet` = 2
+
+```julia
+using POMDPModels
+
+T = zeros(2, 3, 2) # |S| x |A| x |S'|, T[sp, a, s] = p(sp | a, s)
+T[:, 1, :] = [1.0 1.0;
+              0.0 0.0]
+T[:, 2, :] = [0.9 0.0;
+              0.1 1.0]
+T[:, 3, :] = [0.9 0.0;
+              0.1 1.0]
+
+O = zeros(2, 3, 2) # |O| x |A| x |S'|, O[o, a, sp] = p(o | a, sp)
+O[:, 1, :] = [0.1 0.8;
+              0.9 0.2]
+O[:, 2, :] = [0.0 0.9;
+              1.0 0.1]
+O[:, 3, :] = [0.1 0.8;
+              0.9 0.2]
+
+R = zeros(2, 3) # |S| x |A|
+R = [-5.0 -0.5 0.0;
+     -15.0 -10.5 -10.0]
+
+discount = 0.9
+
+tabular_crying_baby_pomdp = TabularPOMDP(T, R, O, discount)
+```
\ No newline at end of file
diff --git a/docs/src/example_gridworld_mdp.md b/docs/src/example_gridworld_mdp.md
new file mode 100644
index 00000000..34fa3e7b
--- /dev/null
+++ b/docs/src/example_gridworld_mdp.md
@@ -0,0 +1,2 @@
+# GridWorld MDP using Value Iteration and MCTS
+
diff --git a/docs/src/example_simulations.md b/docs/src/example_simulations.md
new file mode 100644
index 00000000..cd6b5e95
--- /dev/null
+++ b/docs/src/example_simulations.md
@@ -0,0 +1,174 @@
+
+# Simulations Examples
+
+In these simulation examples, we will use the crying baby POMDPs defined in the [Defining a POMDP](@ref) section (i.e. [`quick_crying_baby_pomdp`](@ref quick_crying), [`explicit_crying_baby_pomdp`](@ref explicit_crying), [`gen_crying_baby_pomdp`](@ref gen_crying), and [`tabular_crying_baby_pomdp`](@ref tab_crying)).
+
+```@setup crying_sim
+include("examples/crying_baby_examples.jl")
+include("examples/crying_baby_solvers.jl")
+```
+
+## Stepthrough
+The stepthrough simulator provides a window into the simulation with a for-loop syntax.
+
+Within the body of the for loop, we have access to the belief, the action, the observation, and the reward at each step. We also calculate the sum of the rewards in this example, but note that this is _not_ the _discounted reward_.
+
+```@example crying_sim
+function run_step_through_simulation() # hide
+policy = RandomPolicy(quick_crying_baby_pomdp)
+r_sum = 0.0
+step = 0
+for (b, s, a, o, r) in stepthrough(quick_crying_baby_pomdp, policy, DiscreteUpdater(quick_crying_baby_pomdp), "b,s,a,o,r"; max_steps=4)
+    step += 1
+    println("Step $step")
+    println("b = sated => $(b.b[1]), hungry => $(b.b[2])")
+    @show s
+    @show a
+    @show o
+    @show r
+    r_sum += r
+    @show r_sum
+    println()
+end
+end #hide
+
+run_step_through_simulation() # hide
+```
+
+## Rollout Simulations
+While stepthrough is a flexible and convenient tool for many user-facing demonstrations, it is often less error-prone to use the standard `simulate` function with a `Simulator` object. The simplest Simulator is the `RolloutSimulator`. It simply runs a simulation and returns the discounted reward.
+
+```@example crying_sim
+function run_rollout_simulation() # hide
+policy = RandomPolicy(explicit_crying_baby_pomdp)
+sim = RolloutSimulator(max_steps=10)
+r_sum = simulate(sim, explicit_crying_baby_pomdp, policy)
+println("Total discounted reward: $r_sum")
+end # hide
+run_rollout_simulation() # hide
+```
+
+## Recording Histories
+Sometimes it is important to record the entire history of a simulation for further examination. This can be accomplished with a `HistoryRecorder`.
+
+```@example crying_sim
+policy = RandomPolicy(tabular_crying_baby_pomdp)
+hr = HistoryRecorder(max_steps=5)
+history = simulate(hr, tabular_crying_baby_pomdp, policy, DiscreteUpdater(tabular_crying_baby_pomdp), Deterministic(1))
+nothing # hide
+```
+
+The history object produced by a `HistoryRecorder` is a `SimHistory`, documented in the POMDPTools simulator section [Histories](@ref). The information in this object can be accessed in several ways. For example, there is a function:
+```@example crying_sim
+discounted_reward(history)
+```
+Accessor functions like `state_hist` and `action_hist` can also be used to access parts of the history:
+```@example crying_sim
+state_hist(history)
+```
+```@example crying_sim
+collect(action_hist(history))
+```
+
+Keeping track of which states, actions, and observations belong together can be tricky (for example, since there is a starting state and an ending state, but no action is taken from the ending state, the list of actions has a different length than the list of states). It is often better to think of histories in terms of steps that include both starting and ending states.
+
+The most powerful function for accessing the information in a `SimHistory` is the `eachstep` function, which returns an iterator of named tuples representing each step in the history. The `eachstep` function is similar to the `stepthrough` function above, except that it iterates through the immutable steps of a previously simulated history instead of conducting the simulation as the for loop is being carried out.
+
+```@example crying_sim
+function demo_eachstep(sim_history) # hide
+r_sum = 0.0
+step = 0
+for step_i in eachstep(sim_history, "b,s,a,o,r")
+    step += 1
+    println("Step $step")
+    println("step_i.b = sated => $(step_i.b.b[1]), hungry => $(step_i.b.b[2])")
+    @show step_i.s
+    @show step_i.a
+    @show step_i.o
+    @show step_i.r
+    r_sum += step_i.r
+    @show r_sum
+    println()
+end
+end # hide
+demo_eachstep(history) # hide
+```
+
+## Parallel Simulations
+It is often useful to evaluate a policy by running many simulations. The parallel simulator is the most effective tool for this. To use the parallel simulator, first create a list of `Sim` objects, each of which contains all of the information needed to run a simulation. Then run the simulations using `run_parallel`, which will return a `DataFrame` with the results.
+
+In this example, we will compare the performance of the policies we computed in the [Using Different Solvers](@ref) section (i.e. `sarsop_policy`, `pomcp_planner`, and `heuristic_policy`). To evaluate the policies, we will run 100 simulations for each policy. We can do this by adding 100 `Sim` objects of each policy to the list.
+
+```@example crying_sim
+using DataFrames
+using StatsBase: mean, std
+
+# Defining parameters for the simulations
+number_of_sim_to_run = 100
+max_steps = 20
+starting_seed = 1
+
+# We will also compare against a random policy
+rand_policy = RandomPolicy(quick_crying_baby_pomdp, rng=MersenneTwister(1))
+
+# Create the list of Sim objects
+sim_list = []
+
+# Add 100 Sim objects of each policy to the list.
+for sim_number in 1:number_of_sim_to_run
+    seed = starting_seed + sim_number
+
+    # Add the SARSOP policy
+    push!(sim_list, Sim(
+        quick_crying_baby_pomdp,
+        rng=MersenneTwister(seed),
+        sarsop_policy,
+        max_steps=max_steps,
+        metadata=Dict(:policy => "sarsop", :seed => seed))
+    )
+
+    # Add the POMCP policy
+    push!(sim_list, Sim(
+        quick_crying_baby_pomdp,
+        rng=MersenneTwister(seed),
+        pomcp_planner,
+        max_steps=max_steps,
+        metadata=Dict(:policy => "pomcp", :seed => seed))
+    )
+
+    # Add the heuristic policy
+    push!(sim_list, Sim(
+        quick_crying_baby_pomdp,
+        rng=MersenneTwister(seed),
+        heuristic_policy,
+        max_steps=max_steps,
+        metadata=Dict(:policy => "heuristic", :seed => seed))
+    )
+
+    # Add the random policy
+    push!(sim_list, Sim(
+        quick_crying_baby_pomdp,
+        rng=MersenneTwister(seed),
+        rand_policy,
+        max_steps=max_steps,
+        metadata=Dict(:policy => "random", :seed => seed))
+    )
+end
+
+# Run the simulations in parallel
+data = run_parallel(sim_list)
+
+# Define a function to calculate the mean and confidence interval
+function mean_and_ci(x)
+    m = mean(x)
+    ci = 1.96 * std(x) / sqrt(length(x)) # 95% confidence interval
+    return (mean = m, ci = ci)
+end
+
+# Calculate the mean and confidence interval for each policy
+grouped_df = groupby(data, :policy)
+result = combine(grouped_df, :reward => mean_and_ci => AsTable)
+
+```
+
+By default, the parallel simulator only returns the reward from each simulation, but more information can be gathered by specifying a function to analyze the `Sim`-history pair and record additional statistics. Reference the POMDPTools simulator section for more information ([Specifying information to be recorded](@ref)).
\ No newline at end of file
diff --git a/docs/src/example_solvers.md b/docs/src/example_solvers.md
new file mode 100644
index 00000000..99690a99
--- /dev/null
+++ b/docs/src/example_solvers.md
@@ -0,0 +1,108 @@
+# Using Different Solvers
+There are various solvers implemented for use out-of-the-box. Please reference the repository README for a list of [MDP Solvers](https://github.com/JuliaPOMDP/POMDPs.jl?tab=readme-ov-file#mdp-solvers) and [POMDP Solvers](https://github.com/JuliaPOMDP/POMDPs.jl?tab=readme-ov-file#pomdp-solvers) implemented and maintained by the JuliaPOMDP community. We provide a few examples of how to use a small subset of these solvers.
+
+```@setup crying_sim
+include("examples/crying_baby_examples.jl")
+```
+
+## Checking Requirements
+Before using a solver, it is prudent to ensure the problem meets the requirements of the solver. Please reference the solver documentation for detailed information about the requirements of each solver.
+
+We can use [POMDPLinter](https://github.com/JuliaPOMDP/POMDPLinter.jl) to help us determine if we have all of the required components defined for a particular solver. However, not all solvers have their requirements implemented. If/when you encounter a solver that does not have the requirements implemented, please open an issue on the solver's repository.
+
+Let's check if we have all of the required components of our problems for the QMDP solver.
+
+```@example crying_sim
+using POMDPLinter
+using QMDP
+
+qmdp_solver = QMDPSolver()
+
+println("Quick Crying Baby POMDP")
+@show_requirements POMDPs.solve(qmdp_solver, quick_crying_baby_pomdp)
+
+println("\nExplicit Crying Baby POMDP")
+@show_requirements POMDPs.solve(qmdp_solver, explicit_crying_baby_pomdp)
+
+println("\nTabular Crying Baby POMDP")
+@show_requirements POMDPs.solve(qmdp_solver, tabular_crying_baby_pomdp)
+
+println("\nGen Crying Baby POMDP")
+# We don't have an actions(::GenCryingBabyPOMDP) implemented
+try
+    @show_requirements POMDPs.solve(qmdp_solver, gen_crying_baby_pomdp)
+catch err_msg
+    println(err_msg)
+end
+```
+
+## Offline (SARSOP)
+In this example, we will use the [NativeSARSOP](https://github.com/JuliaPOMDP/NativeSARSOP.jl) solver. Since we are computing the policy offline, it could also be saved to a file and reused later without having to recompute it.
+
+```@example crying_sim
+using NativeSARSOP
+
+# Define the solver with the desired parameters
+sarsop_solver = SARSOPSolver(; max_time=10.0)
+
+# Solve the problem by calling POMDPs.solve. SARSOP will compute the policy and return an `AlphaVectorPolicy`
+sarsop_policy = POMDPs.solve(sarsop_solver, quick_crying_baby_pomdp)
+
+# We can query the policy using the `action` function
+b = initialstate(quick_crying_baby_pomdp)
+a = action(sarsop_policy, b)
+
+@show a
+
+```
+
+## Online (POMCP)
+For the online solver, we will use Partially Observable Monte Carlo Planning ([POMCP](https://github.com/JuliaPOMDP/BasicPOMCP.jl)). For online solvers, we first define the solver similar to offline solvers. However, when we call `POMDPs.solve`, an online planner is returned. As with the offline policy, we can query the planner using the `action` function, and that is when the online solver computes the action.
+
+```@example crying_sim
+using BasicPOMCP
+
+pomcp_solver = POMCPSolver(; c=5.0, tree_queries=1000, rng=MersenneTwister(1))
+pomcp_planner = POMDPs.solve(pomcp_solver, quick_crying_baby_pomdp)
+
+b = initialstate(quick_crying_baby_pomdp)
+a = action(pomcp_planner, b)
+
+@show a
+
+```
+
+## Heuristic Policy
+While we often want to use a solver to compute a policy, sometimes we might want to use a heuristic policy. For example, we may want to use a heuristic policy during our rollouts for online solvers or as a baseline. In this example, we will define a simple heuristic policy that feeds the baby if our belief that the baby is hungry is greater than 50%; otherwise, we will randomly ignore or sing to the baby.
+
+```@example crying_sim
+struct HeuristicFeedPolicy{P<:POMDP} <: Policy
+    pomdp::P
+end
+
+# We need to implement the action function for our policy
+function POMDPs.action(policy::HeuristicFeedPolicy, b)
+    if pdf(b, :hungry) > 0.5
+        return :feed
+    else
+        return rand([:ignore, :sing])
+    end
+end
+
+# Let's also define the default updater for our policy
+function POMDPs.updater(policy::HeuristicFeedPolicy)
+    return DiscreteUpdater(policy.pomdp)
+end
+
+heuristic_policy = HeuristicFeedPolicy(quick_crying_baby_pomdp)
+
+# Let's query the policy a few times
+b = SparseCat([:sated, :hungry], [0.1, 0.9])
+a1 = action(heuristic_policy, b)
+
+b = SparseCat([:sated, :hungry], [0.9, 0.1])
+a2 = action(heuristic_policy, b)
+
+@show [a1, a2]
+
+```
\ No newline at end of file
diff --git a/docs/src/examples.md b/docs/src/examples.md
new file mode 100644
index 00000000..b9cbf1f1
--- /dev/null
+++ b/docs/src/examples.md
@@ -0,0 +1,12 @@
+# Examples
+
+This section contains examples of how to use POMDPs.jl. For specific information about the interface and functions used in the examples, please reference the corresponding area of the documentation or the [API Documentation](@ref).
+
+The examples are organized by topic and are designed to build on one another. First, we have to define a POMDP. Then we need to solve the POMDP to get a policy. Finally, we can simulate the policy to see how it performs. For this reason, the examples are designed to be executed in order. For example, the examples in [Simulations Examples](@ref) assume that the POMDPs defined in the [Defining a POMDP](@ref) section have been defined and that we have a policy we would like to simulate that we computed in the [Using Different Solvers](@ref) section.
+
+The [GridWorld MDP using Value Iteration and MCTS](@ref) section is a standalone example that does not require any of the other examples.
+
+## Outline
+```@contents
+Pages = ["example_defining_problems.md", "example_solvers.md", "example_simulations.md", "example_gridworld_mdp.md"]
+```
\ No newline at end of file
diff --git a/docs/src/examples/crying_baby_examples.jl b/docs/src/examples/crying_baby_examples.jl
new file mode 100644
index 00000000..212b6f26
--- /dev/null
+++ b/docs/src/examples/crying_baby_examples.jl
@@ -0,0 +1,230 @@
+using POMDPs
+using POMDPTools
+using POMDPModels
+using QuickPOMDPs
+using Random
+
+quick_crying_baby_pomdp = QuickPOMDP(
+    states = [:sated, :hungry],
+    actions = [:feed, :sing, :ignore],
+    observations = [:quiet, :crying],
+    initialstate = Deterministic(:sated),
+    discount = 0.9,
+    transition = function (s, a)
+        if a == :feed
+            return Deterministic(:sated)
+        elseif s == :sated # :sated and a != :feed
+            return SparseCat([:sated, :hungry], [0.9, 0.1])
+        else # s == :hungry and a != :feed
+            return Deterministic(:hungry)
+        end
+    end,
+    observation = function (a, sp)
+        if sp == :hungry
+            if a == :sing
+                return SparseCat([:crying, :quiet], [0.9, 0.1])
+            else # a == :ignore || a == :feed
+                return SparseCat([:crying, :quiet], [0.8, 0.2])
+            end
+        else # sp == :sated
+            if a == :sing
+                return Deterministic(:quiet)
+            else # a == :ignore || a == :feed
+                return SparseCat([:crying, :quiet], [0.1, 0.9])
+            end
+        end
+    end,
+    reward = function (s, a)
+        r = 0.0
+        if s == :hungry
+            r += -10.0
+        end
+        if a == :feed
+            r += -5.0
+        elseif a == :sing
+            r += -0.5
+        end
+        return r
+    end
+)
+
+struct CryingBabyState
+    hungry::Bool
+end
+
+struct CryingBabyPOMDP <: POMDP{CryingBabyState, Symbol, Symbol}
+    p_sated_to_hungry::Float64
+    p_cry_feed_hungry::Float64
+    p_cry_sing_hungry::Float64
+    p_cry_ignore_hungry::Float64
+    p_cry_feed_sated::Float64
+    p_cry_sing_sated::Float64
+    p_cry_ignore_sated::Float64
+    reward_hungry::Float64
+    reward_feed::Float64
+    reward_sing::Float64
+    discount_factor::Float64
+end
+
+function CryingBabyPOMDP(;
+    p_sated_to_hungry=0.1,
+    p_cry_feed_hungry=0.8,
+    p_cry_sing_hungry=0.9,
+    p_cry_ignore_hungry=0.8,
+    p_cry_feed_sated=0.1,
+    p_cry_sing_sated=0.0,
+    p_cry_ignore_sated=0.1,
+    reward_hungry=-10.0,
+    reward_feed=-5.0,
+    reward_sing=-0.5,
+    discount_factor=0.9
+)
+    return CryingBabyPOMDP(p_sated_to_hungry, p_cry_feed_hungry,
+        p_cry_sing_hungry, p_cry_ignore_hungry, p_cry_feed_sated,
+        p_cry_sing_sated, p_cry_ignore_sated, reward_hungry,
+        reward_feed, reward_sing, discount_factor)
+end
+
+POMDPs.actions(::CryingBabyPOMDP) = [:feed, :sing, :ignore]
+POMDPs.states(::CryingBabyPOMDP) = [CryingBabyState(false), CryingBabyState(true)]
+POMDPs.observations(::CryingBabyPOMDP) = [:crying, :quiet]
+POMDPs.stateindex(::CryingBabyPOMDP, s::CryingBabyState) = s.hungry ? 2 : 1
+POMDPs.obsindex(::CryingBabyPOMDP, o::Symbol) = o == :crying ? 1 : 2
+POMDPs.actionindex(::CryingBabyPOMDP, a::Symbol) = a == :feed ? 1 : a == :sing ? 2 : 3
+
+function POMDPs.transition(pomdp::CryingBabyPOMDP, s::CryingBabyState, a::Symbol)
+    if a == :feed
+        return Deterministic(CryingBabyState(false))
+    elseif !s.hungry # sated and a != :feed
+        return SparseCat([CryingBabyState(false), CryingBabyState(true)], [1 - pomdp.p_sated_to_hungry, pomdp.p_sated_to_hungry])
+    else # hungry and a != :feed
+        return Deterministic(CryingBabyState(true))
+    end
+end
+
+function POMDPs.observation(pomdp::CryingBabyPOMDP, a::Symbol, sp::CryingBabyState)
+    if sp.hungry
+        if a == :sing
+            return SparseCat([:crying, :quiet], [pomdp.p_cry_sing_hungry, 1 - pomdp.p_cry_sing_hungry])
+        elseif a == :ignore
+            return SparseCat([:crying, :quiet], [pomdp.p_cry_ignore_hungry, 1 - pomdp.p_cry_ignore_hungry])
+        else # a == :feed
+            return SparseCat([:crying, :quiet], [pomdp.p_cry_feed_hungry, 1 - pomdp.p_cry_feed_hungry])
+        end
+    else # sated
+        if a == :sing
+            return SparseCat([:crying, :quiet], [pomdp.p_cry_sing_sated, 1 - pomdp.p_cry_sing_sated])
+        elseif a == :ignore
+            return SparseCat([:crying, :quiet], [pomdp.p_cry_ignore_sated, 1 - pomdp.p_cry_ignore_sated])
+        else # a == :feed
+            return SparseCat([:crying, :quiet], [pomdp.p_cry_feed_sated, 1 - pomdp.p_cry_feed_sated])
+        end
+    end
+end
+
+function POMDPs.reward(pomdp::CryingBabyPOMDP, s::CryingBabyState, a::Symbol)
+    r = 0.0
+    if s.hungry
+        r += pomdp.reward_hungry
+    end
+    if a == :feed
+        r += pomdp.reward_feed
+    elseif a == :sing
+        r += pomdp.reward_sing
+    end
+    return r
+end
+
+POMDPs.discount(pomdp::CryingBabyPOMDP) = pomdp.discount_factor
+
+POMDPs.initialstate(::CryingBabyPOMDP) = Deterministic(CryingBabyState(false))
+
+explicit_crying_baby_pomdp = CryingBabyPOMDP()
+
+struct GenCryingBabyState
+    hungry::Bool
+end
+
+struct GenCryingBabyPOMDP <: POMDP{GenCryingBabyState, Symbol, Symbol}
+    p_sated_to_hungry::Float64
+    p_cry_feed_hungry::Float64
+    p_cry_sing_hungry::Float64
+    p_cry_ignore_hungry::Float64
+    p_cry_feed_sated::Float64
+    p_cry_sing_sated::Float64
+    p_cry_ignore_sated::Float64
+    reward_hungry::Float64
+    reward_feed::Float64
+    reward_sing::Float64
+    discount_factor::Float64
+
+    GenCryingBabyPOMDP() = new(0.1, 0.8, 0.9, 0.8, 0.1, 0.0, 0.1, -10.0, -5.0, -0.5, 0.9)
+end
+
+function POMDPs.gen(pomdp::GenCryingBabyPOMDP, s::GenCryingBabyState, a::Symbol, rng::AbstractRNG)
+
+    if a == :feed
+        sp = GenCryingBabyState(false)
+    elseif s.hungry # a hungry baby stays hungry unless fed
+        sp = GenCryingBabyState(true)
+    else
+        sp = rand(rng) < pomdp.p_sated_to_hungry ? GenCryingBabyState(true) : GenCryingBabyState(false)
+    end
+
+    if sp.hungry
+        if a == :sing
+            o = rand(rng) < pomdp.p_cry_sing_hungry ? :crying : :quiet
+        elseif a == :ignore
+            o = rand(rng) < pomdp.p_cry_ignore_hungry ? :crying : :quiet
+        else # a == :feed
+            o = rand(rng) < pomdp.p_cry_feed_hungry ? :crying : :quiet
+        end
+    else # sated
+        if a == :sing
+            o = rand(rng) < pomdp.p_cry_sing_sated ? :crying : :quiet
+        elseif a == :ignore
+            o = rand(rng) < pomdp.p_cry_ignore_sated ? :crying : :quiet
+        else # a == :feed
+            o = rand(rng) < pomdp.p_cry_feed_sated ? :crying : :quiet
+        end
+    end
+
+    r = 0.0
+    if sp.hungry
+        r += pomdp.reward_hungry
+    end
+    if a == :feed
+        r += pomdp.reward_feed
+    elseif a == :sing
+        r += pomdp.reward_sing
+    end
+
+    return (sp=sp, o=o, r=r)
+end
+
+POMDPs.initialstate(::GenCryingBabyPOMDP) = Deterministic(GenCryingBabyState(false))
+
+gen_crying_baby_pomdp = GenCryingBabyPOMDP()
+
+T = zeros(2, 3, 2) # |S| x |A| x |S'|, T[sp, a, s] = p(sp | a, s)
+T[:, 1, :] = [1.0 1.0;
+              0.0 0.0]
+T[:, 2, :] = [0.9 0.0;
+              0.1 1.0]
+T[:, 3, :] = [0.9 0.0;
+              0.1 1.0]
+
+O = zeros(2, 3, 2) # |O| x |A| x |S'|, O[o, a, sp] = p(o | a, sp)
+O[:, 1, :] = [0.1 0.8;
+              0.9 0.2]
+O[:, 2, :] = [0.0 0.9;
+              1.0 0.1]
+O[:, 3, :] = [0.1 0.8;
+              0.9 0.2]
+
+R = zeros(2, 3) # |S| x |A|
+R = [-5.0 -0.5 0.0;
+     -15.0 -10.5 -10.0]
+
+discount = 0.9
+
+tabular_crying_baby_pomdp = TabularPOMDP(T, R, O, discount)
diff --git a/docs/src/examples/crying_baby_solvers.jl b/docs/src/examples/crying_baby_solvers.jl
new file mode 100644
index 00000000..5c1115c3
--- /dev/null
+++ b/docs/src/examples/crying_baby_solvers.jl
@@ -0,0 +1,24 @@
+using BasicPOMCP
+using NativeSARSOP
+
+sarsop_solver = SARSOPSolver(; max_time=10.0)
+sarsop_policy = POMDPs.solve(sarsop_solver, quick_crying_baby_pomdp)
+
+pomcp_solver = POMCPSolver(; c=5.0, tree_queries=1000, rng=MersenneTwister(1))
+pomcp_planner = POMDPs.solve(pomcp_solver, quick_crying_baby_pomdp)
+
+struct HeuristicFeedPolicy{P<:POMDP} <: Policy
+    pomdp::P
+end
+function POMDPs.updater(policy::HeuristicFeedPolicy)
+    return DiscreteUpdater(policy.pomdp)
+end
+function POMDPs.action(policy::HeuristicFeedPolicy, b)
+    if pdf(b, :hungry) > 0.5
+        return :feed
+    else
+        return rand([:ignore, :sing])
+    end
+end
+
+heuristic_policy = HeuristicFeedPolicy(quick_crying_baby_pomdp)
diff --git a/docs/src/gallery.md b/docs/src/gallery.md
new file mode 100644
index 00000000..30089568
--- /dev/null
+++ b/docs/src/gallery.md
@@ -0,0 +1,17 @@
+# Gallery of POMDPs.jl Problems
+
+## ContinuumWorld
+
+## EscapeRoomba
+
+## DroneSurveillance
+
+## TagPOMDPProblem
+
+## LaserTag
+
+## QuickMountainCar
+
+## RockSample
+
+## VDPTag
\ No newline at end of file
diff --git a/docs/src/index.md b/docs/src/index.md
index e9b82e32..0767474b 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -49,6 +49,12 @@ Pages = [ "def_solver.md", "offline_solver.md", "online_solver.md", "def_updater
 Pages = [ "simulation.md", "run_simulation.md", "policy_interaction.md" ]
 ```
 
+### Examples and Gallery
+
+```@contents
+Pages = [ "examples.md", "example_defining_problems.md", "example_solvers.md", "example_simulations.md", "example_gridworld_mdp.md", "gallery.md"]
+```
+
 ### POMDPTools - the standard library for POMDPs.jl
 
 ```@contents