
Commit

updates for version 0.1.0
samuelnehrer02 committed Oct 28, 2024
1 parent f88e9f7 commit d818e3a
Showing 7 changed files with 140 additions and 44 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/CI_small.yml
@@ -3,10 +3,12 @@ on:
push:
branches:
- dev
- fitting
tags: ['*']
pull_request:
branches:
- dev
- fitting
workflow_dispatch:
concurrency:
# Skip intermediate builds: always.
3 changes: 2 additions & 1 deletion Project.toml
@@ -1,7 +1,7 @@
name = "ActiveInference"
uuid = "688b0e7a-0122-4325-8669-5ff08899a59e"
authors = ["Jonathan Ehrenreich Laursen", "Samuel William Nehrer"]
version = "0.0.4"
version = "0.1.0"

[deps]
ActionModels = "320cf53b-cc3b-4b34-9a10-0ecb113566a3"
@@ -19,4 +19,5 @@ IterTools = "1.10"
LinearAlgebra = "1"
LogExpFunctions = "0.3"
Random = "1"
ReverseDiff = "1.15"
julia = "1.10"
2 changes: 1 addition & 1 deletion src/ActionModelsExtensions/give_inputs.jl
@@ -22,7 +22,7 @@ function ActionModels.single_input!(aif::AIF, obs::Vector)
# if there is only one factor
if num_factors == 1
# Sample action from the action distribution
action = rand(action_distributions[1])
action = rand(action_distributions)

# If the agent has not taken any actions yet
if isempty(aif.action)
109 changes: 87 additions & 22 deletions src/pomdp/POMDP.jl
@@ -1,6 +1,16 @@
"""
This module contains models of Partially Observable Markov Decision Processes under Active Inference
action_pomdp!(agent, obs)
This function wraps the POMDP action-perception loop used for simulation and model fitting.
Arguments:
- `agent::Agent`: An instance of the ActionModels `Agent` type, which contains an AIF object as a substruct.
- `obs::Vector{Int64}`: A vector of observations, where each observation is an integer.
- `obs::Tuple{Vararg{Int}}`: A tuple of observations, where each observation is an integer.
- `obs::Int64`: A single observation, which is an integer.
- `aif::AIF`: An instance of the `AIF` type, which contains the agent's state, parameters, and substructures.
Outputs:
- Returns a `Distributions.Categorical` distribution or a vector of distributions, representing the probability distribution over actions for each state factor.
"""

### Action Model: Returns probability distributions for actions per factor
@@ -21,9 +31,9 @@ function action_pomdp!(agent::Agent, obs::Vector{Int64})
#Extract it
previous_action = agent.states["action"]

#If it is not a vector, make it one
# If it is not a vector, make it one
if !(previous_action isa Vector)
previous_action = [previous_action]
previous_action = collect(previous_action)
end

#Store the action in the AIF substruct
@@ -35,23 +45,34 @@ function action_pomdp!(agent::Agent, obs::Vector{Int64})
# Run state inference
infer_states!(agent.substruct, obs)

update_A!(agent.substruct, obs)

# If action is empty, update D vectors
if ismissing(agent.states["action"]) && agent.substruct.pD !== nothing
qs_t1 = get_history(agent.substruct)["posterior_states"][1]
update_D!(aif, qs_t1)
end

#=
if !ismissing(agent.states["action"])
# If learning of the B matrix is enabled and agent has a previous action
if !ismissing(agent.states["action"]) && agent.substruct.pB !== nothing

#Get the posterior over states from the previous time step
# Get the posterior over states from the previous time step
states_posterior = get_history(agent.substruct)["posterior_states"][end-1]

# Update Transition Matrix
update_B!(agent.substruct, states_posterior)
end
=#

# If learning of the A matrix is enabled
if agent.substruct.pA !== nothing
update_A!(agent.substruct, obs)
end

# Run policy inference
infer_policies!(agent.substruct)


### Retrieve log marginal probabilities of actions
log_action_marginals = get_log_action_marginals(agent.substruct)

### Pass action marginals through softmax function to get action probabilities
for factor in 1:n_factors
action_p[factor] = softmax(log_action_marginals[factor] * alpha, dims=1)
@@ -72,14 +93,14 @@ function action_pomdp!(agent::Agent, obs::Tuple{Vararg{Int}})

### Get parameters
alpha = agent.substruct.parameters["alpha"]
n_factors = length(aif.settings["num_controls"])
n_factors = length(agent.substruct.settings["num_controls"])

# Initialize empty arrays for action distribution per factor
action_p = Vector{Any}(undef, n_factors)
action_distribution = Vector(undef, n_factors)
action_distribution = Vector{Distributions.Categorical}(undef, n_factors)

#If there was a previous action
if !ismissing(agent.states["action"])
#If there was a previous action
if !ismissing(agent.states["action"])

#Extract it
previous_action = agent.states["action"]
@@ -96,20 +117,42 @@ function action_pomdp!(agent::Agent, obs::Tuple{Vararg{Int}})
### Infer states & policies

# Run state inference
infer_states!(aif, obs)
infer_states!(agent.substruct, obs)

# If action is empty and pD is not nothing, update D vectors
if ismissing(agent.states["action"]) && agent.substruct.pD !== nothing
qs_t1 = get_history(agent.substruct)["posterior_states"][1]
update_D!(aif, qs_t1)
end

# If learning of the B matrix is enabled and agent has a previous action
if !ismissing(agent.states["action"]) && agent.substruct.pB !== nothing

# Get the posterior over states from the previous time step
states_posterior = get_history(agent.substruct)["posterior_states"][end-1]

# Update Transition Matrix
update_B!(agent.substruct, states_posterior)
end

# If learning of the A matrix is enabled
if agent.substruct.pA !== nothing
update_A!(agent.substruct, obs)
end

# Run policy inference
infer_policies!(aif)
infer_policies!(agent.substruct)

### Retrieve log marginal probabilities of actions
log_action_marginals = get_log_action_marginals(aif)

### Retrieve log marginal probabilities of actions
log_action_marginals = get_log_action_marginals(agent.substruct)

### Pass action marginals through softmax function to get action probabilities
for factor in 1:n_factors
action_p[factor] = softmax(log_action_marginals[factor] * alpha, dims=1)
action_distribution[factor] = Distributions.Categorical(action_p[factor])
end

return n_factors == 1 ? action_distribution[1] : action_distribution
end

@@ -119,18 +162,40 @@ function action_pomdp!(aif::AIF, obs::Vector{Int64})
alpha = aif.parameters["alpha"]
n_factors = length(aif.settings["num_controls"])

# Initialize an empty arrays for action distribution per factor
# Initialize empty arrays for action distribution per factor
action_p = Vector{Any}(undef, n_factors)
action_distribution = Vector(undef, n_factors)
action_distribution = Vector{Distributions.Categorical}(undef, n_factors)

### Infer states & policies

# Run state inference
infer_states!(aif, obs)

# If action is empty, update D vectors
if ismissing(get_states(aif)["action"]) && aif.pD !== nothing
qs_t1 = get_history(aif)["posterior_states"][1]
update_D!(aif, qs_t1)
end

# If learning of the B matrix is enabled and agent has a previous action
if !ismissing(get_states(aif)["action"]) && aif.pB !== nothing

# Get the posterior over states from the previous time step
states_posterior = get_history(aif)["posterior_states"][end-1]

# Update Transition Matrix
update_B!(aif, states_posterior)
end

# If learning of the A matrix is enabled
if aif.pA !== nothing
update_A!(aif, obs)
end

# Run policy inference
infer_policies!(aif)


### Retrieve log marginal probabilities of actions
log_action_marginals = get_log_action_marginals(aif)

Expand All @@ -140,7 +205,7 @@ function action_pomdp!(aif::AIF, obs::Vector{Int64})
action_distribution[factor] = Distributions.Categorical(action_p[factor])
end

return action_distribution
return n_factors == 1 ? action_distribution[1] : action_distribution
end

function action_pomdp!(agent::Agent, obs::Int64)
33 changes: 25 additions & 8 deletions src/pomdp/inference.jl
@@ -138,6 +138,7 @@ function fixed_point_iteration(
prior::Union{Nothing, Vector{Vector{T}}} where T <: Real = nothing,
num_iter::Int=num_iter, dF::Float64=1.0, dF_tol::Float64=dF_tol
)
# Get model dimensions (NOTE Sam: We need to save model dimensions in the AIF struct in the future)
n_modalities = length(num_obs)
n_factors = length(num_states)

@@ -151,6 +152,7 @@
qs[factor] = ones(num_states[factor]) / num_states[factor]
end

# If no prior is provided, create a default prior with uniform distribution
if prior === nothing
prior = create_matrix_templates(num_states)
end
@@ -166,26 +168,38 @@
if n_factors == 1
qL = dot_product(likelihood, qs[1])
return [softmax(qL .+ prior[1], dims=1)]

# If there are more factors
else
# Run for a given number of iterations
for iteration in 1:num_iter
### Fixed-Point Iteration ###
curr_iter = 0
### Sam NOTE: We need to check whether ReverseDiff might potentially have issues with this while loop ###
while curr_iter < num_iter && dF >= dF_tol
qs_all = qs[1]
# Loop over each factor starting from the second one
for factor in 2:n_factors
# Reshape and multiply qs_all with the current factor's qs
qs_all = qs_all .* reshape(qs[factor], tuple(ones(Real, factor - 1)..., :, 1))
end

# Compute the log-likelihood
LL_tensor = likelihood .* qs_all

# Update each factor's qs
for factor in 1:n_factors
qL = zeros(Real,size(qs[factor]))
# Initialize qL for the current factor
qL = zeros(Real, size(qs[factor]))

# Compute qL for each state in the current factor
for i in 1:size(qs[factor], 1)
qL[i] = sum([LL_tensor[indices...] / qs[factor][i] for indices in Iterators.product([1:size(LL_tensor, dim) for dim in 1:n_factors]...) if indices[factor] == i])
end

# If qs is tracked by ReverseDiff, get the value
if ReverseDiff.istracked(softmax(qL .+ prior[factor], dims=1))
qs[factor] = ReverseDiff.value(softmax(qL .+ prior[factor], dims=1))

# Otherwise, proceed as normal
else
# Otherwise, proceed as normal
qs[factor] = softmax(qL .+ prior[factor], dims=1)
end
end
@@ -196,6 +210,9 @@
# Update stopping condition
dF = abs(prev_vfe - vfe)
prev_vfe = vfe

# Increment iteration
curr_iter += 1
end

return qs
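
As an aside, a compact sketch of the tracked-value guard used in the update above (illustrative rewrite, not part of the diff): when the softmax output is tracked by ReverseDiff, only its primal value is stored, presumably to keep the inner fixed-point iterations off the gradient tape.

# equivalent guard written as a ternary (assumes ReverseDiff is loaded, as in this file)
posterior = softmax(qL .+ prior[factor], dims=1)
qs[factor] = ReverseDiff.istracked(posterior) ? ReverseDiff.value(posterior) : posterior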
@@ -477,14 +494,14 @@ function compute_accuracy_new(log_likelihood, qs::Vector{Vector{Real}})
return results
end

""" Calculate SAPE """
function calc_SAPE(aif::AIF)
""" Calculate State-Action Prediction Error """
function calculate_SAPE(aif::AIF)

qs_pi_all = get_expected_states(aif.qs_current, aif.B, aif.policies)
qs_bma = bayesian_model_average(qs_pi_all, aif.Q_pi)

if length(aif.states["bayesian_model_averages"]) != 0
sape = kl_div(qs_bma, aif.states["bayesian_model_averages"][end])
sape = kl_divergence(qs_bma, aif.states["bayesian_model_averages"][end])
push!(aif.states["SAPE"], sape)
end

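In symbols, the quantity pushed to `aif.states["SAPE"]` above appears to be the KL divergence between the current Bayesian model average over states and the one stored at the previous time step:

\mathrm{SAPE}_t = D_{\mathrm{KL}}\left( q^{\mathrm{BMA}}_t \,\|\, q^{\mathrm{BMA}}_{t-1} \right)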
6 changes: 3 additions & 3 deletions src/pomdp/struct.jl
@@ -26,7 +26,7 @@ mutable struct AIF
qs_current::Vector{Vector{T}} where T <: Real # Current beliefs about states
prior::Vector{Vector{T}} where T <: Real # Prior beliefs about states
Q_pi::Vector{T} where T <:Real # Posterior beliefs over policies
G::Vector{T} where T <:Real # Expected free energy of policy
G::Vector{T} where T <:Real # Expected free energy of policies
action::Vector{Int} # Last action
use_utility::Bool # Utility Boolean Flag
use_states_info_gain::Bool # States Information Gain Boolean Flag
@@ -56,8 +56,8 @@ function create_aif(A, B;
fr_pD = 1.0,
modalities_to_learn = "all",
factors_to_learn = "all",
gamma=16.0,
alpha=16.0,
gamma=1.0,
alpha=1.0,
policy_len=1,
num_controls=nothing,
control_fac_idx=nothing,
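Note that the defaults for `gamma` and `alpha` change here from 16.0 to 1.0. A caller who wants the previous behaviour would presumably pass the old values explicitly, e.g. (illustrative call; `A` and `B` are assumed to be pre-built generative-model arrays):

aif = create_aif(A, B; gamma = 16.0, alpha = 16.0)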
29 changes: 20 additions & 9 deletions src/utils/maths.jl
@@ -18,12 +18,12 @@ function capped_log(x::Real)
end

"""
capped_log(x::AbstractArray{T}) where T <: Real
# Arguments
- `array::AbstractArray{T}`: An array of real numbers.
capped_log(array::Array{Float64})
capped_log(array::Array{T}) where T <: Real
capped_log(array::Vector{Real})
Return the natural logarithm of x, capped at the machine epsilon value of x.
"""

function capped_log(array::Array{Float64})
@@ -49,7 +49,6 @@ function capped_log(array::Vector{Real})

array = log.(max.(array, epsilon))
# Return the log of the array values capped at epsilon
#@show typeof(array)
return array
end
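
For intuition, a minimal illustration of the intended behaviour (the cap constant used by the array methods is not visible in this hunk; a value on the order of 1e-16 is assumed here, matching `eps_val` used elsewhere in this file):

capped_log([0.5, 0.0])   # ≈ [-0.693, -36.84]; the zero entry is capped at log(1e-16)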

@@ -256,15 +255,27 @@ function bayesian_model_average(qs_pi_all, q_pi)
return qs_bma
end

function kl_div(P::Vector{Vector{Vector{Real}}}, Q::Vector{Vector{Vector{Real}}})
eps_val=1e-16
dkl = 0.0
"""
kl_divergence(P::Vector{Vector{Vector{Real}}}, Q::Vector{Vector{Vector{Real}}})
# Arguments
- `P::Vector{Vector{Vector{Real}}}`
- `Q::Vector{Vector{Vector{Real}}}`
Return the Kullback-Leibler (KL) divergence between two probability distributions.
"""
function kl_divergence(P::Vector{Vector{Vector{Real}}}, Q::Vector{Vector{Vector{Real}}})
eps_val = 1e-16 # eps constant to avoid log(0)
dkl = 0.0 # Initialize KL divergence to zero

for j in 1:length(P)
for i in 1:length(P[j])
# Compute the dot product of P[j][i] and the difference of logs of P[j][i] and Q[j][i]
dkl += dot(P[j][i], log.(P[j][i] .+ eps_val) .- log.(Q[j][i] .+ eps_val))
end
end
return dkl

return dkl # Return KL divergence
end
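
For reference, the nested loops above compute

D_{\mathrm{KL}}(P \,\|\, Q) = \sum_{j}\sum_{i}\sum_{k} P_{j,i,k}\,\bigl(\log(P_{j,i,k} + \varepsilon) - \log(Q_{j,i,k} + \varepsilon)\bigr), \qquad \varepsilon = 10^{-16},

i.e. a sum of elementwise KL terms over all factors and their inner vectors, with the small constant guarding against log(0).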


