
Commit

updates for version 0.1.0
samuelnehrer02 committed Oct 28, 2024
1 parent f88e9f7 commit d818e3a
Showing 7 changed files with 140 additions and 44 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/CI_small.yml
@@ -3,10 +3,12 @@ on:
push:
branches:
- dev
- fitting
tags: ['*']
pull_request:
branches:
- dev
- fitting
workflow_dispatch:
concurrency:
# Skip intermediate builds: always.
3 changes: 2 additions & 1 deletion Project.toml
@@ -1,7 +1,7 @@
name = "ActiveInference"
uuid = "688b0e7a-0122-4325-8669-5ff08899a59e"
authors = ["Jonathan Ehrenreich Laursen", "Samuel William Nehrer"]
version = "0.0.4"
version = "0.1.0"

[deps]
ActionModels = "320cf53b-cc3b-4b34-9a10-0ecb113566a3"
@@ -19,4 +19,5 @@ IterTools = "1.10"
LinearAlgebra = "1"
LogExpFunctions = "0.3"
Random = "1"
ReverseDiff = "1.15"
julia = "1.10"
2 changes: 1 addition & 1 deletion src/ActionModelsExtensions/give_inputs.jl
@@ -22,7 +22,7 @@ function ActionModels.single_input!(aif::AIF, obs::Vector)
# if there is only one factor
if num_factors == 1
# Sample action from the action distribution
action = rand(action_distributions[1])
action = rand(action_distributions)

# If the agent has not taken any actions yet
if isempty(aif.action)
109 changes: 87 additions & 22 deletions src/pomdp/POMDP.jl
@@ -1,6 +1,16 @@
"""
This module contains models of Partially Observable Markov Decision Processes under Active Inference
action_pomdp!(agent, obs)
This function wraps the POMDP action-perception loop used for simulation and model fitting.
Arguments:
- `agent::Agent`: An instance of the ActionModels `Agent` type, which contains an AIF object as a substruct.
- `obs::Vector{Int64}`: A vector of observations, where each observation is an integer.
- `obs::Tuple{Vararg{Int}}`: A tuple of observations, where each observation is an integer.
- `obs::Int64`: A single observation, which is an integer.
- `aif::AIF`: An instance of the `AIF` type, which contains the agent's state, parameters, and substructures.
Outputs:
- Returns a `Distributions.Categorical` distribution or a vector of distributions, representing the probability distribution over actions for each state factor.
"""

### Action Model: Returns probability distributions for actions per factor
@@ -21,9 +31,9 @@ function action_pomdp!(agent::Agent, obs::Vector{Int64})
#Extract it
previous_action = agent.states["action"]

#If it is not a vector, make it one
# If it is not a vector, make it one
if !(previous_action isa Vector)
previous_action = [previous_action]
previous_action = collect(previous_action)
end

#Store the action in the AIF substruct
@@ -35,23 +45,34 @@ function action_pomdp!(agent::Agent, obs::Vector{Int64})
# Run state inference
infer_states!(agent.substruct, obs)

update_A!(agent.substruct, obs)

# If action is empty, update D vectors
if ismissing(agent.states["action"]) && agent.substruct.pD !== nothing
qs_t1 = get_history(agent.substruct)["posterior_states"][1]
update_D!(aif, qs_t1)
end

#=
if !ismissing(agent.states["action"])
# If learning of the B matrix is enabled and agent has a previous action
if !ismissing(agent.states["action"]) && agent.substruct.pB !== nothing

#Get the posterior over states from the previous time step
# Get the posterior over states from the previous time step
states_posterior = get_history(agent.substruct)["posterior_states"][end-1]

# Update Transition Matrix
update_B!(agent.substruct, states_posterior)
end
=#

# If learning of the A matrix is enabled
if agent.substruct.pA !== nothing
update_A!(agent.substruct, obs)
end

# Run policy inference
infer_policies!(agent.substruct)


### Retrieve log marginal probabilities of actions
log_action_marginals = get_log_action_marginals(agent.substruct)

### Pass action marginals through softmax function to get action probabilities
for factor in 1:n_factors
action_p[factor] = softmax(log_action_marginals[factor] * alpha, dims=1)
@@ -72,14 +93,14 @@ function action_pomdp!(agent::Agent, obs::Tuple{Vararg{Int}})

### Get parameters
alpha = agent.substruct.parameters["alpha"]
n_factors = length(aif.settings["num_controls"])
n_factors = length(agent.substruct.settings["num_controls"])

# Initialize empty arrays for action distribution per factor
action_p = Vector{Any}(undef, n_factors)
action_distribution = Vector(undef, n_factors)
action_distribution = Vector{Distributions.Categorical}(undef, n_factors)

#If there was a previous action
if !ismissing(agent.states["action"])
#If there was a previous action
if !ismissing(agent.states["action"])

#Extract it
previous_action = agent.states["action"]
@@ -96,20 +117,42 @@ function action_pomdp!(agent::Agent, obs::Tuple{Vararg{Int}})
### Infer states & policies

# Run state inference
infer_states!(aif, obs)
infer_states!(agent.substruct, obs)

# If action is empty and pD is not nothing, update D vectors
if ismissing(agent.states["action"]) && agent.substruct.pD !== nothing
qs_t1 = get_history(agent.substruct)["posterior_states"][1]
update_D!(aif, qs_t1)
end

# If learning of the B matrix is enabled and agent has a previous action
if !ismissing(agent.states["action"]) && agent.substruct.pB !== nothing

# Get the posterior over states from the previous time step
states_posterior = get_history(agent.substruct)["posterior_states"][end-1]

# Update Transition Matrix
update_B!(agent.substruct, states_posterior)
end

# If learning of the A matrix is enabled
if agent.substruct.pA !== nothing
update_A!(agent.substruct, obs)
end

# Run policy inference
infer_policies!(aif)
infer_policies!(agent.substruct)

### Retrieve log marginal probabilities of actions
log_action_marginals = get_log_action_marginals(aif)

### Retrieve log marginal probabilities of actions
log_action_marginals = get_log_action_marginals(agent.substruct)

### Pass action marginals through softmax function to get action probabilities
for factor in 1:n_factors
action_p[factor] = softmax(log_action_marginals[factor] * alpha, dims=1)
action_distribution[factor] = Distributions.Categorical(action_p[factor])
end

return n_factors == 1 ? action_distribution[1] : action_distribution
end

@@ -119,18 +162,40 @@ function action_pomdp!(aif::AIF, obs::Vector{Int64})
alpha = aif.parameters["alpha"]
n_factors = length(aif.settings["num_controls"])

# Initialize an empty arrays for action distribution per factor
# Initialize empty arrays for action distribution per factor
action_p = Vector{Any}(undef, n_factors)
action_distribution = Vector(undef, n_factors)
action_distribution = Vector{Distributions.Categorical}(undef, n_factors)

### Infer states & policies

# Run state inference
infer_states!(aif, obs)

# If action is empty, update D vectors
if ismissing(get_states(aif)["action"]) && aif.pD !== nothing
qs_t1 = get_history(aif)["posterior_states"][1]
update_D!(aif, qs_t1)
end

# If learning of the B matrix is enabled and agent has a previous action
if !ismissing(get_states(aif)["action"]) && aif.pB !== nothing

# Get the posterior over states from the previous time step
states_posterior = get_history(aif)["posterior_states"][end-1]

# Update Transition Matrix
update_B!(aif, states_posterior)
end

# If learning of the A matrix is enabled
if aif.pA !== nothing
update_A!(aif, obs)
end

# Run policy inference
infer_policies!(aif)


### Retrieve log marginal probabilities of actions
log_action_marginals = get_log_action_marginals(aif)

Expand All @@ -140,7 +205,7 @@ function action_pomdp!(aif::AIF, obs::Vector{Int64})
action_distribution[factor] = Distributions.Categorical(action_p[factor])
end

return action_distribution
return n_factors == 1 ? action_distribution[1] : action_distribution
end

function action_pomdp!(agent::Agent, obs::Int64)
33 changes: 25 additions & 8 deletions src/pomdp/inference.jl
@@ -138,6 +138,7 @@ function fixed_point_iteration(
prior::Union{Nothing, Vector{Vector{T}}} where T <: Real = nothing,
num_iter::Int=num_iter, dF::Float64=1.0, dF_tol::Float64=dF_tol
)
# Get model dimensions (NOTE Sam: We need to save model dimensions in the AIF struct in the future)
n_modalities = length(num_obs)
n_factors = length(num_states)

@@ -151,6 +152,7 @@
qs[factor] = ones(num_states[factor]) / num_states[factor]
end

# If no prior is provided, create a default prior with uniform distribution
if prior === nothing
prior = create_matrix_templates(num_states)
end
@@ -166,26 +168,38 @@
if n_factors == 1
qL = dot_product(likelihood, qs[1])
return [softmax(qL .+ prior[1], dims=1)]

# If there are more factors
else
# Run for a given number of iterations
for iteration in 1:num_iter
### Fixed-Point Iteration ###
curr_iter = 0
### Sam NOTE: We need to check whether ReverseDiff might potentially have issues with this while loop ###
while curr_iter < num_iter && dF >= dF_tol
qs_all = qs[1]
# Loop over each factor starting from the second one
for factor in 2:n_factors
# Reshape and multiply qs_all with the current factor's qs
qs_all = qs_all .* reshape(qs[factor], tuple(ones(Real, factor - 1)..., :, 1))
end

# Compute the log-likelihood
LL_tensor = likelihood .* qs_all

# Update each factor's qs
for factor in 1:n_factors
qL = zeros(Real,size(qs[factor]))
# Initialize qL for the current factor
qL = zeros(Real, size(qs[factor]))

# Compute qL for each state in the current factor
for i in 1:size(qs[factor], 1)
qL[i] = sum([LL_tensor[indices...] / qs[factor][i] for indices in Iterators.product([1:size(LL_tensor, dim) for dim in 1:n_factors]...) if indices[factor] == i])
end

# If qs is tracked by ReverseDiff, get the value
if ReverseDiff.istracked(softmax(qL .+ prior[factor], dims=1))
qs[factor] = ReverseDiff.value(softmax(qL .+ prior[factor], dims=1))

# Otherwise, proceed as normal
else
# Otherwise, proceed as normal
qs[factor] = softmax(qL .+ prior[factor], dims=1)
end
end
@@ -196,6 +210,9 @@
# Update stopping condition
dF = abs(prev_vfe - vfe)
prev_vfe = vfe

# Increment iteration
curr_iter += 1
end

return qs
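
As an aside, a compact sketch of the tracked-value guard used in the update above (illustrative rewrite, not part of the diff): when the softmax output is tracked by ReverseDiff, only its primal value is stored, presumably to keep the inner fixed-point iterations off the gradient tape.

# equivalent guard written as a ternary (assumes ReverseDiff is loaded, as in this file)
posterior = softmax(qL .+ prior[factor], dims=1)
qs[factor] = ReverseDiff.istracked(posterior) ? ReverseDiff.value(posterior) : posterior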
@@ -477,14 +494,14 @@ function compute_accuracy_new(log_likelihood, qs::Vector{Vector{Real}})
return results
end

""" Calculate SAPE """
function calc_SAPE(aif::AIF)
""" Calculate State-Action Prediction Error """
function calculate_SAPE(aif::AIF)

qs_pi_all = get_expected_states(aif.qs_current, aif.B, aif.policies)
qs_bma = bayesian_model_average(qs_pi_all, aif.Q_pi)

if length(aif.states["bayesian_model_averages"]) != 0
sape = kl_div(qs_bma, aif.states["bayesian_model_averages"][end])
sape = kl_divergence(qs_bma, aif.states["bayesian_model_averages"][end])
push!(aif.states["SAPE"], sape)
end

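In symbols, the quantity pushed to `aif.states["SAPE"]` above appears to be the KL divergence between the current Bayesian model average over states and the one stored at the previous time step:

\mathrm{SAPE}_t = D_{\mathrm{KL}}\left( q^{\mathrm{BMA}}_t \,\|\, q^{\mathrm{BMA}}_{t-1} \right)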
6 changes: 3 additions & 3 deletions src/pomdp/struct.jl
@@ -26,7 +26,7 @@ mutable struct AIF
qs_current::Vector{Vector{T}} where T <: Real # Current beliefs about states
prior::Vector{Vector{T}} where T <: Real # Prior beliefs about states
Q_pi::Vector{T} where T <:Real # Posterior beliefs over policies
G::Vector{T} where T <:Real # Expected free energy of policy
G::Vector{T} where T <:Real # Expected free energy of policies
action::Vector{Int} # Last action
use_utility::Bool # Utility Boolean Flag
use_states_info_gain::Bool # States Information Gain Boolean Flag
@@ -56,8 +56,8 @@ function create_aif(A, B;
fr_pD = 1.0,
modalities_to_learn = "all",
factors_to_learn = "all",
gamma=16.0,
alpha=16.0,
gamma=1.0,
alpha=1.0,
policy_len=1,
num_controls=nothing,
control_fac_idx=nothing,
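Note that the defaults for `gamma` and `alpha` change here from 16.0 to 1.0. A caller who wants the previous behaviour would presumably pass the old values explicitly, e.g. (illustrative call; `A` and `B` are assumed to be pre-built generative-model arrays):

aif = create_aif(A, B; gamma = 16.0, alpha = 16.0)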
29 changes: 20 additions & 9 deletions src/utils/maths.jl
@@ -18,12 +18,12 @@ function capped_log(x::Real)
end

"""
capped_log(x::AbstractArray{T}) where T <: Real
# Arguments
- `array::AbstractArray{T}`: An array of real numbers.
capped_log(array::Array{Float64})
capped_log(array::Array{T}) where T <: Real
capped_log(array::Vector{Real})
Return the natural logarithm of x, capped at the machine epsilon value of x.
"""

function capped_log(array::Array{Float64})
@@ -49,7 +49,6 @@ function capped_log(array::Vector{Real})

array = log.(max.(array, epsilon))
# Return the log of the array values capped at epsilon
#@show typeof(array)
return array
end
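
For intuition, a minimal illustration of the intended behaviour (the cap constant used by the array methods is not visible in this hunk; a value on the order of 1e-16 is assumed here, matching `eps_val` used elsewhere in this file):

capped_log([0.5, 0.0])   # ≈ [-0.693, -36.84]; the zero entry is capped at log(1e-16)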

@@ -256,15 +255,27 @@ function bayesian_model_average(qs_pi_all, q_pi)
return qs_bma
end

function kl_div(P::Vector{Vector{Vector{Real}}}, Q::Vector{Vector{Vector{Real}}})
eps_val=1e-16
dkl = 0.0
"""
kl_divergence(P::Vector{Vector{Vector{Real}}}, Q::Vector{Vector{Vector{Real}}})
# Arguments
- `P::Vector{Vector{Vector{Real}}}`
- `Q::Vector{Vector{Vector{Real}}}`
Return the Kullback-Leibler (KL) divergence between two probability distributions.
"""
function kl_divergence(P::Vector{Vector{Vector{Real}}}, Q::Vector{Vector{Vector{Real}}})
eps_val = 1e-16 # eps constant to avoid log(0)
dkl = 0.0 # Initialize KL divergence to zero

for j in 1:length(P)
for i in 1:length(P[j])
# Compute the dot product of P[j][i] and the difference of logs of P[j][i] and Q[j][i]
dkl += dot(P[j][i], log.(P[j][i] .+ eps_val) .- log.(Q[j][i] .+ eps_val))
end
end
return dkl

return dkl # Return KL divergence
end
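
For reference, the nested loops above compute

D_{\mathrm{KL}}(P \,\|\, Q) = \sum_{j}\sum_{i}\sum_{k} P_{j,i,k}\,\bigl(\log(P_{j,i,k} + \varepsilon) - \log(Q_{j,i,k} + \varepsilon)\bigr), \qquad \varepsilon = 10^{-16},

i.e. a sum of elementwise KL terms over all factors and their inner vectors, with the small constant guarding against log(0).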


