#=
Implementation of the AdvancedHMC.jl example for the neural network code. The neural network code here is a slightly modified version of the one in BayesVI.jl, adjusted so that it works with HMC sampling.
=#
module BayesHMC
using AdvancedHMC, Distributions, ForwardDiff, Zygote
using Random
using OMEinsum
using Plots
using OrderedCollections
#=
Splits a 1xD parameter vector into means and log standard deviations: the first half of the vector is treated as the means, the second half as the log standard deviations.
Input: 1xD vector of floats.
Output: 1x(D/2) vector of means, 1x(D/2) vector of log standard deviations.
=#
function unpack_params(parameters)
    mid = length(parameters) ÷ 2
    mean, log_std = parameters[1:mid], parameters[mid+1:end]
    return mean, log_std
end
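# A minimal illustration (the values below are hypothetical, not from the module):
# unpack_params([0.1, 0.2, -1.0, -2.0]) returns ([0.1, 0.2], [-1.0, -2.0]).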
#= Specifies and produces the Bayesian neural network.
Input:
layer_sizes = vector of integers giving the number of nodes in each layer, e.g. [1, 20, 20, 1].
L2_reg = float specifying the L2 regularisation strength on the weights.
noise_variance = float scaling the likelihood term (controls the shrinkage effect).
nonlinearity = function used as the non-linearity between layers.
Output:
num_weights = total number of weights in the produced neural network.
predictions = forward-pass function.
logprob = log probability function.
=#
function make_neural_network_functions(layer_sizes, L2_reg, noise_variance, nonlinearity)
    shapes = collect(zip(layer_sizes[1:end-1], layer_sizes[2:end]))
    num_weights = sum((m+1)*n for (m, n) in shapes)
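    # For example, layer_sizes = [1, 20, 20, 1] gives shapes [(1, 20), (20, 20), (20, 1)]
    # and num_weights = 2*20 + 21*20 + 21*1 = 40 + 420 + 21 = 481.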
    # Splits the flat weight vector into per-layer weight matrices W and bias rows b.
    function unpack_layers(weights)
        W = OrderedDict()
        b = OrderedDict()
        i = 1
        for (m, n) in shapes
            W[i] = reshape(weights[1:m*n], (m, n))
            # Julia is 1-indexed, so the biases start at index m*n + 1.
            b[i] = reshape(weights[m*n+1:m*n+n], (1, n))
            # Drop the (m+1)*n entries consumed by this layer.
            weights = weights[(m+1)*n+1:end]
            i += 1
        end
        return (W, b)
    end
    # Computes the network outputs for a single weight vector.
    # inputs dimensions: observations x features.
    # weights dimensions: flat vector of length num_weights.
    function predictions(weights, inputs)
        Ws, bs = unpack_layers(weights)
        # Propagate the inputs through each layer; (Ws[j], bs[j]) are the weights and biases of layer j.
        for j in 1:length(Ws)
            outputs = ein"ij,jk->ik"(inputs, Ws[j]) .+ bs[j]
            # The final layer is linear: return its outputs without applying the nonlinearity.
            if j == length(Ws)
                return outputs
            end
            inputs = nonlinearity(outputs)
        end
    end
    # Returns the scalar log joint probability (log prior + log likelihood) for a single weight vector.
    # weights = vector of length num_weights (non-variational).
    function logprob(weights, inputs, targets)
        log_prior = -L2_reg * sum(weights .^ 2)
        preds = predictions(weights, inputs)
        log_lik = -sum((preds .- targets) .^ 2) / noise_variance
        return log_prior + log_lik
    end
    return num_weights, predictions, logprob
end
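#=
Usage sketch (a minimal, hypothetical example; `xs`, `ys`, and the
hyperparameter values are illustrative, not part of the original code):

    num_weights, predictions, logprob =
        make_neural_network_functions([1, 20, 20, 1], 0.1, 0.1, rbf)
    w = randn(num_weights)                                  # one flat weight vector
    xs = reshape(collect(range(-2, 2, length=20)), 20, 1)   # observations x features
    ys = sin.(xs)
    predictions(w, xs)    # 20x1 matrix of network outputs
    logprob(w, xs, ys)    # scalar log joint probability
=#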
#=
RBF activation function.
=#
function rbf(x)
    exp.(-x .^ 2)
end
#=
Linear activation function.
=#
function linear(x)
    x
end
#=
Executes the HMC sampler on the Bayesian neural network. This is mostly the example from the AdvancedHMC.jl page, adapted to work with the neural network's posterior instead.
Inputs:
n_samples: number of samples to draw
n_adapts: number of adaptation steps
num_weights: number of weights in the neural network
log_posterior: function returning the log posterior density of the network weights
Outputs:
samples: samples from the posterior
stats: statistics from the AdvancedHMC.jl sampling process
=#
function run_hmc_sampler(n_samples, n_adapts, num_weights, log_posterior)
    # Choose the parameter dimensionality and an initial parameter value.
    D = num_weights
    initial_θ = rand(D)
    # Define the target distribution.
    ℓπ(θ) = log_posterior(θ)
    # Configure and run the HMC sampler.
    metric = DiagEuclideanMetric(D)
    hamiltonian = Hamiltonian(metric, ℓπ, Zygote)
    initial_ϵ = find_good_stepsize(hamiltonian, initial_θ)
    integrator = Leapfrog(initial_ϵ)
    proposal = NUTS{MultinomialTS, GeneralisedNoUTurn}(integrator)
    adaptor = StanHMCAdaptor(MassMatrixAdaptor(metric), StepSizeAdaptor(0.8, integrator))
    # `samples` stores the draws; `stats` contains diagnostic information.
    samples, stats = sample(hamiltonian, proposal, initial_θ, n_samples, adaptor, n_adapts; progress=false, drop_warmup=true)
    return samples, stats
end
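#=
End-to-end sketch tying the pieces together. Everything below (the `demo_hmc`
name, the toy data, and the hyperparameter values) is illustrative, not part of
the original example. The function is defined but not called, so loading the
module stays side-effect free.
=#
function demo_hmc(; n_samples=1000, n_adapts=500)
    # Hypothetical toy regression data: observations x features.
    xs = reshape(collect(range(-2, 2, length=20)), 20, 1)
    ys = sin.(xs)
    num_weights, predictions, logprob = make_neural_network_functions([1, 10, 1], 0.1, 0.1, rbf)
    # Close over the data so the sampler sees a function of the weights only.
    log_posterior(w) = logprob(w, xs, ys)
    samples, stats = run_hmc_sampler(n_samples, n_adapts, num_weights, log_posterior)
    return samples, stats
end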
export unpack_params
export make_neural_network_functions
export rbf
export linear
export run_hmc_sampler
end