-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcustomstan.R
246 lines (203 loc) · 8.15 KB
/
customstan.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
# custom stan
dat <- gamSim(1, n = 100)
m1 <- gam(y ~ s(x2), data = dat)
plot(m1)
library(rstan)
b1 <- stan(file = "customstan.stan",
data = list(N = length(dat$x2),
x = dat$x2,
y = dat$y),
iter = 400, chains = 2, cores = 2)
plot(b1)
library(brms)
dummy <- brm(y ~ gp(x2), data = dat,
iter = 400, chains = 2, cores = 2)
#marginal_smooths(dummy)
marginal_effects(dummy) # effects is needed for gaussian process
class(dummy$fit)
class(b1)
dummy2 <- dummy
dummy2$fit <- b1
marginal_smooths(dummy2)
marginal_effects(dummy2)
stancode(dummy)
b1_extract <- extract(b1)
# define a model in terms of the joint distribution of the observed and unobserved
# data
newdata <- seq(from = 0.01, to = 1, by = 0.01)
dat$newdata <- newdata
b2 <- stan(file="customstan_predict.stan",
data=list(x1=dat$x2, y1=dat$y, N1=length(dat$x2),
x2=dat$newdata, N2=length(dat$newdata)),
iter=400, chains=2, cores = 2)
wot <- extract(b2)
y_new <- colMeans(wot$y2)
plot(y_new~newdata)
points(y_new3~newdata, add = TRUE, col = "red")
# predictions can be made through newdata syntax.
# 2 steps left:
# custom covariance functions
# multidimensional gaussian processes
# trying out self implemented squared exponential function
b3 <- stan(file = "customstan_cov_exp_quad.stan",
data = list(x1=dat$x2, y1=dat$y, N1=length(dat$x2),
x2=dat$newdata, N2=length(dat$newdata)),
iter = 400, chains = 2, cores = 2)
wot3 <- extract(b3)
y_new3 <- colMeans(wot3$y2)
plot(y_new3~newdata)
points(y_new~newdata, add = TRUE, col = "red")
# with abs instead of squared distance it seems to rough
# b4 is with sqrt(squared distance)
# should not make a difference but lets see
b4 <- stan(file = "customstan_cov_exp_quad.stan",
data = list(x1=dat$x2, y1=dat$y, N1=length(dat$x2),
x2=dat$newdata, N2=length(dat$newdata)),
iter = 400, chains = 2, cores = 2)
wot4 <- extract(b4)
y_new4 <- colMeans(wot4$y2)
plot(y_new4~newdata)
points(y_new~newdata, add = TRUE, col = "red")
# produces almost equivalent output but square root operations seems slower
plot(y_new4~newdata)
points(y_new3~newdata, add = TRUE, col = "red")
# there seems to be a slight difference between the default cov_exp_quad function
# and my implementation. but at least i know that custom functions to fill the
# matrix work like expected.
# trying out squaring rho
b5 <- stan(file = "customstan_cov_exp_quad.stan",
data = list(x1=dat$x2, y1=dat$y, N1=length(dat$x2),
x2=dat$newdata, N2=length(dat$newdata)),
iter = 400, chains = 2, cores = 2)
wot5 <- extract(b5)
y_new5 <- colMeans(wot5$y2)
plot(y_new5~newdata)
points(y_new~newdata, col = "red", add = TRUE)
# that seems to be the correct way.
# implementing custom covariance functions in the same way should be simple
# the two dimensional case should then be possible to be handled like in the
# code generated by brms
# trying 2 dimensional case
library(mgcv)
library(mgcViz)
library(brms)
set.seed(1)
dat <- gamSim(1, n = 150)
m1 <- gam(y ~ s(x1, x2, bs = "gp", m = 2), data = dat) # power exponential
library(mgcViz)
plot(getViz(m1))
b1 <- brm(y ~ gp(x1, x2), data = dat,
chains = 2, cores = 2, iter = 400) # uses power exponential by default
# 150 seconds for 1000 transitions using 10 steps
summary(b1)
plot(b1)
# saving and loading due to frequent RSession crashes
saveRDS(b1, "b1_customstan.RDS")
b1 <- readRDS("b1_customstan.RDS")
x1 <- seq(from = 0.01, to = 1, by = 0.025)
x2 <- seq(from = 0.01, to = 1, by = 0.025)
newdata <- expand.grid(x1, x2)
colnames(newdata) <- c("x1", "x2")
head(newdata)
preds <- predict(b1, newdata = newdata, nsamples = 10)
# takes like 10 seconds
z <- preds[,1]
mat <- matrix(z, nrow = 40, ncol = 40)
image(x1, x2, mat)
plot(getViz(m1))
# seems to have roughly the same shape
# trying to replicate the 2d gp behavior of brms by calculating the covariance manually
stancode_2d <- make_stancode(y ~ gp(x1, x2), data = dat,
chains = 2, cores = 2, iter = 400)
standata_2d <- make_standata(y ~ gp(x1, x2), data = dat,
chains = 2, cores = 2, iter = 400)
# lets take a look at the 2d stancode
stancode_2d
# included term to return generated mu values
b2d <- stan(file = "customstan_2d_exp_quad.stan",
data = standata_2d,
iter = 400, chains = 2, cores = 2)
summary(b2d)
b2d_extract <- extract(b2d)
plot(b2d)
mu_gen <- b2d_extract$mu_generated
# 150 point pairs generated 150 different distributions
mu_means <- colMeans(mu_gen)
mapdata <- data.frame(x1 = dat$x1, x2 = dat$x2, y = mu_means)
ggplot(data = dat, aes(x = x1, y = x2, col = y)) +
geom_point() +
scale_color_gradient(low = "yellow", high = "red")
ggplot(data = mapdata, aes(x = x1, y = x2, col = y)) +
geom_point() +
scale_color_gradient(low = "yellow", high = "red")
# something is wrong
# running model generated from brms, with same data generation process
test_datagen <- stan(file = "stancode_2d.stan",
data = standata_2d,
iter = 400, chains = 2, cores = 2)
test_extract <- extract(test_datagen)
mu_gen2 <- test_extract$mu_generated
str(mu_gen2) # same structure
mu_means2 <- colMeans(mu_gen2)
mapdata2 <- data.frame(x1 = dat$x1, x2 = dat$x2, y = mu_means2)
ggplot(data = mapdata2, aes(x = x1, y = x2, col = y)) +
geom_point() +
scale_color_gradient(low = "yellow", high = "red") +
ggtitle("BRMS 1")
# the brms version seems consistent.
# lets try to remove the spectral density stuff for clarity
test_datagen_nospectral <- stan(file = "stancode_2d_nospectral.stan",
data = standata_2d,
iter = 400, chains = 2, cores = 2)
nospectral_extract <- extract(test_datagen_nospectral)
mu_gen3 <- nospectral_extract$mu_generated
mu_means3 <- colMeans(mu_gen3)
mapdata3 <- data.frame(x1 = dat$x1, x2 = dat$x2, y = mu_means3)
ggplot(data = mapdata3, aes(x = x1, y = x2, col = y)) +
geom_point() +
scale_color_gradient(low = "yellow", high = "red") +
ggtitle("BRMS 2: No Spectral")
# we need to include the points at which to predict from the model in the stan data
str(standata_2d)
# trying out custom covariance function
test_datagen_nospectral_custom <- stan(file = "stancode_2d_nospectral_custom.stan",
data = standata_2d,
iter = 400, chains = 2, cores = 2)
extra <- extract(test_datagen_nospectral_custom)
mu_gen4 <- extra$mu_generated
mu_means4 <- colMeans(mu_gen4)
mapdata4 <- data.frame(x1 = dat$x1, x2 = dat$x2, y = mu_means4)
all.equal(mapdata3, mapdata4) # good
ggplot(data = mapdata4, aes(x = x1, y = x2, col = y)) +
geom_point() +
scale_color_gradient(low = "yellow", high = "red") +
ggtitle("BRMS 3: Custom Cov.")
# the stan code compiles, lets see if we can swap out the covariance functions.
test_datagen_nospectral_custom_2 <- stan(file = "stancode_2d_nospectral_custom.stan",
data = standata_2d,
iter = 400, chains = 1, cores = 1)
# does not work
test <- stan_model(file = "test.stan", allow_undefined = TRUE,
includes = paste0('\n#include "',
file.path(getwd(), 'gp_matern52_cov.hpp'), '"\n'))
# we need to use cmdstan for custom functions or fix the weird as heck array accessing
# does not work
# # trying out custom covariance function
# b3 <- stan(file="customstan_matern32_predict.stan",
# data=list(x1=dat$x2, y1=dat$y, N1=length(dat$x2),
# x2=dat$newdata, N2=length(dat$newdata)),
# iter=400, chains=2, cores = 2) # matern 3/2
# m1 <- gam(y ~ s(x2, bs = "gp", m = 2), data = dat) # power exponential
# plot(m1)
# marginal_effects(dummy)
# m3 <- gam(y ~ s(x2, bs = "gp", m = 3), data = dat) # matern 3/2
# plot(m3)
# # asdf <- predict(m1, newdata = data.frame(x2 = newdata))
# # plot(asdf~newdata)
# installing cmdstanr to use covariance functions
devtools::install_github("stan-dev/cmdstanr")
library(cmdstanr)
# requires working version of cmdstan
install_cmdstan()
# verify that version can be found
cmdstan_version()