customstan.R

# custom stan 
dat <- gamSim(1, n = 100)
m1 <- gam(y ~ s(x2), data = dat)
plot(m1)

library(rstan)
b1 <- stan(file = "customstan.stan", 
           data = list(N = length(dat$x2),
                       x = dat$x2,
                       y = dat$y),
           iter = 400, chains = 2, cores = 2)

plot(b1)

library(brms)
dummy <- brm(y ~ gp(x2), data = dat,
             iter = 400, chains = 2, cores = 2)

#marginal_smooths(dummy)
marginal_effects(dummy) # effects is needed for gaussian process
class(dummy$fit)
class(b1)

dummy2 <- dummy
dummy2$fit <- b1

marginal_smooths(dummy2)
marginal_effects(dummy2)

stancode(dummy)

b1_extract <- extract(b1)

# define a model in terms of the joint distribution of the observed and unobserved 
# data
newdata <- seq(from = 0.01, to = 1, by = 0.01)
dat$newdata <- newdata

b2 <- stan(file="customstan_predict.stan", 
           data=list(x1=dat$x2, y1=dat$y, N1=length(dat$x2), 
                    x2=dat$newdata, N2=length(dat$newdata)),
            iter=400, chains=2, cores = 2)

wot <- extract(b2)
y_new <- colMeans(wot$y2)
plot(y_new~newdata)
points(y_new3~newdata, add = TRUE, col = "red")
# predictions can be made through newdata syntax.

# 2 steps left: 
# custom covariance functions
# multidimensional gaussian processes

# trying out self implemented squared exponential function
b3 <- stan(file = "customstan_cov_exp_quad.stan",
           data = list(x1=dat$x2, y1=dat$y, N1=length(dat$x2), 
                       x2=dat$newdata, N2=length(dat$newdata)),
           iter = 400, chains = 2, cores = 2)
wot3 <- extract(b3)
y_new3 <- colMeans(wot3$y2)
plot(y_new3~newdata)
points(y_new~newdata, add = TRUE, col = "red")
# with abs instead of squared distance it seems to rough
# b4 is with sqrt(squared distance)
# should not make a difference but lets see
b4 <- stan(file = "customstan_cov_exp_quad.stan",
           data = list(x1=dat$x2, y1=dat$y, N1=length(dat$x2), 
                       x2=dat$newdata, N2=length(dat$newdata)),
           iter = 400, chains = 2, cores = 2)
wot4 <- extract(b4)
y_new4 <- colMeans(wot4$y2)
plot(y_new4~newdata)
points(y_new~newdata, add = TRUE, col = "red")
# produces almost equivalent output but square root operations seems slower
plot(y_new4~newdata)
points(y_new3~newdata, add = TRUE, col = "red")
# there seems to be a slight difference between the default cov_exp_quad function
# and my implementation. but at least i know that custom functions to fill the 
# matrix work like expected. 

# trying out squaring rho
b5 <- stan(file = "customstan_cov_exp_quad.stan",
           data = list(x1=dat$x2, y1=dat$y, N1=length(dat$x2), 
                       x2=dat$newdata, N2=length(dat$newdata)),
           iter = 400, chains = 2, cores = 2)
wot5 <- extract(b5)
y_new5 <- colMeans(wot5$y2)
plot(y_new5~newdata)
points(y_new~newdata, col = "red", add = TRUE)
# that seems to be the correct way. 
# implementing custom covariance functions in the same way should be simple
# the two dimensional case should then be possible to be handled like in the 
# code generated by brms 

# trying 2 dimensional case

library(mgcv)
library(mgcViz)
library(brms)

set.seed(1)
dat <- gamSim(1, n = 150)
m1 <- gam(y ~ s(x1, x2, bs = "gp", m = 2), data = dat) # power exponential
library(mgcViz)
plot(getViz(m1))

b1 <- brm(y ~ gp(x1, x2), data = dat, 
          chains = 2, cores = 2, iter = 400) # uses power exponential by default
# 150 seconds for 1000 transitions using 10 steps
summary(b1)
plot(b1)

# saving and loading due to frequent RSession crashes
saveRDS(b1, "b1_customstan.RDS")
b1 <- readRDS("b1_customstan.RDS")

x1 <- seq(from = 0.01, to = 1, by = 0.025)
x2 <- seq(from = 0.01, to = 1, by = 0.025)
newdata <- expand.grid(x1, x2)
colnames(newdata) <- c("x1", "x2")
head(newdata)

preds <- predict(b1, newdata = newdata, nsamples = 10)
# takes like 10 seconds
z <- preds[,1]

mat <- matrix(z, nrow = 40, ncol = 40)
image(x1, x2, mat)
plot(getViz(m1))
# seems to have roughly the same shape

# trying to replicate the 2d gp behavior of brms by calculating the covariance manually
stancode_2d <- make_stancode(y ~ gp(x1, x2), data = dat, 
                             chains = 2, cores = 2, iter = 400)
standata_2d <- make_standata(y ~ gp(x1, x2), data = dat, 
                             chains = 2, cores = 2, iter = 400)
# lets take a look at the 2d stancode
stancode_2d

# included term to return generated mu values
b2d <- stan(file = "customstan_2d_exp_quad.stan",
           data = standata_2d,
           iter = 400, chains = 2, cores = 2)

summary(b2d)

b2d_extract <- extract(b2d)
plot(b2d)
mu_gen <- b2d_extract$mu_generated
# 150 point pairs generated 150 different distributions
mu_means <- colMeans(mu_gen)

mapdata <- data.frame(x1 = dat$x1, x2 = dat$x2, y = mu_means)

ggplot(data = dat, aes(x = x1, y = x2, col = y)) +
  geom_point() +
  scale_color_gradient(low = "yellow", high = "red")

ggplot(data = mapdata, aes(x = x1, y = x2, col = y)) +
  geom_point() +
  scale_color_gradient(low = "yellow", high = "red")
# something is wrong
# running model generated from brms, with same data generation process
test_datagen <- stan(file = "stancode_2d.stan",
            data = standata_2d,
            iter = 400, chains = 2, cores = 2)

test_extract <- extract(test_datagen)
mu_gen2 <- test_extract$mu_generated
str(mu_gen2) # same structure
mu_means2 <- colMeans(mu_gen2)
mapdata2 <- data.frame(x1 = dat$x1, x2 = dat$x2, y = mu_means2)

ggplot(data = mapdata2, aes(x = x1, y = x2, col = y)) +
  geom_point() +
  scale_color_gradient(low = "yellow", high = "red") +
  ggtitle("BRMS 1")

# the brms version seems consistent. 
# lets try to remove the spectral density stuff for clarity
test_datagen_nospectral <- stan(file = "stancode_2d_nospectral.stan",
                     data = standata_2d,
                     iter = 400, chains = 2, cores = 2)

nospectral_extract <- extract(test_datagen_nospectral)
mu_gen3 <- nospectral_extract$mu_generated
mu_means3 <- colMeans(mu_gen3)
mapdata3 <- data.frame(x1 = dat$x1, x2 = dat$x2, y = mu_means3)

ggplot(data = mapdata3, aes(x = x1, y = x2, col = y)) +
  geom_point() +
  scale_color_gradient(low = "yellow", high = "red") +
  ggtitle("BRMS 2: No Spectral")

# we need to include the points at which to predict from the model in the stan data
str(standata_2d)

# trying out custom covariance function
test_datagen_nospectral_custom <- stan(file = "stancode_2d_nospectral_custom.stan",
                                data = standata_2d,
                                iter = 400, chains = 2, cores = 2)

extra <- extract(test_datagen_nospectral_custom)
mu_gen4 <- extra$mu_generated
mu_means4 <- colMeans(mu_gen4)
mapdata4 <- data.frame(x1 = dat$x1, x2 = dat$x2, y = mu_means4)

all.equal(mapdata3, mapdata4) # good

ggplot(data = mapdata4, aes(x = x1, y = x2, col = y)) +
  geom_point() +
  scale_color_gradient(low = "yellow", high = "red") +
  ggtitle("BRMS 3: Custom Cov.")
# the stan code compiles, lets see if we can swap out the covariance functions.
test_datagen_nospectral_custom_2 <- stan(file = "stancode_2d_nospectral_custom.stan",
                                       data = standata_2d,
                                       iter = 400, chains = 1, cores = 1)
# does not work

test <- stan_model(file = "test.stan", allow_undefined = TRUE,
                   includes = paste0('\n#include "', 
                                     file.path(getwd(), 'gp_matern52_cov.hpp'), '"\n'))

# we need to use cmdstan for custom functions or fix the weird as heck array accessing
# does not work

# # trying out custom covariance function
# b3 <- stan(file="customstan_matern32_predict.stan", 
#            data=list(x1=dat$x2, y1=dat$y, N1=length(dat$x2), 
#                      x2=dat$newdata, N2=length(dat$newdata)),
#            iter=400, chains=2, cores = 2) # matern 3/2
# m1 <- gam(y ~ s(x2, bs = "gp", m = 2), data = dat) # power exponential
# plot(m1)
# marginal_effects(dummy)
# m3 <- gam(y ~ s(x2, bs = "gp", m = 3), data = dat) # matern 3/2
# plot(m3)
# # asdf <- predict(m1, newdata = data.frame(x2 = newdata))
# # plot(asdf~newdata)

# installing cmdstanr to use covariance functions
devtools::install_github("stan-dev/cmdstanr")
library(cmdstanr)
# requires working version of cmdstan
install_cmdstan()
# verify that version can be found
cmdstan_version()