-
Notifications
You must be signed in to change notification settings - Fork 0
/
dr_fun.R
115 lines (94 loc) · 4.8 KB
/
dr_fun.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
match_models <- function(a, w, x, zip, a.vals, fmla, trim = 0.05) {
  # Estimate an exposure-response curve via GPS matching (CausalGPS::
  # generate_pseudo_pop) followed by a Poisson outcome regression on the
  # matched pseudo-population.
  #
  # Args:
  #   a:      numeric exposure vector for the zip-level data.
  #   w:      individual-level data.frame; must contain `zip`, `year`, and
  #           `time_count` (person-time, used as offset and averaging weight).
  #   x:      covariate matrix/data.frame used to fit the GPS.
  #   zip:    zip/year identifier data passed to generate_pseudo_pop as `Y`.
  #   a.vals: exposure grid at which the fitted curve is evaluated.
  #   fmla:   outcome-model formula for mgcv::bam (must involve `a`).
  #   trim:   symmetric quantile-trimming proportion, in [0, 0.5].
  #
  # Returns:
  #   list(estimate, match_data, adjusted_corr_results, original_corr_results)
  if (trim < 0 || trim > 0.5) {
    stop("`trim` must be between 0 and 0.5", call. = FALSE)
  }

  ## Matching estimator: build the matched pseudo-population
  match_pop <- generate_pseudo_pop(Y = zip, w = a, c = x,
                                   ci_appr = "matching",
                                   pred_model = "sl",
                                   gps_model = "parametric",
                                   use_cov_transform = TRUE,
                                   transformers = list("pow2", "pow3"),
                                   sl_lib = c("m_xgboost"),
                                   params = list(xgb_nrounds = c(50)),
                                   nthread = 12, # number of cores; adjust to machine
                                   covar_bl_method = "absolute",
                                   covar_bl_trs = 0.1,
                                   covar_bl_trs_type = "mean",
                                   trim_quantiles = c(trim, 1 - trim),
                                   optimized_compile = TRUE, # adds `counter` = times matched
                                   max_attempt = 5,
                                   matching_fun = "matching_l1",
                                   delta_n = 0.2, # caliper; keep consistent with prior analyses
                                   scale = 1.0)

  # Merge matched pseudo-population back onto the individual-level data and
  # keep only units matched at least once.
  pseudo <- match_pop$pseudo_pop
  match_data <- merge(w,
                      data.frame(zip = pseudo$zip,
                                 year = pseudo$year,
                                 a = pseudo$w,
                                 counter = pseudo$counter),
                      by = c("zip", "year"), all = FALSE)
  match_data <- subset(match_data, counter > 0)

  # Poisson outcome model, weighted by match frequency, person-time offset.
  match_curve <- mgcv::bam(fmla, data = match_data,
                           offset = log(time_count),
                           family = poisson(link = "log"),
                           weights = counter)

  # Evaluate the curve at each grid value, averaging predictions over the
  # population with person-time weights.
  estimate <- vapply(a.vals, function(a.tmp) {
    pred <- predict(match_curve, newdata = data.frame(a = a.tmp, w),
                    type = "response")
    weighted.mean(pred, w = w$time_count, na.rm = TRUE)
  }, numeric(1))

  list(estimate = estimate, match_data = match_data,
       adjusted_corr_results = match_pop$adjusted_corr_results,
       original_corr_results = match_pop$original_corr_results)
}
tmle_glm <- function(a, w, x, y, offset, a.vals, trt.var, trim = 0.01) {
  # Targeted maximum likelihood (TMLE) estimate of an exposure-response
  # curve for a Poisson (count) outcome with a continuous exposure.
  #
  # Args:
  #   a:       exposure vector for the GPS-fitting sample; assumes
  #            length(a) == nrow(x) — TODO confirm with callers.
  #   w:       data.frame for the prediction population; must contain the
  #            column named by `trt.var` plus the outcome-model covariates.
  #   x:       covariate data.frame used to fit the GPS linear model.
  #   y:       count outcome, one element per row of `w`.
  #   offset:  log person-time offset, one element per row of `w`.
  #   a.vals:  exposure grid at which to evaluate the curve (needs >= 4
  #            points for smooth.spline).
  #   trt.var: name of the treatment column in `w`.
  #   trim:    quantile at which stabilized weights are truncated.
  #
  # Returns:
  #   list(estimate = curve on a.vals,
  #        weights  = truncated stabilized weights for the `w` population)
  n <- nrow(x)
  id <- which(colnames(w) == trt.var)

  # Nuisance outcome model: Poisson GLM with natural-spline exposure term.
  # FIX: the original paste0 omitted the "+" between ns(a, 4) and the first
  # covariate, so formula() failed to parse.
  fmla <- formula(paste0("y ~ ns(a, 4) + ",
                         paste0(colnames(w[, -id]), collapse = " + ")))
  mumod <- glm(fmla, data = data.frame(w, a = w[, id]), offset = offset,
               family = poisson(link = "log"))
  muhat <- exp(log(mumod$fitted.values) - offset)  # rate scale, offset removed

  # Nuisance GPS: homoscedastic linear model for exposure given covariates
  # (fitted on x, predicted for both the x sample and the w population).
  pimod <- lm(a ~ 0 + ., data = data.frame(x))
  pimod.vals <- c(pimod$fitted.values, predict(pimod, newdata = data.frame(w)))
  pi2mod.vals <- sigma(pimod)^2

  # Nonparametric GPS: kernel density of standardized residuals, estimated
  # on the x sample (first n entries) and interpolated everywhere.
  a.std <- c(c(a, w[, id]) - pimod.vals) / sqrt(pi2mod.vals)
  dens <- density(a.std[1:n])
  pihat <- approx(x = dens$x, y = dens$y, xout = a.std)$y / sqrt(pi2mod.vals)
  pihat.mat <- sapply(a.vals, function(a.tmp) {
    std <- c(a.tmp - pimod.vals) / sqrt(pi2mod.vals)
    approx(x = dens$x, y = dens$y, xout = std)$y / sqrt(pi2mod.vals)
  })
  # Marginal exposure density, smoothed across the evaluation grid.
  phat <- predict(smooth.spline(a.vals, colMeans(pihat.mat[1:n, ], na.rm = TRUE)),
                  x = c(a, w[, id]))$y
  phat[phat < 0] <- 1e-4  # guard against negative spline extrapolation

  # TMLE fluctuation: weighted spline basis acts as the clever covariate.
  nsa <- ns(a, df = 4, intercept = TRUE)
  wts.stab <- phat / pihat  # stabilized weights; renamed from `weights`
                            # to avoid shadowing stats::weights
  trim0 <- quantile(wts.stab[1:n], trim)
  trim1 <- quantile(wts.stab[1:n], 1 - trim)
  wts.stab[wts.stab < trim0] <- trim0
  wts.stab[wts.stab > trim1] <- trim1
  base <- predict(nsa, newx = w[, id]) * wts.stab[-(1:n)]
  new_mod <- glm(y ~ 0 + base, offset = log(muhat) + offset,
                 family = poisson(link = "log"))
  param <- coef(new_mod)

  # Evaluate the updated curve on the exposure grid.
  estimate <- vapply(seq_along(a.vals), function(k) {
    w$a <- a.vals[k]  # local copy of w; enclosing w is untouched
    # FIX: predict on the response (rate) scale — the original used the
    # default type = "link" and then took log() of a log-scale value.
    # NOTE(review): changes numerical results vs. the original — confirm.
    muhat.tmp <- predict(mumod, newdata = data.frame(w), type = "response")
    pihat.tmp <- pihat.mat[, k]
    wts <- c(mean(pihat.tmp[1:n], na.rm = TRUE) / pihat.tmp[-(1:n)])
    wts[wts < trim0] <- trim0
    wts[wts > trim1] <- trim1
    mat <- predict(nsa, newx = rep(a.vals[k], length(wts))) * wts
    weighted.mean(exp(log(muhat.tmp) + c(mat %*% param)),
                  w = exp(offset), na.rm = TRUE)
  }, numeric(1))

  list(estimate = estimate, weights = wts.stab[-(1:n)])
}