Skip to content

Commit

Permalink
run grkstyle::grk_style_dir(".")
Browse files Browse the repository at this point in the history
  • Loading branch information
malcolmbarrett committed Dec 23, 2024
1 parent 9177f59 commit f781293
Show file tree
Hide file tree
Showing 6 changed files with 99 additions and 56 deletions.
59 changes: 44 additions & 15 deletions R/ggdag-mask.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,23 +2,52 @@
# TODO: when `geom_dag_label_repel2` exists, add to namespace as 1 then delete this first bit
# copied from source to avoid recursion issue in overriding in ggdag namsespace
ggdag_geom_dag_label_repel <- function(
mapping = NULL, data = NULL, parse = FALSE, ...,
box.padding = grid::unit(0.35, "lines"), label.padding = grid::unit(0.25, "lines"),
point.padding = grid::unit(1.5, "lines"), label.r = grid::unit(0.15, "lines"),
label.size = 0.25, segment.color = "grey50", segment.size = 0.5, arrow = NULL,
force = 1, max.iter = 2000, nudge_x = 0, nudge_y = 0, na.rm = FALSE,
show.legend = NA, inherit.aes = TRUE) {
mapping = NULL,
data = NULL,
parse = FALSE,
...,
box.padding = grid::unit(0.35, "lines"),
label.padding = grid::unit(0.25, "lines"),
point.padding = grid::unit(1.5, "lines"),
label.r = grid::unit(0.15, "lines"),
label.size = 0.25,
segment.color = "grey50",
segment.size = 0.5,
arrow = NULL,
force = 1,
max.iter = 2000,
nudge_x = 0,
nudge_y = 0,
na.rm = FALSE,
show.legend = NA,
inherit.aes = TRUE
) {
ggplot2::layer(
data = data, mapping = mapping, stat = ggdag:::StatNodesRepel,
geom = ggrepel::GeomLabelRepel, position = "identity",
show.legend = show.legend, inherit.aes = inherit.aes,
data = data,
mapping = mapping,
stat = ggdag:::StatNodesRepel,
geom = ggrepel::GeomLabelRepel,
position = "identity",
show.legend = show.legend,
inherit.aes = inherit.aes,
params = list(
parse = parse, box.padding = box.padding,
label.padding = label.padding, point.padding = point.padding,
label.r = label.r, label.size = label.size, segment.colour = segment.color %||%
segment.colour, segment.size = segment.size,
arrow = arrow, na.rm = na.rm, force = force, max.iter = max.iter,
nudge_x = nudge_x, nudge_y = nudge_y, segment.alpha = 1, ...
parse = parse,
box.padding = box.padding,
label.padding = label.padding,
point.padding = point.padding,
label.r = label.r,
label.size = label.size,
segment.colour = segment.color %||%
segment.colour,
segment.size = segment.size,
arrow = arrow,
na.rm = na.rm,
force = force,
max.iter = max.iter,
nudge_x = nudge_x,
nudge_y = nudge_y,
segment.alpha = 1,
...
)
)
}
Expand Down
14 changes: 10 additions & 4 deletions R/setup.R
Original file line number Diff line number Diff line change
Expand Up @@ -61,26 +61,32 @@ est_ci <- function(.df, rsample = FALSE) {

# based on https://github.com/hadley/r-pkgs/blob/main/common.R
status <- function(type) {
status <- switch(type,
status <- switch(
type,
unstarted = "is unstarted, but don't worry, it's on our roadmap",
polishing = "has its foundations written but is still undergoing changes",
wip = "is actively undergoing work and may be restructured or changed. It may also be incomplete",
complete = "is mostly complete, but we might make small tweaks or copyedits",
stop("Invalid `type`", call. = FALSE)
)

class <- switch(type,
class <- switch(
type,
complete = ,
polishing = "callout-note",
wip = "callout-warning",
unstarted = "callout-warning"
)

knitr::asis_output(paste0(
"::: ", class, "\n",
"::: ",
class,
"\n",
"## Work-in-progress 🚧\n",
"You are reading the work-in-progress first edition of *Causal Inference in R*. ",
"This chapter ", status, ". \n",
"This chapter ",
status,
". \n",
":::\n"
))
}
3 changes: 2 additions & 1 deletion chapters/05-not-just-a-stats-problem.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -528,7 +528,8 @@ causal_quartet_time |>
mutate(
adjusted_effect =
coef(
lm(outcome_followup ~ exposure_baseline + covariate_baseline,
lm(
outcome_followup ~ exposure_baseline + covariate_baseline,
data = data
)
)[2]
Expand Down
66 changes: 35 additions & 31 deletions chapters/06-stats-models-ci.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -76,14 +76,14 @@ satisfaction1 <- tibble(
# Weekly (0) vs Daily (1)
update_frequency = rbinom(n, 1, p_exposure),
# generate the "true" average treatment effect of 0
# to do this, we are going to generate the
# to do this, we are going to generate the
# potential outcomes, first if exposure = 0
# `y0` = `satisfaction(weekly)`
# notice `update_frequency` is not in the equation below
# we use rnorm(n) to add the random error term that is normally
# distributed with a mean of 0 and a standard deviation of 1
y0 = customer_type + rnorm(n),
# because the true effect is 0, the potential outcome
# because the true effect is 0, the potential outcome
# if exposure = 1 is identical
y1 = y0,
# in practice, we will only see one of these
Expand All @@ -97,11 +97,11 @@ satisfaction1 <- tibble(
) |>
mutate(
update_frequency = factor(
update_frequency,
update_frequency,
labels = c("weekly", "daily")
),
customer_type = factor(
customer_type,
customer_type,
labels = c("free", "premium")
)
)
Expand Down Expand Up @@ -134,7 +134,7 @@ In this case, there is only one such set: `customer_type`.
satisfaction_strat <- satisfaction1 |>
group_by(customer_type, update_frequency) |>
summarise(
avg_satisfaction = mean(satisfaction),
avg_satisfaction = mean(satisfaction),
.groups = "drop"
)
Expand All @@ -161,7 +161,7 @@ We can now take the overall average, giving us an effect close to 0.

```{r}
satisfaction_strat_est |>
# note: we would need to weight this if the confounder
# note: we would need to weight this if the confounder
# groups were not equally sized
summarise(estimate = mean(estimate))
```
Expand Down Expand Up @@ -192,8 +192,8 @@ dag2 <- dagify(
ggdag(dag2, use_text = FALSE) +
geom_dag_text(
aes(label = label),
nudge_y = c(-.35, -.35, .35, .35, .35),
aes(label = label),
nudge_y = c(-.35, -.35, .35, .35, .35),
color = "black"
) +
theme_dag()
Expand All @@ -216,27 +216,27 @@ satisfaction2 <- tibble(
# Weekly (0) vs Daily (1)
update_frequency = rbinom(n, 1, p_exposure),
# More likely during business hours
customer_service_prob = business_hours * 0.9 +
customer_service_prob = business_hours * 0.9 +
(1 - business_hours) * 0.2,
customer_service = rbinom(n, 1, prob = customer_service_prob),
satisfaction = 70 + 10 * customer_type +
15 * customer_service + rnorm(n),
) |>
mutate(
customer_type = factor(
customer_type,
customer_type,
labels = c("free", "premium")
),
business_hours = factor(
business_hours,
business_hours,
labels = c("no", "yes")
),
update_frequency = factor(
update_frequency,
update_frequency,
labels = c("weekly", "daily")
),
customer_service = factor(
customer_service,
customer_service,
labels = c("no", "yes")
)
)
Expand All @@ -258,7 +258,7 @@ satisfaction2_strat <- satisfaction2 |>
.groups = "drop"
)
satisfaction2_strat |>
satisfaction2_strat |>
select(avg_satisfaction, everything())
```

Expand Down Expand Up @@ -339,7 +339,7 @@ satisfaction3 <- tibble(
) |>
mutate(
update_frequency = factor(
update_frequency,
update_frequency,
labels = c("weekly", "daily")
)
)
Expand Down Expand Up @@ -409,7 +409,7 @@ It also works well for continuous confounders, as we no longer need to bin it to

```{r}
lm(
satisfaction ~ update_frequency + num_users,
satisfaction ~ update_frequency + num_users,
data = satisfaction3
) |>
tidy(conf.int = TRUE) |>
Expand Down Expand Up @@ -537,24 +537,24 @@ satisfaction_randomized <- tibble(
customer_service_prob = business_hours *
0.9 + (1 - business_hours) * 0.2,
customer_service = rbinom(n, 1, prob = customer_service_prob),
satisfaction = 70 + 10 * customer_type +
satisfaction = 70 + 10 * customer_type +
15 * customer_service + rnorm(n),
) |>
mutate(
customer_type = factor(
customer_type,
customer_type,
labels = c("free", "premium")
),
business_hours = factor(
business_hours,
business_hours,
labels = c("no", "yes")
),
update_frequency = factor(
update_frequency,
update_frequency,
labels = c("weekly", "daily")
),
customer_service = factor(
customer_service,
customer_service,
labels = c("no", "yes")
)
)
Expand All @@ -564,21 +564,21 @@ plot_estimates <- function(d) {
tidy(conf.int = TRUE) |>
mutate(term = ifelse(
term == "update_frequencydaily",
"update_frequency",
"update_frequency",
term
)) |>
filter(term == "update_frequency") |>
mutate(model = "unadjusted")
adj_model <- lm(
satisfaction ~ update_frequency + business_hours +
customer_type,
customer_type,
data = d
) |>
tidy(conf.int = TRUE) |>
mutate(term = ifelse(
term == "update_frequencydaily",
"update_frequency",
"update_frequency",
term
)) |>
filter(term == "update_frequency") |>
Expand Down Expand Up @@ -608,11 +608,14 @@ plot_estimates <- function(d) {
)
models <- bind_rows(unadj_model, adj_model, psw_model) |>
mutate(model = factor(model, levels = c(
"unadjusted",
"direct\nadjustment",
"inverse\nprobability\nweighting"
)))
mutate(model = factor(
model,
levels = c(
"unadjusted",
"direct\nadjustment",
"inverse\nprobability\nweighting"
)
))
models |>
select(model, estimate, std.error, starts_with("conf")) |>
Expand All @@ -624,7 +627,8 @@ plot_estimates <- function(d) {
conf.low = ifelse(statistic == "std.error", NA, conf.low),
conf.high = ifelse(statistic == "std.error", NA, conf.high),
statistic = case_match(
statistic, "estimate" ~ "estimate (95% CI)",
statistic,
"estimate" ~ "estimate (95% CI)",
"std.error" ~ "standard error"
)
) |>
Expand All @@ -634,7 +638,7 @@ plot_estimates <- function(d) {
aes(xmin = conf.low, xmax = conf.high),
height = 0
) +
facet_wrap(~ statistic, scales = "free_x") +
facet_wrap(~statistic, scales = "free_x") +
theme(axis.title.y = element_blank())
}
Expand Down
11 changes: 7 additions & 4 deletions chapters/13-g-comp.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -314,9 +314,10 @@ Next, we write a function which will complete step 3: from a random sample from
# contrast gives exposure (default 60) and control group (default 30) settings
# n_sample is the size of the baseline resample of .data
simulate_process <- function(
fit_obj,
contrast = c(60, 30),
n_sample = 10000) {
fit_obj,
contrast = c(60, 30),
n_sample = 10000
) {
# Draw a random sample of baseline variables
df_baseline <- fit_obj |>
pluck(".data") |>
Expand Down Expand Up @@ -377,7 +378,9 @@ compute_stats <- function(sim_obj) {
names_prefix = "x_"
) |>
summarize(
x_60, x_30, x_60 - x_30
x_60,
x_30,
x_60 - x_30
)
}
```
Expand Down
2 changes: 1 addition & 1 deletion chapters/15-missingness-and-measurement.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -779,7 +779,7 @@ calib_model <- lm(
seven_dwarves_calib <- calib_model |>
augment(
data = seven_dwarfs_train_2018 |>
mutate(wait_minutes_actual_avg = log1p(wait_minutes_actual_avg)) |>
mutate(wait_minutes_actual_avg = log1p(wait_minutes_actual_avg)) |>
drop_na()
) |>
rename(wait_minutes_actual_calib = .fitted) |>
Expand Down

0 comments on commit f781293

Please sign in to comment.