run grkstyle::grk_style_dir(".")

r-causal · Dec 23, 2024 · f781293 · f781293
1 parent 9177f59
commit f781293
Show file tree

Hide file tree

Showing 6 changed files with 99 additions and 56 deletions.
diff --git a/R/ggdag-mask.R b/R/ggdag-mask.R
@@ -2,23 +2,52 @@
 # TODO: when `geom_dag_label_repel2` exists, add to namespace as 1 then delete this first bit
 # copied from source to avoid recursion issue in overriding in ggdag namsespace
 ggdag_geom_dag_label_repel <- function(
-    mapping = NULL, data = NULL, parse = FALSE, ...,
-    box.padding = grid::unit(0.35, "lines"), label.padding = grid::unit(0.25, "lines"),
-    point.padding = grid::unit(1.5, "lines"), label.r = grid::unit(0.15, "lines"),
-    label.size = 0.25, segment.color = "grey50", segment.size = 0.5, arrow = NULL,
-    force = 1, max.iter = 2000, nudge_x = 0, nudge_y = 0, na.rm = FALSE,
-    show.legend = NA, inherit.aes = TRUE) {
+  mapping = NULL,
+  data = NULL,
+  parse = FALSE,
+  ...,
+  box.padding = grid::unit(0.35, "lines"),
+  label.padding = grid::unit(0.25, "lines"),
+  point.padding = grid::unit(1.5, "lines"),
+  label.r = grid::unit(0.15, "lines"),
+  label.size = 0.25,
+  segment.color = "grey50",
+  segment.size = 0.5,
+  arrow = NULL,
+  force = 1,
+  max.iter = 2000,
+  nudge_x = 0,
+  nudge_y = 0,
+  na.rm = FALSE,
+  show.legend = NA,
+  inherit.aes = TRUE
+) {
   ggplot2::layer(
-    data = data, mapping = mapping, stat = ggdag:::StatNodesRepel,
-    geom = ggrepel::GeomLabelRepel, position = "identity",
-    show.legend = show.legend, inherit.aes = inherit.aes,
+    data = data,
+    mapping = mapping,
+    stat = ggdag:::StatNodesRepel,
+    geom = ggrepel::GeomLabelRepel,
+    position = "identity",
+    show.legend = show.legend,
+    inherit.aes = inherit.aes,
     params = list(
-      parse = parse, box.padding = box.padding,
-      label.padding = label.padding, point.padding = point.padding,
-      label.r = label.r, label.size = label.size, segment.colour = segment.color %||%
-        segment.colour, segment.size = segment.size,
-      arrow = arrow, na.rm = na.rm, force = force, max.iter = max.iter,
-      nudge_x = nudge_x, nudge_y = nudge_y, segment.alpha = 1, ...
+      parse = parse,
+      box.padding = box.padding,
+      label.padding = label.padding,
+      point.padding = point.padding,
+      label.r = label.r,
+      label.size = label.size,
+      segment.colour = segment.color %||%
+        segment.colour,
+      segment.size = segment.size,
+      arrow = arrow,
+      na.rm = na.rm,
+      force = force,
+      max.iter = max.iter,
+      nudge_x = nudge_x,
+      nudge_y = nudge_y,
+      segment.alpha = 1,
+      ...
     )
   )
 }

diff --git a/R/setup.R b/R/setup.R
@@ -61,26 +61,32 @@ est_ci <- function(.df, rsample = FALSE) {
 
 # based on https://github.com/hadley/r-pkgs/blob/main/common.R
 status <- function(type) {
-  status <- switch(type,
+  status <- switch(
+    type,
     unstarted = "is unstarted, but don't worry, it's on our roadmap",
     polishing = "has its foundations written but is still undergoing changes",
     wip = "is actively undergoing work and may be restructured or changed. It may also be incomplete",
     complete = "is mostly complete, but we might make small tweaks or copyedits",
     stop("Invalid `type`", call. = FALSE)
   )
 
-  class <- switch(type,
+  class <- switch(
+    type,
     complete = ,
     polishing = "callout-note",
     wip = "callout-warning",
     unstarted = "callout-warning"
   )
 
   knitr::asis_output(paste0(
-    "::: ", class, "\n",
+    "::: ",
+    class,
+    "\n",
     "## Work-in-progress 🚧\n",
     "You are reading the work-in-progress first edition of *Causal Inference in R*. ",
-    "This chapter ", status, ". \n",
+    "This chapter ",
+    status,
+    ". \n",
     ":::\n"
   ))
 }
diff --git a/chapters/05-not-just-a-stats-problem.qmd b/chapters/05-not-just-a-stats-problem.qmd
@@ -528,7 +528,8 @@ causal_quartet_time |>
   mutate(
     adjusted_effect =
       coef(
-        lm(outcome_followup ~ exposure_baseline + covariate_baseline,
+        lm(
+          outcome_followup ~ exposure_baseline + covariate_baseline,
           data = data
         )
       )[2]

diff --git a/chapters/06-stats-models-ci.qmd b/chapters/06-stats-models-ci.qmd
@@ -76,14 +76,14 @@ satisfaction1 <- tibble(
   # Weekly (0) vs Daily (1)
   update_frequency = rbinom(n, 1, p_exposure),
   # generate the "true" average treatment effect of 0
-  # to do this, we are going to generate the 
+  # to do this, we are going to generate the
   # potential outcomes, first if exposure = 0
   # `y0` = `satisfaction(weekly)`
   # notice `update_frequency` is not in the equation below
   # we use rnorm(n) to add the random error term that is normally
   # distributed with a mean of 0 and a standard deviation of 1
   y0 = customer_type + rnorm(n),
-  # because the true effect is 0, the potential outcome 
+  # because the true effect is 0, the potential outcome
   # if exposure = 1 is identical
   y1 = y0,
   # in practice, we will only see one of these
@@ -97,11 +97,11 @@ satisfaction1 <- tibble(
 ) |>
   mutate(
     update_frequency = factor(
-      update_frequency, 
+      update_frequency,
       labels = c("weekly", "daily")
     ),
     customer_type = factor(
-      customer_type, 
+      customer_type,
       labels = c("free", "premium")
     )
   )
@@ -134,7 +134,7 @@ In this case, there is only one such set: `customer_type`.
 satisfaction_strat <- satisfaction1 |>
   group_by(customer_type, update_frequency) |>
   summarise(
-    avg_satisfaction = mean(satisfaction), 
+    avg_satisfaction = mean(satisfaction),
     .groups = "drop"
   )
 
@@ -161,7 +161,7 @@ We can now take the overall average, giving us an effect close to 0.
 
 ```{r}
 satisfaction_strat_est |>
-  # note: we would need to weight this if the confounder 
+  # note: we would need to weight this if the confounder
   # groups were not equally sized
   summarise(estimate = mean(estimate))
 ```
@@ -192,8 +192,8 @@ dag2 <- dagify(
 
 ggdag(dag2, use_text = FALSE) +
   geom_dag_text(
-    aes(label = label), 
-    nudge_y = c(-.35, -.35, .35, .35, .35), 
+    aes(label = label),
+    nudge_y = c(-.35, -.35, .35, .35, .35),
     color = "black"
   ) +
   theme_dag()
@@ -216,27 +216,27 @@ satisfaction2 <- tibble(
   # Weekly (0) vs Daily (1)
   update_frequency = rbinom(n, 1, p_exposure),
   # More likely during business hours
-  customer_service_prob = business_hours * 0.9 + 
+  customer_service_prob = business_hours * 0.9 +
     (1 - business_hours) * 0.2,
   customer_service = rbinom(n, 1, prob = customer_service_prob),
   satisfaction = 70 + 10 * customer_type +
     15 * customer_service + rnorm(n),
 ) |>
   mutate(
     customer_type = factor(
-      customer_type, 
+      customer_type,
       labels = c("free", "premium")
     ),
     business_hours = factor(
-      business_hours, 
+      business_hours,
       labels = c("no", "yes")
     ),
     update_frequency = factor(
-      update_frequency, 
+      update_frequency,
       labels = c("weekly", "daily")
     ),
     customer_service = factor(
-      customer_service, 
+      customer_service,
       labels = c("no", "yes")
     )
   )
@@ -258,7 +258,7 @@ satisfaction2_strat <- satisfaction2 |>
     .groups = "drop"
   )
 
-satisfaction2_strat |> 
+satisfaction2_strat |>
   select(avg_satisfaction, everything())
 ```
 
@@ -339,7 +339,7 @@ satisfaction3 <- tibble(
 ) |>
   mutate(
     update_frequency = factor(
-      update_frequency, 
+      update_frequency,
       labels = c("weekly", "daily")
     )
   )
@@ -409,7 +409,7 @@ It also works well for continuous confounders, as we no longer need to bin it to
 
 ```{r}
 lm(
-  satisfaction ~ update_frequency + num_users, 
+  satisfaction ~ update_frequency + num_users,
   data = satisfaction3
 ) |>
   tidy(conf.int = TRUE) |>
@@ -537,24 +537,24 @@ satisfaction_randomized <- tibble(
   customer_service_prob = business_hours *
     0.9 + (1 - business_hours) * 0.2,
   customer_service = rbinom(n, 1, prob = customer_service_prob),
-  satisfaction = 70 + 10 * customer_type + 
+  satisfaction = 70 + 10 * customer_type +
     15 * customer_service + rnorm(n),
 ) |>
   mutate(
     customer_type = factor(
-      customer_type, 
+      customer_type,
       labels = c("free", "premium")
     ),
     business_hours = factor(
-      business_hours, 
+      business_hours,
       labels = c("no", "yes")
     ),
     update_frequency = factor(
-      update_frequency, 
+      update_frequency,
       labels = c("weekly", "daily")
     ),
     customer_service = factor(
-      customer_service, 
+      customer_service,
       labels = c("no", "yes")
     )
   )
@@ -564,21 +564,21 @@ plot_estimates <- function(d) {
     tidy(conf.int = TRUE) |>
     mutate(term = ifelse(
       term == "update_frequencydaily",
-      "update_frequency", 
+      "update_frequency",
       term
     )) |>
     filter(term == "update_frequency") |>
     mutate(model = "unadjusted")
 
   adj_model <- lm(
     satisfaction ~ update_frequency + business_hours +
-      customer_type, 
+      customer_type,
     data = d
   ) |>
     tidy(conf.int = TRUE) |>
     mutate(term = ifelse(
       term == "update_frequencydaily",
-      "update_frequency", 
+      "update_frequency",
       term
     )) |>
     filter(term == "update_frequency") |>
@@ -608,11 +608,14 @@ plot_estimates <- function(d) {
   )
 
   models <- bind_rows(unadj_model, adj_model, psw_model) |>
-    mutate(model = factor(model, levels = c(
-      "unadjusted",
-      "direct\nadjustment",
-      "inverse\nprobability\nweighting"
-    )))
+    mutate(model = factor(
+      model,
+      levels = c(
+        "unadjusted",
+        "direct\nadjustment",
+        "inverse\nprobability\nweighting"
+      )
+    ))
 
   models |>
     select(model, estimate, std.error, starts_with("conf")) |>
@@ -624,7 +627,8 @@ plot_estimates <- function(d) {
       conf.low = ifelse(statistic == "std.error", NA, conf.low),
       conf.high = ifelse(statistic == "std.error", NA, conf.high),
       statistic = case_match(
-        statistic, "estimate" ~ "estimate (95% CI)",
+        statistic,
+        "estimate" ~ "estimate (95% CI)",
         "std.error" ~ "standard error"
       )
     ) |>
@@ -634,7 +638,7 @@ plot_estimates <- function(d) {
       aes(xmin = conf.low, xmax = conf.high),
       height = 0
     ) +
-    facet_wrap(~ statistic, scales = "free_x") +
+    facet_wrap(~statistic, scales = "free_x") +
     theme(axis.title.y = element_blank())
 }
 

diff --git a/chapters/13-g-comp.qmd b/chapters/13-g-comp.qmd
@@ -314,9 +314,10 @@ Next, we write a function which will complete step 3: from a random sample from
 # contrast gives exposure (default 60) and control group (default 30) settings
 # n_sample is the size of the baseline resample of .data
 simulate_process <- function(
-    fit_obj,
-    contrast = c(60, 30),
-    n_sample = 10000) {
+  fit_obj,
+  contrast = c(60, 30),
+  n_sample = 10000
+) {
   # Draw a random sample of baseline variables
   df_baseline <- fit_obj |>
     pluck(".data") |>
@@ -377,7 +378,9 @@ compute_stats <- function(sim_obj) {
       names_prefix = "x_"
     ) |>
     summarize(
-      x_60, x_30, x_60 - x_30
+      x_60,
+      x_30,
+      x_60 - x_30
     )
 }
 ```

diff --git a/chapters/15-missingness-and-measurement.qmd b/chapters/15-missingness-and-measurement.qmd
@@ -779,7 +779,7 @@ calib_model <- lm(
 seven_dwarves_calib <- calib_model |>
   augment(
     data = seven_dwarfs_train_2018 |>
-      mutate(wait_minutes_actual_avg = log1p(wait_minutes_actual_avg)) |> 
+      mutate(wait_minutes_actual_avg = log1p(wait_minutes_actual_avg)) |>
       drop_na()
   ) |>
   rename(wait_minutes_actual_calib = .fitted) |>