feat: Harden telemetry code against invalid arguments (#321)

* feat: Harden telemetry code against invalid arguments * meta_call() * Explicitly report failure if matrix job fails * More careful revision parsing
tidyverse · Nov 5, 2024 · 2bfa487 · 2bfa487
1 parent d44c268
commit 2bfa487
Show file tree

Hide file tree

Showing 22 changed files with 147 additions and 28 deletions.
diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml
@@ -66,10 +66,18 @@ jobs:
           GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
         run: |
           # Check status of this workflow
+          set -x
           state="pending"
-          sha=${{ inputs.ref }}
+          sha=""
           if [ -z "${sha}" ]; then
-            sha=${{ github.head_ref }}
+            if git rev_parse ${{ inputs.ref }}; then
+              sha=${{ inputs.ref }}
+            fi
+          fi
+          if [ -z "${sha}" ]; then
+            if git rev_parse ${{ github.head_ref }}; then
+              sha=${{ github.head_ref }}
+            fi
           fi
           if [ -z "${sha}" ]; then
             sha=${{ github.sha }}
@@ -171,6 +179,7 @@ jobs:
           GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
         run: |
           # Check status of this workflow
+          set -x
           if [ "${{ job.status }}" == "success" ]; then
             state="success"
           else
@@ -179,15 +188,19 @@ jobs:
 
           sha=${{ steps.commit.outputs.sha }}
           if [ -z "${sha}" ]; then
-            sha=${{ inputs.ref }}
+            if git rev_parse ${{ inputs.ref }}; then
+              sha=${{ inputs.ref }}
+            fi
           fi
           if [ -z "${sha}" ]; then
-            sha=${{ github.head_ref }}
+            if git rev_parse ${{ github.head_ref }}; then
+              sha=${{ github.head_ref }}
+            fi
           fi
           if [ -z "${sha}" ]; then
             sha=${{ github.sha }}
           fi
-            sha=$(git rev-parse ${sha})
+          sha=$(git rev-parse ${sha})
 
           html_url=$(gh api \
             -H "Accept: application/vnd.github+json" \
@@ -267,6 +280,51 @@ jobs:
         with:
           results: ${{ runner.os }}-r${{ matrix.r }}
 
+      - name: Update status for rcc
+        # FIXME: Wrap into action
+        if: always()
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          # Check status of this workflow
+          set -x
+          if [ "${{ job.status }}" == "success" ]; then
+            state="success"
+          else
+            state="failure"
+          fi
+
+          sha=${{ steps.commit.outputs.sha }}
+          if [ -z "${sha}" ]; then
+            if git rev_parse ${{ inputs.ref }}; then
+              sha=${{ inputs.ref }}
+            fi
+          fi
+          if [ -z "${sha}" ]; then
+            if git rev_parse ${{ github.head_ref }}; then
+              sha=${{ github.head_ref }}
+            fi
+          fi
+          if [ -z "${sha}" ]; then
+            sha=${{ github.sha }}
+          fi
+          sha=$(git rev-parse ${sha})
+
+          html_url=$(gh api \
+            -H "Accept: application/vnd.github+json" \
+            -H "X-GitHub-Api-Version: 2022-11-28" \
+            repos/${{ github.repository }}/actions/runs/${{ github.run_id }} | jq -r .html_url)
+
+          description="${{ github.workflow }} / ${{ github.job }}"
+
+          gh api \
+            --method POST \
+            -H "Accept: application/vnd.github+json" \
+            -H "X-GitHub-Api-Version: 2022-11-28" \
+            repos/${{ github.repository }}/statuses/${sha} \
+            -f "state=${state}" -f "target_url=${html_url}" -f "description=${description}" -f "context=rcc"
+        shell: bash
+
   rcc-suggests:
     needs:
       - rcc-smoke
@@ -340,3 +398,48 @@ jobs:
       - uses: ./.github/workflows/check
         with:
           results: ${{ matrix.package }}
+
+      - name: Update status for rcc
+        # FIXME: Wrap into action
+        if: always()
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          # Check status of this workflow
+          set -x
+          if [ "${{ job.status }}" == "success" ]; then
+            state="success"
+          else
+            state="failure"
+          fi
+
+          sha=${{ steps.commit.outputs.sha }}
+          if [ -z "${sha}" ]; then
+            if git rev_parse ${{ inputs.ref }}; then
+              sha=${{ inputs.ref }}
+            fi
+          fi
+          if [ -z "${sha}" ]; then
+            if git rev_parse ${{ github.head_ref }}; then
+              sha=${{ github.head_ref }}
+            fi
+          fi
+          if [ -z "${sha}" ]; then
+            sha=${{ github.sha }}
+          fi
+          sha=$(git rev-parse ${sha})
+
+          html_url=$(gh api \
+            -H "Accept: application/vnd.github+json" \
+            -H "X-GitHub-Api-Version: 2022-11-28" \
+            repos/${{ github.repository }}/actions/runs/${{ github.run_id }} | jq -r .html_url)
+
+          description="${{ github.workflow }} / ${{ github.job }}"
+
+          gh api \
+            --method POST \
+            -H "Accept: application/vnd.github+json" \
+            -H "X-GitHub-Api-Version: 2022-11-28" \
+            repos/${{ github.repository }}/statuses/${sha} \
+            -f "state=${state}" -f "target_url=${html_url}" -f "description=${description}" -f "context=rcc"
+        shell: bash
diff --git a/R/aaa-meta.R b/R/aaa-meta.R
@@ -1,6 +1,5 @@
 # Overwritten in meta.R
-meta_call_start <- function(...) {}
-meta_call_end <- function(...) {}
+meta_call <- function(...) {}
 meta_ext_register <- function(...) {}
 meta_rel_register <- function(...) {}
 meta_rel_register_df <- function(...) {}

diff --git a/R/anti_join.R b/R/anti_join.R
@@ -9,7 +9,7 @@ anti_join.duckplyr_df <- function(x, y, by = NULL, copy = FALSE, ..., na_matches
   na_matches <- check_na_matches(na_matches, error_call = error_call)
 
   # Our implementation
-  rel_try(list(name = "anti_join", x = x, y = y, args = list(by = if (!is.null(by) && !is_cross_by(by)) as_join_by(by), copy = copy, na_matches = na_matches)),
+  rel_try(list(name = "anti_join", x = x, y = y, args = try_list(by = if (!is.null(by) && !is_cross_by(by)) as_join_by(by), copy = copy, na_matches = na_matches)),
     "No restrictions" = FALSE,
     {
       out <- rel_join_impl(x, y, by, "anti", na_matches, error_call = error_call)

diff --git a/R/arrange.R b/R/arrange.R
@@ -6,7 +6,7 @@ arrange.duckplyr_df <- function(.data, ..., .by_group = FALSE, .locale = NULL) {
   dots <- enquos(...)
   dots <- unname(dots)
 
-  rel_try(list(name = "arrange", x = .data, args = list(dots = dots, .by_group = .by_group)),
+  rel_try(list(name = "arrange", x = .data, args = try_list(dots = dots, .by_group = .by_group)),
     ".by_group = TRUE not supported" = !identical(.by_group, FALSE),
     ".locale argument not supported" = !is.null(.locale),
     "dplyr.legacy_locale not supported" = isTRUE(getOption("dplyr.legacy_locale")),

diff --git a/R/count.R b/R/count.R
@@ -17,7 +17,7 @@ count.duckplyr_df <- function(x, ..., wt = NULL, sort = FALSE, name = NULL, .dro
   }
 
   # Passing `name` reliably is surprisingly complicated.
-  rel_try(list(name = "count", x = x, args = list(dots = enquos(...), wt = enquo(wt), sort = sort, .drop = .drop)),
+  rel_try(list(name = "count", x = x, args = try_list(dots = enquos(...), wt = enquo(wt), sort = sort, .drop = .drop)),
     "count() needs all(is_name)" = !all(is_name),
     "count() only implemented for .drop = TRUE" = !.drop,
     "count() only implemented for sort = FALSE" = sort,

diff --git a/R/distinct.R b/R/distinct.R
@@ -6,7 +6,7 @@ distinct.duckplyr_df <- function(.data, ..., .keep_all = FALSE) {
   dots <- enquos(..., .named = TRUE)
 
   # Our implementation
-  rel_try(list(name = "distinct", x = .data, args = list(dots = dots, .keep_all = .keep_all)),
+  rel_try(list(name = "distinct", x = .data, args = try_list(dots = dots, .keep_all = .keep_all)),
     "Implemented for all cases?" = FALSE,
     {
       # FIXME: avoid column duplication in a cleaner way

diff --git a/R/filter.R b/R/filter.R
@@ -8,7 +8,7 @@ filter.duckplyr_df <- function(.data, ..., .by = NULL, .preserve = FALSE) {
 
   by <- enquo(.by)
 
-  rel_try(list(name = "filter", x = .data, args = list(dots = dots, by = by, preserve = .preserve)),
+  rel_try(list(name = "filter", x = .data, args = try_list(dots = dots, by = by, preserve = .preserve)),
     "Can't use relational with zero-column result set." = (length(.data) == 0),
     "Can't use relational without filter conditions." = (length(dots) == 0),
     "Can't use relational with grouped operation." = (!quo_is_null(by)), # (length(by$names) > 0),

diff --git a/R/full_join.R b/R/full_join.R
@@ -6,7 +6,7 @@ full_join.duckplyr_df <- function(x, y, by = NULL, copy = FALSE, suffix = c(".x"
   y <- auto_copy(x, y, copy = copy)
 
   # Our implementation
-  rel_try(list(name = "full_join", x = x, y = y, args = list(by = if (!is.null(by) && !is_cross_by(by)) as_join_by(by), copy = copy, keep = keep, na_matches = na_matches, multiple = multiple, relationship = relationship)),
+  rel_try(list(name = "full_join", x = x, y = y, args = try_list(by = if (!is.null(by) && !is_cross_by(by)) as_join_by(by), copy = copy, keep = keep, na_matches = na_matches, multiple = multiple, relationship = relationship)),
     "No implicit cross joins for full_join()" = is_cross_by(by),
     "`multiple` not supported" = !identical(multiple, "all"),
     {

diff --git a/R/head.R b/R/head.R
@@ -2,7 +2,7 @@
 head.duckplyr_df <- function(x, n = 6L, ...) {
   stopifnot(is_integerish(n))
 
-  rel_try(list(name = "head", x = x, args = list(n = n)),
+  rel_try(list(name = "head", x = x, args = try_list(n = n)),
     "Can't process negative n" = (n < 0),
     {
       rel <- duckdb_rel_from_df(x)

diff --git a/R/inner_join.R b/R/inner_join.R
@@ -6,7 +6,7 @@ inner_join.duckplyr_df <- function(x, y, by = NULL, copy = FALSE, suffix = c(".x
   y <- auto_copy(x, y, copy = copy)
 
   # Our implementation
-  rel_try(list(name = "inner_join", x = x, y = y, args = list(by = if (!is.null(by) && !is_cross_by(by)) as_join_by(by), copy = copy, keep = keep, na_matches = na_matches, multiple = multiple, unmatched = unmatched, relationship = relationship)),
+  rel_try(list(name = "inner_join", x = x, y = y, args = try_list(by = if (!is.null(by) && !is_cross_by(by)) as_join_by(by), copy = copy, keep = keep, na_matches = na_matches, multiple = multiple, unmatched = unmatched, relationship = relationship)),
     "No implicit cross joins for inner_join()" = is_cross_by(by),
     "`multiple` not supported" = !identical(multiple, "all"),
     "`unmatched` not supported" = !identical(unmatched, "drop"),

diff --git a/R/left_join.R b/R/left_join.R
@@ -6,7 +6,7 @@ left_join.duckplyr_df <- function(x, y, by = NULL, copy = FALSE, suffix = c(".x"
   y <- auto_copy(x, y, copy = copy)
 
   # Our implementation
-  rel_try(list(name = "left_join", x = x, y = y, args = list(by = if (!is.null(by) && !is_cross_by(by)) as_join_by(by), copy = copy, keep = keep, na_matches = na_matches, multiple = multiple, unmatched = unmatched, relationship = relationship)),
+  rel_try(list(name = "left_join", x = x, y = y, args = try_list(by = if (!is.null(by) && !is_cross_by(by)) as_join_by(by), copy = copy, keep = keep, na_matches = na_matches, multiple = multiple, unmatched = unmatched, relationship = relationship)),
     "No implicit cross joins for left_join()" = is_cross_by(by),
     "`multiple` not supported" = !identical(multiple, "all"),
     "`unmatched` not supported" = !identical(unmatched, "drop"),

diff --git a/R/meta.R b/R/meta.R
@@ -6,6 +6,11 @@ macro_cache <- collections::dict()
 df_cache <- collections::dict()
 rel_cache <- collections::dict()
 
+meta_call <- function(name) {
+  meta_call_start(name)
+  withr::defer_parent(meta_call_end())
+}
+
 meta_call_start <- function(name) {
   call_stack$push(name)
 }

diff --git a/R/mutate.R b/R/mutate.R
@@ -7,7 +7,7 @@ mutate.duckplyr_df <- function(.data, ..., .by = NULL, .keep = c("all", "used",
   by_names <- eval_select_by(by_arg, .data)
 
   # Our implementation
-  rel_try(list(name = "mutate", x = .data, args = list(dots = enquos(...), .by = by_arg, .keep = .keep)),
+  rel_try(list(name = "mutate", x = .data, args = try_list(dots = enquos(...), .by = by_arg, .keep = .keep)),
     "Implemented for all cases?" = FALSE,
     {
       rel <- duckdb_rel_from_df(.data)

diff --git a/R/relational.R b/R/relational.R
@@ -1,10 +1,7 @@
 rel_try <- function(call, rel, ...) {
   call_name <- as.character(sys.call(-1)[[1]])
 
-  if (!is.null(call$name)) {
-    meta_call_start(call$name)
-    withr::defer(meta_call_end())
-  }
+  meta_call(call_name)
 
   # Avoid error when called via dplyr:::filter.data.frame() (in yamlet)
   if (length(call_name) == 1 && !(call_name %in% stats$calls)) {

diff --git a/R/relocate.R b/R/relocate.R
@@ -13,7 +13,7 @@ relocate.duckplyr_df <- function(.data, ..., .before = NULL, .after = NULL) {
   exprs <- exprs_from_loc(.data, loc)
 
   # Ensure `relocate()` appears in call stack
-  rel_try(list(name = "relocate", x = .data, args = list(dots = enquos(...), .before = enquo(.before), .after = enquo(.after))),
+  rel_try(list(name = "relocate", x = .data, args = try_list(dots = enquos(...), .before = enquo(.before), .after = enquo(.after))),
     "Can't use relational with zero-column result set." = (length(exprs) == 0),
     {
       rel <- duckdb_rel_from_df(.data)

diff --git a/R/rename.R b/R/rename.R
@@ -11,7 +11,7 @@ rename.duckplyr_df <- function(.data, ...) {
 
   exprs <- exprs_from_loc(.data, proj)
 
-  rel_try(list(name = "rename", x = .data, args = list(dots = enquos(...))),
+  rel_try(list(name = "rename", x = .data, args = try_list(dots = enquos(...))),
     "Can't use relational with zero-column result set." = (length(exprs) == 0),
     {
       rel <- duckdb_rel_from_df(.data)

diff --git a/R/right_join.R b/R/right_join.R
@@ -6,7 +6,7 @@ right_join.duckplyr_df <- function(x, y, by = NULL, copy = FALSE, suffix = c(".x
   y <- auto_copy(x, y, copy = copy)
 
   # Our implementation
-  rel_try(list(name = "right_join", x = x, y = y, args = list(by = if (!is.null(by) && !is_cross_by(by)) as_join_by(by), copy = copy, keep = keep, na_matches = na_matches, multiple = multiple, unmatched = unmatched, relationship = relationship)),
+  rel_try(list(name = "right_join", x = x, y = y, args = try_list(by = if (!is.null(by) && !is_cross_by(by)) as_join_by(by), copy = copy, keep = keep, na_matches = na_matches, multiple = multiple, unmatched = unmatched, relationship = relationship)),
     "No implicit cross joins for right_join()" = is_cross_by(by),
     "`multiple` not supported" = !identical(multiple, "all"),
     "`unmatched` not supported" = !identical(unmatched, "drop"),

diff --git a/R/select.R b/R/select.R
@@ -13,7 +13,7 @@ select.duckplyr_df <- function(.data, ...) {
 
   exprs <- exprs_from_loc(.data, loc)
 
-  rel_try(list(name = "select", x = .data, args = list(dots = enquos(...))),
+  rel_try(list(name = "select", x = .data, args = try_list(dots = enquos(...))),
     "Can't use relational with zero-column result set." = (length(exprs) == 0),
     {
       rel <- duckdb_rel_from_df(.data)

diff --git a/R/semi_join.R b/R/semi_join.R
@@ -9,7 +9,7 @@ semi_join.duckplyr_df <- function(x, y, by = NULL, copy = FALSE, ..., na_matches
   na_matches <- check_na_matches(na_matches, error_call = error_call)
 
   # Our implementation
-  rel_try(list(name = "semi_join", x = x, y = y, args = list(by = if (!is.null(by) && !is_cross_by(by)) as_join_by(by), copy = copy, na_matches = na_matches)),
+  rel_try(list(name = "semi_join", x = x, y = y, args = try_list(by = if (!is.null(by) && !is_cross_by(by)) as_join_by(by), copy = copy, na_matches = na_matches)),
     "No restrictions" = FALSE,
     {
       out <- rel_join_impl(x, y, by, "semi", na_matches, error_call = error_call)

diff --git a/R/summarise.R b/R/summarise.R
@@ -7,7 +7,7 @@ summarise.duckplyr_df <- function(.data, ..., .by = NULL, .groups = NULL) {
 
   by <- eval_select_by(enquo(.by), .data)
 
-  rel_try(list(name = "summarise", x = .data, args = list(dots = enquos(...), by = syms(by), .groups = .groups)),
+  rel_try(list(name = "summarise", x = .data, args = try_list(dots = enquos(...), by = syms(by), .groups = .groups)),
     'summarize(.groups = "rowwise") not supported' = identical(.groups, "rowwise"),
     {
       rel <- duckdb_rel_from_df(.data)

diff --git a/R/telemetry.R b/R/telemetry.R
@@ -1,5 +1,20 @@
 telemetry <- new_environment()
 
+try_list <- function(...) {
+  out <- vector("list", length = ...length())
+  for (i in seq_len(...length())) {
+    # Single-bracket assignment for the handling of NULLs
+    out[i] <- list(tryCatch(
+      ...elt(i),
+      error = function(e) {
+        paste0("<Error: ", conditionMessage(e), ">")
+      }
+    ))
+  }
+  names(out) <- ...names()
+  out
+}
+
 tel_fallback_logging <- function() {
   val <- Sys.getenv("DUCKPLYR_FALLBACK_COLLECT")
   if (val == "") {

diff --git a/R/transmute.R b/R/transmute.R
@@ -7,7 +7,7 @@ transmute.duckplyr_df <- function(.data, ...) {
   dots <- dplyr_quosures(!!!dots)
   dots <- fix_auto_name(dots)
 
-  rel_try(list(name = "transmute", x = .data, args = list(dots = enquos(...))),
+  rel_try(list(name = "transmute", x = .data, args = try_list(dots = enquos(...))),
     "Can't use relational with zero-column result set." = (length(dots) == 0),
     {
       exprs <- rel_translate_dots(dots, .data)