Skip to content

Commit

Permalink
feat(r): Use classed warnings to signal that a lossy conversion occur…
Browse files Browse the repository at this point in the history
…red (#298)

This lets a caller take appropriate action (or give an error, or give a
better warning) if a lossy conversion occurs.

Closes #297.

``` r
library(nanoarrow)

array <- as_nanoarrow_array(2^54, schema = na_int64())

convert_array(array, double())
#> Warning in convert_array.default(array, double()): 1 value(s) may have incurred
#> loss of precision in conversion to double()
#> [1] 1.80144e+16

withCallingHandlers(
  convert_array(array, double()),
  nanoarrow_warning_lossy_conversion = function(x) {
    warning(
      "This is a better explanation of what happened!",
      call. = conditionCall(x)
    )
    tryInvokeRestart("muffleWarning")
  }
)
#> Warning in (function (x) : This is a better explanation of what happened!
#> [1] 1.80144e+16
```

<sup>Created on 2023-09-18 with [reprex
v2.0.2](https://reprex.tidyverse.org)</sup>
  • Loading branch information
paleolimbot authored Sep 19, 2023
1 parent 804630d commit cc10a28
Show file tree
Hide file tree
Showing 9 changed files with 33 additions and 14 deletions.
9 changes: 9 additions & 0 deletions r/R/util.R
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,15 @@ assert_arrow_installed <- function(reason) {
}
}

warn_lossy_conversion <- function(count, msg) {
cnd <- simpleWarning(
sprintf("%d value(s) %s", count, msg),
call = sys.call(-1)
)
class(cnd) <- union("nanoarrow_warning_lossy_conversion", class(cnd))

warning(cnd)
}

# Internally we use R_PreserveObject() and R_ReleaseObject() to manage R objects
# that must be kept alive for ArrowArray buffers to stay valid. This count
Expand Down
3 changes: 1 addition & 2 deletions r/src/as_array.c
Original file line number Diff line number Diff line change
Expand Up @@ -250,8 +250,7 @@ static void as_array_dbl(SEXP x_sexp, struct ArrowArray* array, SEXP schema_xptr
}

if (n_overflow > 0) {
Rf_warning("%ld value(s) overflowed in double -> na_int32() creation",
(long)n_overflow);
warn_lossy_conversion(n_overflow, "overflowed in double -> na_int32() creation");
}

buffer->size_bytes = len * sizeof(int32_t);
Expand Down
11 changes: 11 additions & 0 deletions r/src/materialize_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@

#include "nanoarrow.h"

#include "util.h"

// Vector types that have some special casing internally to avoid unnecessary allocations
// or looping at the R level. Some of these types also need an SEXP ptype to communicate
// additional information.
Expand Down Expand Up @@ -105,4 +107,13 @@ struct RConverter {
struct RConverter** children;
};

static inline void warn_lossy_conversion(int64_t count, const char* msg) {
SEXP fun = PROTECT(Rf_install("warn_lossy_conversion"));
SEXP count_sexp = PROTECT(Rf_ScalarReal(count));
SEXP msg_sexp = PROTECT(Rf_mkString(msg));
SEXP call = PROTECT(Rf_lang3(fun, count_sexp, msg_sexp));
Rf_eval(call, nanoarrow_ns_pkg);
UNPROTECT(4);
}

#endif
8 changes: 4 additions & 4 deletions r/src/materialize_dbl.h
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,8 @@ static inline int nanoarrow_materialize_dbl(struct RConverter* converter) {
for (R_xlen_t i = 0; i < dst->length; i++) {
double value = ArrowArrayViewGetDoubleUnsafe(src->array_view, src->offset + i);
if (value > MAX_DBL_AS_INTEGER || value < -MAX_DBL_AS_INTEGER) {
n_bad_values++;
// Content of null slot is undefined
n_bad_values += is_valid == NULL || ArrowBitGet(is_valid, raw_src_offset + i);
}

result[dst->offset + i] = value;
Expand All @@ -112,9 +113,8 @@ static inline int nanoarrow_materialize_dbl(struct RConverter* converter) {
}

if (n_bad_values > 0) {
Rf_warning(
"%ld value(s) may have incurred loss of precision in conversion to double()",
(long)n_bad_values);
warn_lossy_conversion(
n_bad_values, "may have incurred loss of precision in conversion to double()");
}

return NANOARROW_OK;
Expand Down
2 changes: 1 addition & 1 deletion r/src/materialize_int.h
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ static inline int nanoarrow_materialize_int(struct ArrayViewSlice* src,
}

if (n_bad_values > 0) {
Rf_warning("%ld value(s) outside integer range set to NA", (long)n_bad_values);
warn_lossy_conversion(n_bad_values, "outside integer range set to NA");
}

return NANOARROW_OK;
Expand Down
2 changes: 1 addition & 1 deletion r/src/materialize_int64.h
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ static inline int nanoarrow_materialize_int64(struct ArrayViewSlice* src,
}

if (n_bad_values > 0) {
Rf_warning("%ld value(s) outside integer64 range set to NA", (long)n_bad_values);
warn_lossy_conversion(n_bad_values, "outside integer64 range set to NA");
}

return NANOARROW_OK;
Expand Down
2 changes: 1 addition & 1 deletion r/src/materialize_unspecified.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ static inline int nanoarrow_materialize_unspecified(struct ArrayViewSlice* src,
}

if (n_bad_values > 0) {
Rf_warning("%ld non-null value(s) set to NA", (long)n_bad_values);
warn_lossy_conversion(n_bad_values, "that were non-null set to NA");
}
}

Expand Down
2 changes: 1 addition & 1 deletion r/tests/testthat/test-as-array.R
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ test_that("as_nanoarrow_array() works for double() -> na_int32()", {
# With overflow
expect_warning(
as_nanoarrow_array(.Machine$integer.max + as.double(1:5), schema = na_int32()),
"5 value\\(s\\) overflowed"
class = "nanoarrow_warning_lossy_conversion"
)
})

Expand Down
8 changes: 4 additions & 4 deletions r/tests/testthat/test-convert-array.R
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@ test_that("convert to vector works for unspecified()", {
convert_array(array, vctrs::unspecified()),
vctrs::vec_cast(rep(NA, 10), vctrs::unspecified())
),
"1 non-null value\\(s\\) set to NA"
class = "nanoarrow_warning_lossy_conversion"
)
})

Expand Down Expand Up @@ -391,13 +391,13 @@ test_that("convert to vector warns for invalid integer()", {
array <- as_nanoarrow_array(.Machine$integer.max + 1)
expect_warning(
expect_identical(convert_array(array, integer()), NA_integer_),
"1 value\\(s\\) outside integer range set to NA"
class = "nanoarrow_warning_lossy_conversion"
)

array <- as_nanoarrow_array(c(NA, .Machine$integer.max + 1))
expect_warning(
expect_identical(convert_array(array, integer()), c(NA_integer_, NA_integer_)),
"1 value\\(s\\) outside integer range set to NA"
class = "nanoarrow_warning_lossy_conversion"
)
})

Expand Down Expand Up @@ -519,7 +519,7 @@ test_that("convert to vector warns for possibly invalid double()", {
array <- as_nanoarrow_array(2^54, schema = na_int64())
expect_warning(
convert_array(array, double()),
"1 value\\(s\\) may have incurred loss of precision in conversion to double()"
class = "nanoarrow_warning_lossy_conversion"
)
})

Expand Down

0 comments on commit cc10a28

Please sign in to comment.