diff --git a/r/R/util.R b/r/R/util.R index 260f34a7c..15f029210 100644 --- a/r/R/util.R +++ b/r/R/util.R @@ -38,6 +38,15 @@ assert_arrow_installed <- function(reason) { } } +warn_lossy_conversion <- function(count, msg) { + cnd <- simpleWarning( + sprintf("%d value(s) %s", count, msg), + call = sys.call(-1) + ) + class(cnd) <- union("nanoarrow_warning_lossy_conversion", class(cnd)) + + warning(cnd) +} # Internally we use R_PreserveObject() and R_ReleaseObject() to manage R objects # that must be kept alive for ArrowArray buffers to stay valid. This count diff --git a/r/src/as_array.c b/r/src/as_array.c index 5098e2332..afba1f288 100644 --- a/r/src/as_array.c +++ b/r/src/as_array.c @@ -250,8 +250,7 @@ static void as_array_dbl(SEXP x_sexp, struct ArrowArray* array, SEXP schema_xptr } if (n_overflow > 0) { - Rf_warning("%ld value(s) overflowed in double -> na_int32() creation", - (long)n_overflow); + warn_lossy_conversion(n_overflow, "overflowed in double -> na_int32() creation"); } buffer->size_bytes = len * sizeof(int32_t); diff --git a/r/src/materialize_common.h b/r/src/materialize_common.h index 1be73a907..4f5c52f88 100644 --- a/r/src/materialize_common.h +++ b/r/src/materialize_common.h @@ -23,6 +23,8 @@ #include "nanoarrow.h" +#include "util.h" + // Vector types that have some special casing internally to avoid unnecessary allocations // or looping at the R level. Some of these types also need an SEXP ptype to communicate // additional information. @@ -105,4 +107,13 @@ struct RConverter { struct RConverter** children; }; +static inline void warn_lossy_conversion(int64_t count, const char* msg) { + SEXP fun = PROTECT(Rf_install("warn_lossy_conversion")); + SEXP count_sexp = PROTECT(Rf_ScalarReal(count)); + SEXP msg_sexp = PROTECT(Rf_mkString(msg)); + SEXP call = PROTECT(Rf_lang3(fun, count_sexp, msg_sexp)); + Rf_eval(call, nanoarrow_ns_pkg); + UNPROTECT(4); +} + #endif diff --git a/r/src/materialize_dbl.h b/r/src/materialize_dbl.h index 9269d88b4..a69b4eb70 100644 --- a/r/src/materialize_dbl.h +++ b/r/src/materialize_dbl.h @@ -91,7 +91,8 @@ static inline int nanoarrow_materialize_dbl(struct RConverter* converter) { for (R_xlen_t i = 0; i < dst->length; i++) { double value = ArrowArrayViewGetDoubleUnsafe(src->array_view, src->offset + i); if (value > MAX_DBL_AS_INTEGER || value < -MAX_DBL_AS_INTEGER) { - n_bad_values++; + // Content of null slot is undefined + n_bad_values += is_valid == NULL || ArrowBitGet(is_valid, raw_src_offset + i); } result[dst->offset + i] = value; @@ -112,9 +113,8 @@ static inline int nanoarrow_materialize_dbl(struct RConverter* converter) { } if (n_bad_values > 0) { - Rf_warning( - "%ld value(s) may have incurred loss of precision in conversion to double()", - (long)n_bad_values); + warn_lossy_conversion( + n_bad_values, "may have incurred loss of precision in conversion to double()"); } return NANOARROW_OK; diff --git a/r/src/materialize_int.h b/r/src/materialize_int.h index 60ead5a4d..57dff58f6 100644 --- a/r/src/materialize_int.h +++ b/r/src/materialize_int.h @@ -131,7 +131,7 @@ static inline int nanoarrow_materialize_int(struct ArrayViewSlice* src, } if (n_bad_values > 0) { - Rf_warning("%ld value(s) outside integer range set to NA", (long)n_bad_values); + warn_lossy_conversion(n_bad_values, "outside integer range set to NA"); } return NANOARROW_OK; diff --git a/r/src/materialize_int64.h b/r/src/materialize_int64.h index f63617b5b..ad83671e5 100644 --- a/r/src/materialize_int64.h +++ b/r/src/materialize_int64.h @@ -120,7 +120,7 @@ static inline int nanoarrow_materialize_int64(struct ArrayViewSlice* src, } if (n_bad_values > 0) { - Rf_warning("%ld value(s) outside integer64 range set to NA", (long)n_bad_values); + warn_lossy_conversion(n_bad_values, "outside integer64 range set to NA"); } return NANOARROW_OK; diff --git a/r/src/materialize_unspecified.h b/r/src/materialize_unspecified.h index 72bcc2379..40e622ccb 100644 --- a/r/src/materialize_unspecified.h +++ b/r/src/materialize_unspecified.h @@ -52,7 +52,7 @@ static inline int nanoarrow_materialize_unspecified(struct ArrayViewSlice* src, } if (n_bad_values > 0) { - Rf_warning("%ld non-null value(s) set to NA", (long)n_bad_values); + warn_lossy_conversion(n_bad_values, "that were non-null set to NA"); } } diff --git a/r/tests/testthat/test-as-array.R b/r/tests/testthat/test-as-array.R index e30c73f58..52b144c03 100644 --- a/r/tests/testthat/test-as-array.R +++ b/r/tests/testthat/test-as-array.R @@ -172,7 +172,7 @@ test_that("as_nanoarrow_array() works for double() -> na_int32()", { # With overflow expect_warning( as_nanoarrow_array(.Machine$integer.max + as.double(1:5), schema = na_int32()), - "5 value\\(s\\) overflowed" + class = "nanoarrow_warning_lossy_conversion" ) }) diff --git a/r/tests/testthat/test-convert-array.R b/r/tests/testthat/test-convert-array.R index e2c39941e..49265a8e6 100644 --- a/r/tests/testthat/test-convert-array.R +++ b/r/tests/testthat/test-convert-array.R @@ -208,7 +208,7 @@ test_that("convert to vector works for unspecified()", { convert_array(array, vctrs::unspecified()), vctrs::vec_cast(rep(NA, 10), vctrs::unspecified()) ), - "1 non-null value\\(s\\) set to NA" + class = "nanoarrow_warning_lossy_conversion" ) }) @@ -391,13 +391,13 @@ test_that("convert to vector warns for invalid integer()", { array <- as_nanoarrow_array(.Machine$integer.max + 1) expect_warning( expect_identical(convert_array(array, integer()), NA_integer_), - "1 value\\(s\\) outside integer range set to NA" + class = "nanoarrow_warning_lossy_conversion" ) array <- as_nanoarrow_array(c(NA, .Machine$integer.max + 1)) expect_warning( expect_identical(convert_array(array, integer()), c(NA_integer_, NA_integer_)), - "1 value\\(s\\) outside integer range set to NA" + class = "nanoarrow_warning_lossy_conversion" ) }) @@ -519,7 +519,7 @@ test_that("convert to vector warns for possibly invalid double()", { array <- as_nanoarrow_array(2^54, schema = na_int64()) expect_warning( convert_array(array, double()), - "1 value\\(s\\) may have incurred loss of precision in conversion to double()" + class = "nanoarrow_warning_lossy_conversion" ) })