From 49e48161226a4019aab57691740ad8efa1c3bbf2 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Thu, 21 Sep 2023 23:06:48 +0200 Subject: [PATCH] fix: Return `EOVERFLOW` when appending to a string or binary type would exeed 2 GB (#302) This aligns nanoarrow with the behaviour in the ADBC postgres driver. It is useful to differentiate this case because frequently a caller will want to finish the current array and start a new one with the new value. --- src/nanoarrow/array_inline.h | 6 +++--- src/nanoarrow/array_test.cc | 30 ++++++++++++++++++++++++++++++ src/nanoarrow/nanoarrow.h | 18 +++++++++++------- 3 files changed, 44 insertions(+), 10 deletions(-) diff --git a/src/nanoarrow/array_inline.h b/src/nanoarrow/array_inline.h index 208462cc1..96fdf573b 100644 --- a/src/nanoarrow/array_inline.h +++ b/src/nanoarrow/array_inline.h @@ -469,8 +469,8 @@ static inline ArrowErrorCode ArrowArrayAppendBytes(struct ArrowArray* array, case NANOARROW_TYPE_STRING: case NANOARROW_TYPE_BINARY: offset = ((int32_t*)offset_buffer->data)[array->length]; - if ((offset + value.size_bytes) > INT32_MAX) { - return EINVAL; + if ((((int64_t)offset) + value.size_bytes) > INT32_MAX) { + return EOVERFLOW; } offset += (int32_t)value.size_bytes; @@ -618,7 +618,7 @@ static inline ArrowErrorCode ArrowArrayFinishElement(struct ArrowArray* array) { case NANOARROW_TYPE_MAP: child_length = array->children[0]->length; if (child_length > INT32_MAX) { - return EINVAL; + return EOVERFLOW; } NANOARROW_RETURN_NOT_OK( ArrowBufferAppendInt32(ArrowArrayBuffer(array, 1), (int32_t)child_length)); diff --git a/src/nanoarrow/array_test.cc b/src/nanoarrow/array_test.cc index ac99ecf42..eb3424355 100644 --- a/src/nanoarrow/array_test.cc +++ b/src/nanoarrow/array_test.cc @@ -878,6 +878,19 @@ TEST(ArrayTest, ArrayTestAppendToFixedSizeBinaryArray) { EXPECT_TRUE(arrow_array.ValueUnsafe()->Equals(expected_array.ValueUnsafe())); } +TEST(ArrayTest, ArrayTestAppendToBinaryArrayErrors) { + struct ArrowArray array; + + ASSERT_EQ(ArrowArrayInitFromType(&array, NANOARROW_TYPE_BINARY), NANOARROW_OK); + EXPECT_EQ(ArrowArrayStartAppending(&array), NANOARROW_OK); + struct ArrowBufferView item; + item.data.as_char = ""; + item.size_bytes = static_cast(INT_MAX) + 1; + EXPECT_EQ(ArrowArrayAppendBytes(&array, item), EOVERFLOW); + + array.release(&array); +} + TEST(ArrayTest, ArrayTestAppendToIntervalArrayYearMonth) { struct ArrowArray array; @@ -1317,6 +1330,23 @@ TEST(ArrayTest, ArrayTestAppendToFixedSizeListArray) { EXPECT_TRUE(arrow_array.ValueUnsafe()->Equals(expected_array.ValueUnsafe())); } +TEST(ArrayTest, ArrayTestAppendToListArrayErrors) { + struct ArrowArray array; + struct ArrowSchema schema; + struct ArrowError error; + + ASSERT_EQ(ArrowSchemaInitFromType(&schema, NANOARROW_TYPE_LIST), NANOARROW_OK); + ASSERT_EQ(ArrowSchemaSetType(schema.children[0], NANOARROW_TYPE_INT64), NANOARROW_OK); + ASSERT_EQ(ArrowArrayInitFromSchema(&array, &schema, nullptr), NANOARROW_OK); + ASSERT_EQ(ArrowArrayStartAppending(&array), NANOARROW_OK); + + array.children[0]->length = static_cast(INT32_MAX) + 1; + EXPECT_EQ(ArrowArrayFinishElement(&array), EOVERFLOW); + + array.release(&array); + schema.release(&schema); +} + TEST(ArrayTest, ArrayTestAppendToStructArray) { struct ArrowArray array; struct ArrowSchema schema; diff --git a/src/nanoarrow/nanoarrow.h b/src/nanoarrow/nanoarrow.h index 6f4d9052a..11bf6bb49 100644 --- a/src/nanoarrow/nanoarrow.h +++ b/src/nanoarrow/nanoarrow.h @@ -863,18 +863,21 @@ static inline ArrowErrorCode ArrowArrayAppendDouble(struct ArrowArray* array, /// \brief Append a string of bytes to an array /// /// Returns NANOARROW_OK if value can be exactly represented by -/// the underlying storage type or EINVAL otherwise (e.g., -/// the underlying array is not a binary, string, large binary, large string, -/// or fixed-size binary array, or value is the wrong size for a fixed-size -/// binary array). +/// the underlying storage type, EOVERFLOW if appending value would overflow +/// the offset type (e.g., if the data buffer would be larger than 2 GB for a +/// non-large string type), or EINVAL otherwise (e.g., the underlying array is not a +/// binary, string, large binary, large string, or fixed-size binary array, or value is +/// the wrong size for a fixed-size binary array). static inline ArrowErrorCode ArrowArrayAppendBytes(struct ArrowArray* array, struct ArrowBufferView value); /// \brief Append a string value to an array /// /// Returns NANOARROW_OK if value can be exactly represented by -/// the underlying storage type or EINVAL otherwise (e.g., -/// the underlying array is not a string or large string array). +/// the underlying storage type, EOVERFLOW if appending value would overflow +/// the offset type (e.g., if the data buffer would be larger than 2 GB for a +/// non-large string type), or EINVAL otherwise (e.g., the underlying array is not a +/// string or large string array). static inline ArrowErrorCode ArrowArrayAppendString(struct ArrowArray* array, struct ArrowStringView value); @@ -895,7 +898,8 @@ static inline ArrowErrorCode ArrowArrayAppendDecimal(struct ArrowArray* array, /// \brief Finish a nested array element /// /// Appends a non-null element to the array based on the first child's current -/// length. Returns NANOARROW_OK if the item was successfully added or EINVAL +/// length. Returns NANOARROW_OK if the item was successfully added, EOVERFLOW +/// if the child of a list or map array would exceed INT_MAX elements, or EINVAL /// if the underlying storage type is not a struct, list, large list, or fixed-size /// list, or if there was an attempt to add a struct or fixed-size list element where the /// length of the child array(s) did not match the expected length.