Skip to content

Commit

Permalink
fix: Return EOVERFLOW when appending to a string or binary type wou…
Browse files Browse the repository at this point in the history
…ld exeed 2 GB (#302)

This aligns nanoarrow with the behaviour in the ADBC postgres driver. It
is useful to differentiate this case because frequently a caller will
want to finish the current array and start a new one with the new value.
  • Loading branch information
paleolimbot authored Sep 21, 2023
1 parent 9343f1a commit 49e4816
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 10 deletions.
6 changes: 3 additions & 3 deletions src/nanoarrow/array_inline.h
Original file line number Diff line number Diff line change
Expand Up @@ -469,8 +469,8 @@ static inline ArrowErrorCode ArrowArrayAppendBytes(struct ArrowArray* array,
case NANOARROW_TYPE_STRING:
case NANOARROW_TYPE_BINARY:
offset = ((int32_t*)offset_buffer->data)[array->length];
if ((offset + value.size_bytes) > INT32_MAX) {
return EINVAL;
if ((((int64_t)offset) + value.size_bytes) > INT32_MAX) {
return EOVERFLOW;
}

offset += (int32_t)value.size_bytes;
Expand Down Expand Up @@ -618,7 +618,7 @@ static inline ArrowErrorCode ArrowArrayFinishElement(struct ArrowArray* array) {
case NANOARROW_TYPE_MAP:
child_length = array->children[0]->length;
if (child_length > INT32_MAX) {
return EINVAL;
return EOVERFLOW;
}
NANOARROW_RETURN_NOT_OK(
ArrowBufferAppendInt32(ArrowArrayBuffer(array, 1), (int32_t)child_length));
Expand Down
30 changes: 30 additions & 0 deletions src/nanoarrow/array_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -878,6 +878,19 @@ TEST(ArrayTest, ArrayTestAppendToFixedSizeBinaryArray) {
EXPECT_TRUE(arrow_array.ValueUnsafe()->Equals(expected_array.ValueUnsafe()));
}

TEST(ArrayTest, ArrayTestAppendToBinaryArrayErrors) {
struct ArrowArray array;

ASSERT_EQ(ArrowArrayInitFromType(&array, NANOARROW_TYPE_BINARY), NANOARROW_OK);
EXPECT_EQ(ArrowArrayStartAppending(&array), NANOARROW_OK);
struct ArrowBufferView item;
item.data.as_char = "";
item.size_bytes = static_cast<int64_t>(INT_MAX) + 1;
EXPECT_EQ(ArrowArrayAppendBytes(&array, item), EOVERFLOW);

array.release(&array);
}

TEST(ArrayTest, ArrayTestAppendToIntervalArrayYearMonth) {
struct ArrowArray array;

Expand Down Expand Up @@ -1317,6 +1330,23 @@ TEST(ArrayTest, ArrayTestAppendToFixedSizeListArray) {
EXPECT_TRUE(arrow_array.ValueUnsafe()->Equals(expected_array.ValueUnsafe()));
}

TEST(ArrayTest, ArrayTestAppendToListArrayErrors) {
struct ArrowArray array;
struct ArrowSchema schema;
struct ArrowError error;

ASSERT_EQ(ArrowSchemaInitFromType(&schema, NANOARROW_TYPE_LIST), NANOARROW_OK);
ASSERT_EQ(ArrowSchemaSetType(schema.children[0], NANOARROW_TYPE_INT64), NANOARROW_OK);
ASSERT_EQ(ArrowArrayInitFromSchema(&array, &schema, nullptr), NANOARROW_OK);
ASSERT_EQ(ArrowArrayStartAppending(&array), NANOARROW_OK);

array.children[0]->length = static_cast<int64_t>(INT32_MAX) + 1;
EXPECT_EQ(ArrowArrayFinishElement(&array), EOVERFLOW);

array.release(&array);
schema.release(&schema);
}

TEST(ArrayTest, ArrayTestAppendToStructArray) {
struct ArrowArray array;
struct ArrowSchema schema;
Expand Down
18 changes: 11 additions & 7 deletions src/nanoarrow/nanoarrow.h
Original file line number Diff line number Diff line change
Expand Up @@ -863,18 +863,21 @@ static inline ArrowErrorCode ArrowArrayAppendDouble(struct ArrowArray* array,
/// \brief Append a string of bytes to an array
///
/// Returns NANOARROW_OK if value can be exactly represented by
/// the underlying storage type or EINVAL otherwise (e.g.,
/// the underlying array is not a binary, string, large binary, large string,
/// or fixed-size binary array, or value is the wrong size for a fixed-size
/// binary array).
/// the underlying storage type, EOVERFLOW if appending value would overflow
/// the offset type (e.g., if the data buffer would be larger than 2 GB for a
/// non-large string type), or EINVAL otherwise (e.g., the underlying array is not a
/// binary, string, large binary, large string, or fixed-size binary array, or value is
/// the wrong size for a fixed-size binary array).
static inline ArrowErrorCode ArrowArrayAppendBytes(struct ArrowArray* array,
struct ArrowBufferView value);

/// \brief Append a string value to an array
///
/// Returns NANOARROW_OK if value can be exactly represented by
/// the underlying storage type or EINVAL otherwise (e.g.,
/// the underlying array is not a string or large string array).
/// the underlying storage type, EOVERFLOW if appending value would overflow
/// the offset type (e.g., if the data buffer would be larger than 2 GB for a
/// non-large string type), or EINVAL otherwise (e.g., the underlying array is not a
/// string or large string array).
static inline ArrowErrorCode ArrowArrayAppendString(struct ArrowArray* array,
struct ArrowStringView value);

Expand All @@ -895,7 +898,8 @@ static inline ArrowErrorCode ArrowArrayAppendDecimal(struct ArrowArray* array,
/// \brief Finish a nested array element
///
/// Appends a non-null element to the array based on the first child's current
/// length. Returns NANOARROW_OK if the item was successfully added or EINVAL
/// length. Returns NANOARROW_OK if the item was successfully added, EOVERFLOW
/// if the child of a list or map array would exceed INT_MAX elements, or EINVAL
/// if the underlying storage type is not a struct, list, large list, or fixed-size
/// list, or if there was an attempt to add a struct or fixed-size list element where the
/// length of the child array(s) did not match the expected length.
Expand Down

0 comments on commit 49e4816

Please sign in to comment.