Skip to content

Commit

Permalink
feedback
Browse files Browse the repository at this point in the history
  • Loading branch information
WillAyd committed Sep 20, 2024
1 parent 561de01 commit ea4208a
Show file tree
Hide file tree
Showing 5 changed files with 78 additions and 28 deletions.
10 changes: 6 additions & 4 deletions src/nanoarrow/common/array.c
Original file line number Diff line number Diff line change
Expand Up @@ -111,9 +111,11 @@ static ArrowErrorCode ArrowArraySetStorageType(struct ArrowArray* array,
case NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO:
case NANOARROW_TYPE_FIXED_SIZE_BINARY:
case NANOARROW_TYPE_DENSE_UNION:
array->n_buffers = 2;
break;
case NANOARROW_TYPE_BINARY_VIEW:
case NANOARROW_TYPE_STRING_VIEW:
array->n_buffers = 2;
array->n_buffers = NANOARROW_BINARY_VIEW_FIXED_BUFFERS;
break;
case NANOARROW_TYPE_STRING:
case NANOARROW_TYPE_LARGE_STRING:
Expand Down Expand Up @@ -482,7 +484,7 @@ static void ArrowArrayFlushInternalPointers(struct ArrowArray* array) {
const int32_t nvirt_buf = private_data->n_variadic_buffers;
private_data->buffer_data = (const void**)ArrowRealloc(
private_data->buffer_data, sizeof(void*) * (nfixed_buf + nvirt_buf + 1));
for (int32_t i = 0; i < nvirt_buf; ++i) {
for (int32_t i = 0; i < nvirt_buf; i++) {
private_data->buffer_data[nfixed_buf + i] = private_data->variadic_buffers[i].data;
}
private_data->buffer_data[nfixed_buf + nvirt_buf] =
Expand Down Expand Up @@ -737,7 +739,7 @@ static int ArrowArrayViewSetArrayInternal(struct ArrowArrayView* array_view,
int64_t buffers_required = 0;
const int nfixed_buf = array_view->storage_type == NANOARROW_TYPE_STRING_VIEW ||
array_view->storage_type == NANOARROW_TYPE_BINARY_VIEW
? 2
? NANOARROW_BINARY_VIEW_FIXED_BUFFERS
: NANOARROW_MAX_FIXED_BUFFERS;
for (int i = 0; i < nfixed_buf; i++) {
if (array_view->layout.buffer_type[i] == NANOARROW_BUFFER_TYPE_NONE) {
Expand All @@ -760,7 +762,7 @@ static int ArrowArrayViewSetArrayInternal(struct ArrowArrayView* array_view,
if (array_view->storage_type == NANOARROW_TYPE_STRING_VIEW ||
array_view->storage_type == NANOARROW_TYPE_BINARY_VIEW) {
const int64_t n_buffers = array->n_buffers;
const int32_t nfixed_buf = 2;
const int32_t nfixed_buf = NANOARROW_BINARY_VIEW_FIXED_BUFFERS;

int32_t nvariadic_buf = (int32_t)(n_buffers - nfixed_buf - 1);
nvariadic_buf = (nvariadic_buf < 0) ? 0 : nvariadic_buf;
Expand Down
56 changes: 41 additions & 15 deletions src/nanoarrow/common/inline_array.h
Original file line number Diff line number Diff line change
Expand Up @@ -468,6 +468,7 @@ static inline ArrowErrorCode ArrowArrayAppendDouble(struct ArrowArray* array,
return NANOARROW_OK;
}

#define NANOARROW_BINARY_VIEW_FIXED_BUFFERS 2
#define NANOARROW_BINARY_VIEW_INLINE_SIZE 12
#define NANOARROW_BINARY_VIEW_PREVIEW_SIZE 4
#define NANOARROW_BINARY_VIEW_BLOCK_SIZE (32 << 10) // 32KB
Expand All @@ -493,6 +494,35 @@ union ArrowBinaryViewType {
int64_t alignment_dummy;
};

static inline ArrowErrorCode ArrowArrayAddVariadicBuffers(struct ArrowArray* array,
int32_t nbuffers,
int64_t* new_buffer_start) {
struct ArrowArrayPrivateData* private_data =
(struct ArrowArrayPrivateData*)array->private_data;

const int32_t n_current_bufs = private_data->n_variadic_buffers;
const int32_t n_bufs_needed = n_current_bufs + nbuffers;

private_data->variadic_buffers = (struct ArrowBuffer*)ArrowRealloc(
private_data->variadic_buffers, sizeof(struct ArrowBuffer) * n_bufs_needed);
if (private_data->variadic_buffers == NULL) {
return ENOMEM;
}
private_data->variadic_buffer_sizes = (int64_t*)ArrowRealloc(
private_data->variadic_buffer_sizes, sizeof(int64_t) * n_bufs_needed);
if (private_data->variadic_buffer_sizes == NULL) {
return ENOMEM;
}

for (int32_t i = 0; i < nbuffers; i++) {
ArrowBufferInit(&private_data->variadic_buffers[n_current_bufs + i]);
}
private_data->n_variadic_buffers = n_bufs_needed;
*new_buffer_start = n_current_bufs;

return NANOARROW_OK;
}

static inline ArrowErrorCode ArrowArrayAppendBytes(struct ArrowArray* array,
struct ArrowBufferView value) {
struct ArrowArrayPrivateData* private_data =
Expand All @@ -507,26 +537,21 @@ static inline ArrowErrorCode ArrowArrayAppendBytes(struct ArrowArray* array,
if (value.size_bytes <= NANOARROW_BINARY_VIEW_INLINE_SIZE) {
memcpy(bvt.inlined.data, value.data.as_char, value.size_bytes);
} else {
int32_t n_vbufs = private_data->n_variadic_buffers;
const int32_t n_vbufs = private_data->n_variadic_buffers;
int64_t buf_index = n_vbufs - 1;
if (n_vbufs == 0 ||
private_data->variadic_buffers[n_vbufs - 1].size_bytes + value.size_bytes >
NANOARROW_BINARY_VIEW_BLOCK_SIZE) {
++n_vbufs;
private_data->variadic_buffers = (struct ArrowBuffer*)ArrowRealloc(
private_data->variadic_buffers, sizeof(struct ArrowBuffer) * (n_vbufs));
private_data->variadic_buffer_sizes = (int64_t*)ArrowRealloc(
private_data->variadic_buffer_sizes, sizeof(int64_t) * (n_vbufs));
ArrowBufferInit(&private_data->variadic_buffers[n_vbufs - 1]);
private_data->n_variadic_buffers = n_vbufs;
NANOARROW_RETURN_NOT_OK(ArrowArrayAddVariadicBuffers(array, 1, &buf_index));
}

struct ArrowBuffer* variadic_buf = &private_data->variadic_buffers[n_vbufs - 1];
struct ArrowBuffer* variadic_buf = &private_data->variadic_buffers[buf_index];
memcpy(bvt.ref.data, value.data.as_char, NANOARROW_BINARY_VIEW_PREVIEW_SIZE);
bvt.ref.buffer_index = n_vbufs - 1;
bvt.ref.buffer_index = (int32_t)buf_index;
bvt.ref.offset = (int32_t)variadic_buf->size_bytes;
NANOARROW_RETURN_NOT_OK(
ArrowBufferAppend(variadic_buf, value.data.as_char, value.size_bytes));
private_data->variadic_buffer_sizes[n_vbufs - 1] = variadic_buf->size_bytes;
private_data->variadic_buffer_sizes[buf_index] = variadic_buf->size_bytes;
}
NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(data_buffer, &bvt, sizeof(bvt)));
} else {
Expand Down Expand Up @@ -876,7 +901,7 @@ static inline struct ArrowBufferView ArrowArrayViewBufferView(
const struct ArrowArrayView* array_view, int64_t i) {
if (array_view->storage_type == NANOARROW_TYPE_BINARY_VIEW ||
array_view->storage_type == NANOARROW_TYPE_STRING_VIEW) {
const int32_t nfixed_buf = 2;
const int32_t nfixed_buf = NANOARROW_BINARY_VIEW_FIXED_BUFFERS;
if (i < nfixed_buf) {
return array_view->buffer_views[i];
} else {
Expand All @@ -885,7 +910,8 @@ static inline struct ArrowBufferView ArrowArrayViewBufferView(
if (buf_vw.data.data == NULL) {
buf_vw.size_bytes = 0;
} else {
buf_vw.size_bytes = -1;
buf_vw.size_bytes =
array_view->variadic_buffer_sizes[i - NANOARROW_BINARY_VIEW_FIXED_BUFFERS];
}
return buf_vw;
}
Expand Down Expand Up @@ -1034,7 +1060,7 @@ static inline struct ArrowStringView ArrowArrayViewGetStringUnsafe(
sizeof(((union ArrowBinaryViewType*)0)->inlined.size);
} else {
const int32_t buf_index = bvt.ref.buffer_index;
const int32_t nfixed_buf = 2;
const int32_t nfixed_buf = NANOARROW_BINARY_VIEW_FIXED_BUFFERS;
const struct ArrowBufferView variadic_vw =
ArrowArrayViewBufferView(array_view, buf_index + nfixed_buf);
view.data = variadic_vw.data.as_char + bvt.ref.offset;
Expand Down Expand Up @@ -1088,7 +1114,7 @@ static inline struct ArrowBufferView ArrowArrayViewGetBytesUnsafe(
sizeof(((union ArrowBinaryViewType*)0)->inlined.size);
} else {
const int32_t buf_index = bvt.ref.buffer_index;
const int32_t nfixed_buf = 2;
const int32_t nfixed_buf = NANOARROW_BINARY_VIEW_FIXED_BUFFERS;
const struct ArrowBufferView variadic_vw =
ArrowArrayViewBufferView(array_view, buf_index + nfixed_buf);
view.data.as_uint8 = variadic_vw.data.as_uint8 + bvt.ref.offset;
Expand Down
2 changes: 1 addition & 1 deletion src/nanoarrow/common/inline_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -831,7 +831,7 @@ struct ArrowArrayPrivateData {

// The array of pointers to buffers. This must be updated after a sequence
// of appends to synchronize its values with the actual buffer addresses
// (which may have ben reallocated during that time)
// (which may have been reallocated during that time)
const void** buffer_data;

// The storage data type, or NANOARROW_TYPE_UNINITIALIZED if unknown
Expand Down
30 changes: 30 additions & 0 deletions src/nanoarrow/common/schema_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,10 @@ TEST(SchemaTest, SchemaInitSimple) {
ExpectSchemaInitOk(NANOARROW_TYPE_INTERVAL_MONTHS, month_interval());
ExpectSchemaInitOk(NANOARROW_TYPE_INTERVAL_DAY_TIME, day_time_interval());
ExpectSchemaInitOk(NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO, month_day_nano_interval());
#if defined(ARROW_VERSION_MAJOR) && ARROW_VERSION_MAJOR >= 15
ExpectSchemaInitOk(NANOARROW_TYPE_STRING_VIEW, utf8_view());
ExpectSchemaInitOk(NANOARROW_TYPE_BINARY_VIEW, binary_view());
#endif
}

TEST(SchemaTest, SchemaInitSimpleError) {
Expand Down Expand Up @@ -908,6 +912,32 @@ TEST(SchemaViewTest, SchemaViewInitBinaryAndString) {
EXPECT_EQ(schema_view.layout.element_size_bits[2], 0);
EXPECT_EQ(ArrowSchemaToStdString(&schema), "large_string");
ArrowSchemaRelease(&schema);

ARROW_EXPECT_OK(ExportType(*utf8_view(), &schema));
EXPECT_EQ(ArrowSchemaViewInit(&schema_view, &schema, &error), NANOARROW_OK);
EXPECT_EQ(schema_view.type, NANOARROW_TYPE_STRING_VIEW);
EXPECT_EQ(schema_view.storage_type, NANOARROW_TYPE_STRING_VIEW);
EXPECT_EQ(schema_view.layout.buffer_type[0], NANOARROW_BUFFER_TYPE_VALIDITY);
EXPECT_EQ(schema_view.layout.buffer_type[1], NANOARROW_BUFFER_TYPE_DATA_VIEW);
EXPECT_EQ(schema_view.layout.buffer_data_type[0], NANOARROW_TYPE_BOOL);
EXPECT_EQ(schema_view.layout.buffer_data_type[1], NANOARROW_TYPE_STRING_VIEW);
EXPECT_EQ(schema_view.layout.element_size_bits[0], 1);
EXPECT_EQ(schema_view.layout.element_size_bits[1], 128);
EXPECT_EQ(ArrowSchemaToStdString(&schema), "string_view");
ArrowSchemaRelease(&schema);

ARROW_EXPECT_OK(ExportType(*binary_view(), &schema));
EXPECT_EQ(ArrowSchemaViewInit(&schema_view, &schema, &error), NANOARROW_OK);
EXPECT_EQ(schema_view.type, NANOARROW_TYPE_BINARY_VIEW);
EXPECT_EQ(schema_view.storage_type, NANOARROW_TYPE_BINARY_VIEW);
EXPECT_EQ(schema_view.layout.buffer_type[0], NANOARROW_BUFFER_TYPE_VALIDITY);
EXPECT_EQ(schema_view.layout.buffer_type[1], NANOARROW_BUFFER_TYPE_DATA_VIEW);
EXPECT_EQ(schema_view.layout.buffer_data_type[0], NANOARROW_TYPE_BOOL);
EXPECT_EQ(schema_view.layout.buffer_data_type[1], NANOARROW_TYPE_BINARY_VIEW);
EXPECT_EQ(schema_view.layout.element_size_bits[0], 1);
EXPECT_EQ(schema_view.layout.element_size_bits[1], 128);
EXPECT_EQ(ArrowSchemaToStdString(&schema), "binary_view");
ArrowSchemaRelease(&schema);
}

TEST(SchemaViewTest, SchemaViewInitBinaryAndStringErrors) {
Expand Down
8 changes: 0 additions & 8 deletions src/nanoarrow/testing/testing.cc
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,6 @@ ArrowErrorCode WriteData(std::ostream& out, const ArrowArrayView* value,

case NANOARROW_TYPE_STRING:
case NANOARROW_TYPE_LARGE_STRING:
case NANOARROW_TYPE_STRING_VIEW:
WriteString(out, ArrowArrayViewGetStringUnsafe(value, 0));
for (int64_t i = 1; i < value->length; i++) {
out << ", ";
Expand All @@ -296,7 +295,6 @@ ArrowErrorCode WriteData(std::ostream& out, const ArrowArrayView* value,

case NANOARROW_TYPE_BINARY:
case NANOARROW_TYPE_LARGE_BINARY:
case NANOARROW_TYPE_BINARY_VIEW:
case NANOARROW_TYPE_FIXED_SIZE_BINARY: {
WriteBytesMaybeNull(out, value, 0);
for (int64_t i = 1; i < value->length; i++) {
Expand Down Expand Up @@ -406,18 +404,12 @@ ArrowErrorCode WriteTypeFromView(std::ostream& out, const ArrowSchemaView* field
case NANOARROW_TYPE_LARGE_STRING:
out << R"("name": "largeutf8")";
break;
case NANOARROW_TYPE_STRING_VIEW:
out << R"("name": "stringview")";
break;
case NANOARROW_TYPE_BINARY:
out << R"("name": "binary")";
break;
case NANOARROW_TYPE_LARGE_BINARY:
out << R"("name": "largebinary")";
break;
case NANOARROW_TYPE_BINARY_VIEW:
out << R"("name": "binaryview")";
break;
case NANOARROW_TYPE_FIXED_SIZE_BINARY:
out << R"("name": "fixedsizebinary", "byteWidth": )" << field->fixed_size;
break;
Expand Down

0 comments on commit ea4208a

Please sign in to comment.