Skip to content

Commit

Permalink
feat: Support for BinaryvieW and StringView types
Browse files Browse the repository at this point in the history
  • Loading branch information
jorisvandenbossche committed Jan 19, 2024
1 parent b3c952a commit b3a9894
Show file tree
Hide file tree
Showing 4 changed files with 58 additions and 1 deletion.
7 changes: 7 additions & 0 deletions src/nanoarrow/array.c
Original file line number Diff line number Diff line change
Expand Up @@ -695,6 +695,13 @@ static int ArrowArrayViewSetArrayInternal(struct ArrowArrayView* array_view,
}
}

if (array_view->storage_type == NANOARROW_TYPE_STRING_VIEW) {
array_view->n_varidic_buffers = array->n_buffers - 3;
array_view->variadic_buffer_sizes = array->buffers[array->n_buffers - 1];
// array_view->variadic_buffers = ...
buffers_required += array_view->n_varidic_buffers + 1;
}

// Check the number of buffers
if (buffers_required != array->n_buffers) {
ArrowErrorSet(error, "Expected array with %d buffer(s) but found %d buffer(s)",
Expand Down
12 changes: 11 additions & 1 deletion src/nanoarrow/nanoarrow_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -443,7 +443,9 @@ enum ArrowType {
NANOARROW_TYPE_LARGE_STRING,
NANOARROW_TYPE_LARGE_BINARY,
NANOARROW_TYPE_LARGE_LIST,
NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO
NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO,
NANOARROW_TYPE_BINARY_VIEW,
NANOARROW_TYPE_STRING_VIEW,
};

/// \brief Get a string value of an enum ArrowType value
Expand Down Expand Up @@ -482,10 +484,14 @@ static inline const char* ArrowTypeString(enum ArrowType type) {
return "double";
case NANOARROW_TYPE_STRING:
return "string";
case NANOARROW_TYPE_STRING_VIEW:
return "string_view";
case NANOARROW_TYPE_BINARY:
return "binary";
case NANOARROW_TYPE_FIXED_SIZE_BINARY:
return "fixed_size_binary";
case NANOARROW_TYPE_BINARY_VIEW:
return "binary_view";
case NANOARROW_TYPE_DATE32:
return "date32";
case NANOARROW_TYPE_DATE64:
Expand Down Expand Up @@ -784,6 +790,10 @@ struct ArrowArrayView {
/// type_id == union_type_id_map[128 + child_index]. This value may be
/// NULL in the case where child_id == type_id.
int8_t* union_type_id_map;

int64_t n_varidic_buffers;
int64_t* variadic_buffer_sizes;
const void** variadic_buffers;
};

// Used as the private data member for ArrowArrays allocated here and accessed
Expand Down
25 changes: 25 additions & 0 deletions src/nanoarrow/schema.c
Original file line number Diff line number Diff line change
Expand Up @@ -100,10 +100,14 @@ static const char* ArrowSchemaFormatTemplate(enum ArrowType type) {
return "u";
case NANOARROW_TYPE_LARGE_STRING:
return "U";
case NANOARROW_TYPE_STRING_VIEW:
return "vu";
case NANOARROW_TYPE_BINARY:
return "z";
case NANOARROW_TYPE_LARGE_BINARY:
return "Z";
case NANOARROW_TYPE_BINARY_VIEW:
return "vz";

case NANOARROW_TYPE_DATE32:
return "tdD";
Expand Down Expand Up @@ -690,6 +694,25 @@ static ArrowErrorCode ArrowSchemaViewParse(struct ArrowSchemaView* schema_view,
*format_end_out = format + 1;
return NANOARROW_OK;

// view types
case 'v':
switch (format[1]) {
case 'z':
schema_view->type = NANOARROW_TYPE_BINARY_VIEW;
schema_view->storage_type = NANOARROW_TYPE_BINARY_VIEW;
*format_end_out = format + 2;
return NANOARROW_OK;
case 'u':
schema_view->type = NANOARROW_TYPE_STRING_VIEW;
schema_view->storage_type = NANOARROW_TYPE_STRING_VIEW;
*format_end_out = format + 2;
return NANOARROW_OK;
default:
ArrowErrorSet(error, "Expected 'v' or 'u' following 'z' but found '%s'",
format + 1);
return EINVAL;
}

// nested types
case '+':
switch (format[1]) {
Expand Down Expand Up @@ -1055,8 +1078,10 @@ static ArrowErrorCode ArrowSchemaViewValidate(struct ArrowSchemaView* schema_vie
case NANOARROW_TYPE_DECIMAL256:
case NANOARROW_TYPE_STRING:
case NANOARROW_TYPE_LARGE_STRING:
case NANOARROW_TYPE_STRING_VIEW:
case NANOARROW_TYPE_BINARY:
case NANOARROW_TYPE_LARGE_BINARY:
case NANOARROW_TYPE_BINARY_VIEW:
case NANOARROW_TYPE_DATE32:
case NANOARROW_TYPE_DATE64:
case NANOARROW_TYPE_INTERVAL_MONTHS:
Expand Down
15 changes: 15 additions & 0 deletions src/nanoarrow/utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,21 @@ void ArrowLayoutInit(struct ArrowLayout* layout, enum ArrowType storage_type) {
layout->buffer_data_type[2] = NANOARROW_TYPE_BINARY;
break;

// case NANOARROW_TYPE_STRING_VIEW:
// layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA_OFFSET;
// layout->buffer_data_type[1] = NANOARROW_TYPE_INT64;
// layout->element_size_bits[1] = 64;
// layout->buffer_type[2] = NANOARROW_BUFFER_TYPE_DATA;
// layout->buffer_data_type[2] = NANOARROW_TYPE_STRING;
// break;
// case NANOARROW_TYPE_BINARY_VIEW:
// layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA_OFFSET;
// layout->buffer_data_type[1] = NANOARROW_TYPE_INT64;
// layout->element_size_bits[1] = 64;
// layout->buffer_type[2] = NANOARROW_BUFFER_TYPE_DATA;
// layout->buffer_data_type[2] = NANOARROW_TYPE_BINARY;
// break;

default:
break;
}
Expand Down

0 comments on commit b3a9894

Please sign in to comment.