diff --git a/src/nanoarrow/array.c b/src/nanoarrow/array.c
index e79074069..bcf5b5cab 100644
--- a/src/nanoarrow/array.c
+++ b/src/nanoarrow/array.c
@@ -695,6 +695,23 @@ static int ArrowArrayViewSetArrayInternal(struct ArrowArrayView* array_view,
     }
   }
 
+  // String/binary view arrays hold validity, views, N variadic data buffers, and
+  // finally one buffer with the N variadic buffer sizes (so at least 3 buffers).
+  if (array_view->storage_type == NANOARROW_TYPE_STRING_VIEW ||
+      array_view->storage_type == NANOARROW_TYPE_BINARY_VIEW) {
+    // Reject short buffer lists before indexing the trailing sizes buffer
+    if (array->n_buffers < 3) {
+      ArrowErrorSet(error, "Expected at least 3 buffer(s) for a view array but found %d",
+                    (int)array->n_buffers);
+      return EINVAL;
+    }
+
+    array_view->n_variadic_buffers = array->n_buffers - 3;
+    array_view->variadic_buffers = array->buffers + 2;
+    array_view->variadic_buffer_sizes = (int64_t*)array->buffers[array->n_buffers - 1];
+    buffers_required += array_view->n_variadic_buffers + 1;
+  }
+
   // Check the number of buffers
   if (buffers_required != array->n_buffers) {
     ArrowErrorSet(error, "Expected array with %d buffer(s) but found %d buffer(s)",
diff --git a/src/nanoarrow/nanoarrow_types.h b/src/nanoarrow/nanoarrow_types.h
index 0d5085f29..9d1cc255c 100644
--- a/src/nanoarrow/nanoarrow_types.h
+++ b/src/nanoarrow/nanoarrow_types.h
@@ -443,7 +443,9 @@ enum ArrowType {
   NANOARROW_TYPE_LARGE_STRING,
   NANOARROW_TYPE_LARGE_BINARY,
   NANOARROW_TYPE_LARGE_LIST,
-  NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO
+  NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO,
+  NANOARROW_TYPE_BINARY_VIEW,
+  NANOARROW_TYPE_STRING_VIEW,
 };
 
 /// \brief Get a string value of an enum ArrowType value
@@ -482,10 +484,14 @@ static inline const char* ArrowTypeString(enum ArrowType type) {
       return "double";
     case NANOARROW_TYPE_STRING:
       return "string";
+    case NANOARROW_TYPE_STRING_VIEW:
+      return "string_view";
     case NANOARROW_TYPE_BINARY:
       return "binary";
     case NANOARROW_TYPE_FIXED_SIZE_BINARY:
       return "fixed_size_binary";
+    case NANOARROW_TYPE_BINARY_VIEW:
+      return "binary_view";
     case NANOARROW_TYPE_DATE32:
       return "date32";
     case NANOARROW_TYPE_DATE64:
@@ -784,6 +790,15 @@ struct ArrowArrayView {
   /// type_id == union_type_id_map[128 + child_index]. This value may be
   /// NULL in the case where child_id == type_id.
   int8_t* union_type_id_map;
+
+  /// \brief Number of variadic data buffers (string/binary view arrays only)
+  int64_t n_variadic_buffers;
+
+  /// \brief Size in bytes of each variadic data buffer
+  int64_t* variadic_buffer_sizes;
+
+  /// \brief Pointers to the variadic data buffers
+  const void** variadic_buffers;
 };
 
 // Used as the private data member for ArrowArrays allocated here and accessed
diff --git a/src/nanoarrow/schema.c b/src/nanoarrow/schema.c
index 9ff1ac734..f67636c6d 100644
--- a/src/nanoarrow/schema.c
+++ b/src/nanoarrow/schema.c
@@ -100,10 +100,14 @@ static const char* ArrowSchemaFormatTemplate(enum ArrowType type) {
       return "u";
     case NANOARROW_TYPE_LARGE_STRING:
       return "U";
+    case NANOARROW_TYPE_STRING_VIEW:
+      return "vu";
     case NANOARROW_TYPE_BINARY:
       return "z";
     case NANOARROW_TYPE_LARGE_BINARY:
       return "Z";
+    case NANOARROW_TYPE_BINARY_VIEW:
+      return "vz";
     case NANOARROW_TYPE_DATE32:
       return "tdD";
 
@@ -690,6 +694,25 @@ static ArrowErrorCode ArrowSchemaViewParse(struct ArrowSchemaView* schema_view,
       *format_end_out = format + 1;
       return NANOARROW_OK;
 
+    // view types
+    case 'v':
+      switch (format[1]) {
+        case 'z':
+          schema_view->type = NANOARROW_TYPE_BINARY_VIEW;
+          schema_view->storage_type = NANOARROW_TYPE_BINARY_VIEW;
+          *format_end_out = format + 2;
+          return NANOARROW_OK;
+        case 'u':
+          schema_view->type = NANOARROW_TYPE_STRING_VIEW;
+          schema_view->storage_type = NANOARROW_TYPE_STRING_VIEW;
+          *format_end_out = format + 2;
+          return NANOARROW_OK;
+        default:
+          ArrowErrorSet(error, "Expected 'z' or 'u' following 'v' but found '%s'",
+                        format + 1);
+          return EINVAL;
+      }
+
     // nested types
     case '+':
       switch (format[1]) {
@@ -1055,8 +1078,10 @@ static ArrowErrorCode ArrowSchemaViewValidate(struct ArrowSchemaView* schema_vie
     case NANOARROW_TYPE_DECIMAL256:
     case NANOARROW_TYPE_STRING:
     case NANOARROW_TYPE_LARGE_STRING:
+    case NANOARROW_TYPE_STRING_VIEW:
     case NANOARROW_TYPE_BINARY:
     case NANOARROW_TYPE_LARGE_BINARY:
+    case NANOARROW_TYPE_BINARY_VIEW:
     case NANOARROW_TYPE_DATE32:
     case NANOARROW_TYPE_DATE64:
     case NANOARROW_TYPE_INTERVAL_MONTHS:
diff --git a/src/nanoarrow/utils.c b/src/nanoarrow/utils.c
index 2a17d25e6..812ae3415 100644
--- a/src/nanoarrow/utils.c
+++ b/src/nanoarrow/utils.c
@@ -178,6 +178,19 @@ void ArrowLayoutInit(struct ArrowLayout* layout, enum ArrowType storage_type) {
       layout->buffer_data_type[2] = NANOARROW_TYPE_BINARY;
       break;
 
+    // View types store one fixed-width 16-byte view per element in buffer 1;
+    // the variadic data buffers that follow are not part of the fixed layout.
+    case NANOARROW_TYPE_STRING_VIEW:
+      layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA;
+      layout->buffer_data_type[1] = NANOARROW_TYPE_STRING_VIEW;
+      layout->element_size_bits[1] = 128;
+      break;
+    case NANOARROW_TYPE_BINARY_VIEW:
+      layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA;
+      layout->buffer_data_type[1] = NANOARROW_TYPE_BINARY_VIEW;
+      layout->element_size_bits[1] = 128;
+      break;
+
     default:
       break;
   }