diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index c977fde7a..9795be5dd 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -86,6 +86,7 @@ jobs: sudo apt update sudo apt install -y -V libarrow-dev # For C++ sudo apt install -y -V libarrow-glib-dev # For GLib (C) + sudo apt install -y -V valgrind # For memory leak test elif [ "$RUNNER_OS" == "macOS" ]; then brew install apache-arrow brew install apache-arrow-glib @@ -108,9 +109,9 @@ jobs: cargo build popd pushd ffi - cargo b --features default-engine,sync-engine + cargo b --features default-engine,sync-engine,test-ffi popd - - name: build and run test + - name: build and run read-table test run: | pushd ffi/examples/read-table mkdir build @@ -118,6 +119,14 @@ jobs: cmake .. make make test + - name: build and run visit-expression test + run: | + pushd ffi/examples/visit-expression + mkdir build + pushd build + cmake .. + make + make test coverage: runs-on: ubuntu-latest diff --git a/ffi/Cargo.toml b/ffi/Cargo.toml index 9722eb8f3..1bd69e708 100644 --- a/ffi/Cargo.toml +++ b/ffi/Cargo.toml @@ -47,3 +47,4 @@ default-engine = [ ] sync-engine = ["delta_kernel/sync-engine"] developer-visibility = [] +test-ffi = [] diff --git a/ffi/examples/visit-expression/CMakeLists.txt b/ffi/examples/visit-expression/CMakeLists.txt new file mode 100644 index 000000000..1b89deec6 --- /dev/null +++ b/ffi/examples/visit-expression/CMakeLists.txt @@ -0,0 +1,33 @@ +cmake_minimum_required(VERSION 3.12) +project(visit_expressions) + +add_executable(visit_expression visit_expression.c) +target_compile_definitions(visit_expression PUBLIC DEFINE_DEFAULT_ENGINE) +target_include_directories(visit_expression PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/../../../target/ffi-headers") +target_link_directories(visit_expression PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/../../../target/debug") +target_link_libraries(visit_expression PUBLIC delta_kernel_ffi) +target_compile_options(visit_expression PUBLIC) + +target_compile_options(visit_expression PRIVATE -Wall -Wextra -Wpedantic -Werror -Wno-strict-prototypes) + +# Get info on the OS and platform +if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin") + set(MACOSX TRUE) +endif() +if(UNIX AND NOT APPLE) + set(LINUX TRUE) +endif() + +# Add the kernel expresion -> engine expression test +include(CTest) +set(ExprTestRunner "../../../tests/test-expression-visitor/run_test.sh") +set(ExprExpectedPath "../../../tests/test-expression-visitor/expected.txt") +add_test(NAME test_expression_visitor COMMAND ${ExprTestRunner} ${ExprExpectedPath}) +if(LINUX) + add_test(NAME test_expression_visitor_leaks COMMAND valgrind + --error-exitcode=1 + --tool=memcheck + --leak-check=full + --errors-for-leak-kinds=definite + --show-leak-kinds=definite ./visit_expression) +endif() diff --git a/ffi/examples/visit-expression/expression.h b/ffi/examples/visit-expression/expression.h new file mode 100644 index 000000000..302566d10 --- /dev/null +++ b/ffi/examples/visit-expression/expression.h @@ -0,0 +1,440 @@ +#pragma once + +#include "delta_kernel_ffi.h" +#include +#include +#include +#include +#include +#include + +/** + * This module defines a very simple model of an expression, used only to be able to print the + * provided expression. It consists of an "ExpressionBuilder" which is our user data that gets + * passed into each visit_x call. This simply keeps track of all the lists we are asked to allocate. + * + * Each "ExpressionItem" tracks the type and pointer to the expression. + * + * Each complex type is made of an "ExpressionItemList", which tracks its length and an array of + * "ExpressionItems" that make up the complex type. The top level expression is in a length 1 + * "ExpressionItemList". + */ + +/************************************************************* + * Data Types + ************************************************************/ +enum OpType { + Add, + Minus, + Divide, + Multiply, + LessThan, + LessThanOrEqual, + GreaterThan, + GreaterThaneOrEqual, + Equal, + NotEqual, + Distinct, + In, + NotIn, +}; +enum LitType { + Integer, + Long, + Short, + Byte, + Float, + Double, + String, + Boolean, + Timestamp, + TimestampNtz, + Date, + Binary, + Decimal, + Null, + Struct, + Array +}; +enum ExpressionType { BinOp, Variadic, Literal, Unary, Column }; +enum VariadicType { + And, + Or, + StructExpression, +}; +enum UnaryType { Not, IsNull }; +typedef struct { + void* ref; + enum ExpressionType type; +} ExpressionItem; +typedef struct { + uint32_t len; + ExpressionItem* list; +} ExpressionItemList; +struct BinOp { + enum OpType op; + ExpressionItemList exprs; +}; +struct Variadic { + enum VariadicType op; + ExpressionItemList exprs; +}; +struct Unary { + enum UnaryType type; + ExpressionItemList sub_expr; +}; +struct BinaryData { + uint8_t* buf; + uintptr_t len; +}; +struct Decimal { + uint64_t value[2]; + uint8_t precision; + uint8_t scale; +}; +typedef struct { + size_t list_count; + ExpressionItemList* lists; +} ExpressionBuilder; +struct Struct { + ExpressionItemList fields; + ExpressionItemList values; +}; +struct ArrayData { + ExpressionItemList exprs; +}; +struct Literal { + enum LitType type; + union LiteralValue { + int32_t integer_data; + int64_t long_data; + int16_t short_data; + int8_t byte_data; + float float_data; + double double_data; + bool boolean_data; + char* string_data; + struct Struct struct_data; + struct ArrayData array_data; + struct BinaryData binary; + struct Decimal decimal; + } value; +}; + +/************************************************************* + * Utility functions + ************************************************************/ +void put_expr_item(void* data, size_t sibling_list_id, void* ref, enum ExpressionType type) { + ExpressionBuilder* data_ptr = (ExpressionBuilder*)data; + ExpressionItem expr = { .ref = ref, .type = type }; + ExpressionItemList* list = &data_ptr->lists[sibling_list_id]; + list->list[list->len++] = expr; +} +ExpressionItemList get_expr_list(void* data, size_t list_id) { + ExpressionBuilder* data_ptr = (ExpressionBuilder*)data; + assert(list_id < data_ptr->list_count); + return data_ptr->lists[list_id]; +} +// utility to turn a slice into a char* +char* allocate_string(const KernelStringSlice slice) { + return strndup(slice.ptr, slice.len); +} + +/************************************************************* + * Binary Operations + ************************************************************/ +#define DEFINE_BINOP(fun_name, op) \ + void fun_name(void* data, uintptr_t sibling_list_id, uintptr_t child_list_id) { \ + visit_expr_binop(data, sibling_list_id, op, child_list_id); \ + } +void visit_expr_binop(void* data, + uintptr_t sibling_id_list, + enum OpType op, + uintptr_t child_id_list) { + struct BinOp* binop = malloc(sizeof(struct BinOp)); + binop->op = op; + binop->exprs = get_expr_list(data, child_id_list); + put_expr_item(data, sibling_id_list, binop, BinOp); +} +DEFINE_BINOP(visit_expr_add, Add) +DEFINE_BINOP(visit_expr_minus, Minus) +DEFINE_BINOP(visit_expr_multiply, Multiply) +DEFINE_BINOP(visit_expr_divide, Divide) +DEFINE_BINOP(visit_expr_lt, LessThan) +DEFINE_BINOP(visit_expr_le, LessThanOrEqual) +DEFINE_BINOP(visit_expr_gt, GreaterThan) +DEFINE_BINOP(visit_expr_ge, GreaterThaneOrEqual) +DEFINE_BINOP(visit_expr_eq, Equal) +DEFINE_BINOP(visit_expr_ne, NotEqual) +DEFINE_BINOP(visit_expr_distinct, Distinct) +DEFINE_BINOP(visit_expr_in, In) +DEFINE_BINOP(visit_expr_not_in, NotIn) +#undef DEFINE_BINOP + +/************************************************************* + * Literal Values + ************************************************************/ +#define DEFINE_SIMPLE_SCALAR(fun_name, enum_member, c_type, literal_field) \ + void fun_name(void* data, uintptr_t sibling_list_id, c_type val) { \ + struct Literal* lit = malloc(sizeof(struct Literal)); \ + lit->type = enum_member; \ + lit->value.literal_field = val; \ + put_expr_item(data, sibling_list_id, lit, Literal); \ + } \ + _Static_assert(sizeof(c_type) <= sizeof(uintptr_t), \ + "The provided type is not a valid simple scalar") +DEFINE_SIMPLE_SCALAR(visit_expr_int_literal, Integer, int32_t, integer_data); +DEFINE_SIMPLE_SCALAR(visit_expr_long_literal, Long, int64_t, long_data); +DEFINE_SIMPLE_SCALAR(visit_expr_short_literal, Short, int16_t, short_data); +DEFINE_SIMPLE_SCALAR(visit_expr_byte_literal, Byte, int8_t, byte_data); +DEFINE_SIMPLE_SCALAR(visit_expr_float_literal, Float, float, float_data); +DEFINE_SIMPLE_SCALAR(visit_expr_double_literal, Double, double, double_data); +DEFINE_SIMPLE_SCALAR(visit_expr_boolean_literal, Boolean, _Bool, boolean_data); +DEFINE_SIMPLE_SCALAR(visit_expr_timestamp_literal, Timestamp, int64_t, long_data); +DEFINE_SIMPLE_SCALAR(visit_expr_timestamp_ntz_literal, TimestampNtz, int64_t, long_data); +DEFINE_SIMPLE_SCALAR(visit_expr_date_literal, Date, int32_t, integer_data); +#undef DEFINE_SIMPLE_SCALAR + +void visit_expr_string_literal(void* data, uintptr_t sibling_list_id, KernelStringSlice string) { + struct Literal* literal = malloc(sizeof(struct Literal)); + literal->type = String; + literal->value.string_data = allocate_string(string); + put_expr_item(data, sibling_list_id, literal, Literal); +} +void visit_expr_decimal_literal(void* data, + uintptr_t sibling_list_id, + uint64_t value_ms, + uint64_t value_ls, + uint8_t precision, + uint8_t scale) { + struct Literal* literal = malloc(sizeof(struct Literal)); + literal->type = Decimal; + struct Decimal* dec = &literal->value.decimal; + dec->value[0] = value_ms; + dec->value[1] = value_ls; + dec->precision = precision; + dec->scale = scale; + put_expr_item(data, sibling_list_id, literal, Literal); +} +void visit_expr_binary_literal(void* data, + uintptr_t sibling_list_id, + const uint8_t* buf, + uintptr_t len) { + struct Literal* literal = malloc(sizeof(struct Literal)); + literal->type = Binary; + struct BinaryData* bin = &literal->value.binary; + bin->buf = malloc(len); + bin->len = len; + memcpy(bin->buf, buf, len); + put_expr_item(data, sibling_list_id, literal, Literal); +} +void visit_expr_struct_literal(void* data, + uintptr_t sibling_list_id, + uintptr_t child_field_list_id, + uintptr_t child_value_list_id) { + struct Literal* literal = malloc(sizeof(struct Literal)); + literal->type = Struct; + struct Struct* struct_data = &literal->value.struct_data; + struct_data->fields = get_expr_list(data, child_field_list_id); + struct_data->values = get_expr_list(data, child_value_list_id); + put_expr_item(data, sibling_list_id, literal, Literal); +} +void visit_expr_null_literal(void* data, uintptr_t sibling_id_list) { + struct Literal* literal = malloc(sizeof(struct Literal)); + literal->type = Null; + put_expr_item(data, sibling_id_list, literal, Literal); +} + +/************************************************************* + * Variadic Expressions + ************************************************************/ +#define DEFINE_VARIADIC(fun_name, enum_member) \ + void fun_name(void* data, uintptr_t sibling_list_id, uintptr_t child_list_id) { \ + visit_expr_variadic(data, sibling_list_id, enum_member, child_list_id); \ + } + +void visit_expr_variadic(void* data, + uintptr_t sibling_list_id, + enum VariadicType op, + uintptr_t child_list_id) { + struct Variadic* var = malloc(sizeof(struct Variadic)); + var->op = op; + var->exprs = get_expr_list(data, child_list_id); + put_expr_item(data, sibling_list_id, var, Variadic); +} +DEFINE_VARIADIC(visit_expr_and, And) +DEFINE_VARIADIC(visit_expr_or, Or) +DEFINE_VARIADIC(visit_expr_struct_expr, StructExpression) +#undef DEFINE_VARIADIC + +void visit_expr_array_literal(void* data, uintptr_t sibling_list_id, uintptr_t child_list_id) { + struct Literal* literal = malloc(sizeof(struct Literal)); + literal->type = Array; + struct ArrayData* arr = &(literal->value.array_data); + arr->exprs = get_expr_list(data, child_list_id); + put_expr_item(data, sibling_list_id, literal, Literal); +} + +/************************************************************* + * Unary Expressions + ************************************************************/ +#define DEFINE_UNARY(fun_name, op) \ + void fun_name(void* data, uintptr_t sibling_list_id, uintptr_t child_list_id) { \ + visit_expr_unary(data, sibling_list_id, op, child_list_id); \ + } + +void visit_expr_unary(void* data, + uintptr_t sibling_list_id, + enum UnaryType type, + uintptr_t child_list_id) { + struct Unary* unary = malloc(sizeof(struct Unary)); + unary->type = type; + unary->sub_expr = get_expr_list(data, child_list_id); + put_expr_item(data, sibling_list_id, unary, Unary); +} +DEFINE_UNARY(visit_expr_is_null, IsNull) +DEFINE_UNARY(visit_expr_not, Not) +#undef DEFINE_UNARY + +/************************************************************* + * Column Expression + ************************************************************/ +void visit_expr_column(void* data, uintptr_t sibling_id_list, KernelStringSlice col_name) { + char* column_name = allocate_string(col_name); + put_expr_item(data, sibling_id_list, column_name, Column); +} + +/************************************************************* + * EngineExpressionVisitor Implementation + ************************************************************/ +uintptr_t make_field_list(void* data, uintptr_t reserve) { + ExpressionBuilder* builder = data; + int id = builder->list_count; + builder->list_count++; + builder->lists = realloc(builder->lists, sizeof(ExpressionItemList) * builder->list_count); + ExpressionItem* list = calloc(reserve, sizeof(ExpressionItem)); + builder->lists[id].len = 0; + builder->lists[id].list = list; + return id; +} + +ExpressionItemList construct_predicate(SharedExpression* predicate) { + ExpressionBuilder data = { 0 }; + EngineExpressionVisitor visitor = { + .data = &data, + .make_field_list = make_field_list, + .visit_literal_int = visit_expr_int_literal, + .visit_literal_long = visit_expr_long_literal, + .visit_literal_short = visit_expr_short_literal, + .visit_literal_byte = visit_expr_byte_literal, + .visit_literal_float = visit_expr_float_literal, + .visit_literal_double = visit_expr_double_literal, + .visit_literal_bool = visit_expr_boolean_literal, + .visit_literal_timestamp = visit_expr_timestamp_literal, + .visit_literal_timestamp_ntz = visit_expr_timestamp_ntz_literal, + .visit_literal_date = visit_expr_date_literal, + .visit_literal_binary = visit_expr_binary_literal, + .visit_literal_null = visit_expr_null_literal, + .visit_literal_decimal = visit_expr_decimal_literal, + .visit_literal_string = visit_expr_string_literal, + .visit_literal_struct = visit_expr_struct_literal, + .visit_literal_array = visit_expr_array_literal, + .visit_and = visit_expr_and, + .visit_or = visit_expr_or, + .visit_not = visit_expr_not, + .visit_is_null = visit_expr_is_null, + .visit_lt = visit_expr_lt, + .visit_le = visit_expr_le, + .visit_gt = visit_expr_gt, + .visit_ge = visit_expr_ge, + .visit_eq = visit_expr_eq, + .visit_ne = visit_expr_ne, + .visit_distinct = visit_expr_distinct, + .visit_in = visit_expr_in, + .visit_not_in = visit_expr_not_in, + .visit_add = visit_expr_add, + .visit_minus = visit_expr_minus, + .visit_multiply = visit_expr_multiply, + .visit_divide = visit_expr_divide, + .visit_column = visit_expr_column, + .visit_struct_expr = visit_expr_struct_expr, + }; + uintptr_t top_level_id = visit_expression(&predicate, &visitor); + ExpressionItemList top_level_expr = data.lists[top_level_id]; + free(data.lists); + return top_level_expr; +} + +void free_expression_list(ExpressionItemList list); +void free_expression_item(ExpressionItem ref) { + switch (ref.type) { + case BinOp: { + struct BinOp* op = ref.ref; + free_expression_list(op->exprs); + free(op); + break; + } + case Variadic: { + struct Variadic* var = ref.ref; + free_expression_list(var->exprs); + free(var); + break; + }; + case Literal: { + struct Literal* lit = ref.ref; + switch (lit->type) { + case Struct: { + struct Struct* struct_data = &lit->value.struct_data; + free_expression_list(struct_data->values); + free_expression_list(struct_data->fields); + break; + } + case Array: { + struct ArrayData* array = &lit->value.array_data; + free_expression_list(array->exprs); + break; + } + case String: { + free(lit->value.string_data); + break; + } + case Binary: { + struct BinaryData* binary = &lit->value.binary; + free(binary->buf); + break; + } + case Integer: + case Long: + case Short: + case Byte: + case Float: + case Double: + case Boolean: + case Timestamp: + case TimestampNtz: + case Date: + case Decimal: + case Null: + break; + } + free(lit); + break; + }; + case Unary: { + struct Unary* unary = ref.ref; + free_expression_list(unary->sub_expr); + free(unary); + break; + } + case Column: { + free(ref.ref); + break; + } + } +} +void free_expression_list(ExpressionItemList list) { + for (size_t i = 0; i < list.len; i++) { + free_expression_item(list.list[i]); + } + free(list.list); +} diff --git a/ffi/examples/visit-expression/expression_print.h b/ffi/examples/visit-expression/expression_print.h new file mode 100644 index 000000000..23c1dcb7b --- /dev/null +++ b/ffi/examples/visit-expression/expression_print.h @@ -0,0 +1,206 @@ +#pragma once + +#include "expression.h" + +/** + * This module defines a function `print_tree` to recursively print an ExpressionItem. + */ + +void print_tree_helper(ExpressionItem ref, int depth); +void print_n_spaces(int n) { + if (n == 0) + return; + printf(" "); + print_n_spaces(n - 1); +} +void print_expression_item_list(ExpressionItemList list, int depth) { + for (size_t i = 0; i < list.len; i++) { + print_tree_helper(list.list[i], depth); + } +} +void print_tree_helper(ExpressionItem ref, int depth) { + switch (ref.type) { + case BinOp: { + struct BinOp* op = ref.ref; + print_n_spaces(depth); + switch (op->op) { + case Add: { + printf("Add\n"); + break; + } + case Minus: { + printf("Minus\n"); + break; + }; + case Divide: { + printf("Divide\n"); + break; + }; + case Multiply: { + printf("Multiply\n"); + break; + }; + case LessThan: { + printf("LessThan\n"); + break; + }; + case LessThanOrEqual: { + printf("LessThanOrEqual\n"); + break; + } + case GreaterThan: { + printf("GreaterThan\n"); + break; + }; + case GreaterThaneOrEqual: { + printf("GreaterThanOrEqual\n"); + break; + }; + case Equal: { + printf("Equal\n"); + break; + }; + case NotEqual: { + printf("NotEqual\n"); + break; + }; + case In: { + printf("In\n"); + break; + }; + case NotIn: { + printf("NotIn\n"); + break; + }; break; + case Distinct: + printf("Distinct\n"); + break; + } + print_expression_item_list(op->exprs, depth + 1); + break; + } + case Variadic: { + struct Variadic* var = ref.ref; + print_n_spaces(depth); + switch (var->op) { + case And: + printf("And\n"); + break; + case Or: + printf("Or\n"); + break; + case StructExpression: + printf("StructExpression\n"); + break; + } + print_expression_item_list(var->exprs, depth + 1); + break; + } + case Literal: { + struct Literal* lit = ref.ref; + print_n_spaces(depth); + switch (lit->type) { + case Integer: + printf("Integer(%d)\n", lit->value.integer_data); + break; + case Long: + printf("Long(%lld)\n", (long long)lit->value.long_data); + break; + case Short: + printf("Short(%hd)\n", lit->value.short_data); + break; + case Byte: + printf("Byte(%hhd)\n", lit->value.byte_data); + break; + case Float: + printf("Float(%f)\n", (float)lit->value.float_data); + break; + case Double: + printf("Double(%f)\n", lit->value.double_data); + break; + case String: { + printf("String(%s)\n", lit->value.string_data); + break; + } + case Boolean: + printf("Boolean(%d)\n", lit->value.boolean_data); + break; + case Timestamp: + printf("Timestamp(%lld)\n", (long long)lit->value.long_data); + break; + case TimestampNtz: + printf("TimestampNtz(%lld)\n", (long long)lit->value.long_data); + break; + case Date: + printf("Date(%d)\n", lit->value.integer_data); + break; + case Binary: { + printf("Binary("); + for (size_t i = 0; i < lit->value.binary.len; i++) { + printf("%02x", lit->value.binary.buf[i]); + } + printf(")\n"); + break; + } + case Decimal: { + struct Decimal* dec = &lit->value.decimal; + printf("Decimal(%lld,%lld, %d, %d)\n", + (long long)dec->value[0], + (long long)dec->value[1], + dec->scale, + dec->precision); + break; + } + case Null: + printf("Null\n"); + break; + case Struct: + printf("Struct\n"); + struct Struct* struct_data = &lit->value.struct_data; + for (size_t i = 0; i < struct_data->values.len; i++) { + print_n_spaces(depth + 1); + + // Extract field name from field + ExpressionItem item = struct_data->fields.list[i]; + assert(item.type == Literal); + struct Literal* lit = item.ref; + assert(lit->type == String); + + printf("Field: %s\n", lit->value.string_data); + print_tree_helper(struct_data->values.list[i], depth + 2); + } + break; + case Array: + printf("Array\n"); + struct ArrayData* array = &lit->value.array_data; + print_expression_item_list(array->exprs, depth + 1); + break; + } + break; + } + case Unary: { + print_n_spaces(depth); + struct Unary* unary = ref.ref; + switch (unary->type) { + case Not: + printf("Not\n"); + break; + case IsNull: + printf("IsNull\n"); + break; + } + + print_expression_item_list(unary->sub_expr, depth + 1); + break; + } + case Column: + print_n_spaces(depth); + char* column_name = ref.ref; + printf("Column(%s)\n", column_name); + break; + } +} + +void print_expression(ExpressionItemList expression) { + print_expression_item_list(expression, 0); +} diff --git a/ffi/examples/visit-expression/visit_expression.c b/ffi/examples/visit-expression/visit_expression.c new file mode 100644 index 000000000..2fec1b662 --- /dev/null +++ b/ffi/examples/visit-expression/visit_expression.c @@ -0,0 +1,12 @@ +#include "delta_kernel_ffi.h" +#include "expression.h" +#include "expression_print.h" + +int main() { + SharedExpression* pred = get_testing_kernel_expression(); + ExpressionItemList expr = construct_predicate(pred); + print_expression(expr); + free_expression_list(expr); + free_kernel_predicate(pred); + return 0; +} diff --git a/ffi/src/expressions.rs b/ffi/src/expressions/engine.rs similarity index 97% rename from ffi/src/expressions.rs rename to ffi/src/expressions/engine.rs index 19c334038..722576766 100644 --- a/ffi/src/expressions.rs +++ b/ffi/src/expressions/engine.rs @@ -1,3 +1,5 @@ +//! Defines [`KernelExpressionVisitorState`]. This is a visitor that can be used by an [`EnginePredicate`] +//! to convert engine expressions into kernel expressions. use std::ffi::c_void; use crate::{ diff --git a/ffi/src/expressions/kernel.rs b/ffi/src/expressions/kernel.rs new file mode 100644 index 000000000..47e3e6dea --- /dev/null +++ b/ffi/src/expressions/kernel.rs @@ -0,0 +1,367 @@ +//! Defines [`EngineExpressionVisitor`]. This is a visitor that can be used to convert the kernel's +//! [`Expression`] to an engine's expression format. +use crate::expressions::SharedExpression; +use std::ffi::c_void; + +use crate::{handle::Handle, KernelStringSlice}; +use delta_kernel::expressions::{ + ArrayData, BinaryOperator, Expression, Scalar, StructData, UnaryOperator, VariadicOperator, +}; + +/// Free the memory the passed SharedExpression +/// +/// # Safety +/// Engine is responsible for passing a valid SharedExpression +#[no_mangle] +pub unsafe extern "C" fn free_kernel_predicate(data: Handle) { + data.drop_handle(); +} + +type VisitLiteralFn = extern "C" fn(data: *mut c_void, sibling_list_id: usize, value: T); +type VisitBinaryOpFn = + extern "C" fn(data: *mut c_void, sibling_list_id: usize, child_list_id: usize); +type VisitVariadicFn = + extern "C" fn(data: *mut c_void, sibling_list_id: usize, child_list_id: usize); +type VisitUnaryFn = extern "C" fn(data: *mut c_void, sibling_list_id: usize, child_list_id: usize); + +/// The [`EngineExpressionVisitor`] defines a visitor system to allow engines to build their own +/// representation of a kernel expression. +/// +/// The model is list based. When the kernel needs a list, it will ask engine to allocate one of a +/// particular size. Once allocated the engine returns an `id`, which can be any integer identifier +/// ([`usize`]) the engine wants, and will be passed back to the engine to identify the list in the +/// future. +/// +/// Every expression the kernel visits belongs to some list of "sibling" elements. The schema +/// itself is a list of schema elements, and every complex type (struct expression, array, variadic, etc) +/// contains a list of "child" elements. +/// 1. Before visiting any complex expression type, the kernel asks the engine to allocate a list to +/// hold its children +/// 2. When visiting any expression element, the kernel passes its parent's "child list" as the +/// "sibling list" the element should be appended to: +/// - For a struct literal, first visit each struct field and visit each value +/// - For a struct expression, visit each sub expression. +/// - For an array literal, visit each of the elements. +/// - For a variadic `and` or `or` expression, visit each sub-expression. +/// - For a binary operator expression, visit the left and right operands. +/// - For a unary `is null` or `not` expression, visit the sub-expression. +/// 3. When visiting a complex expression, the kernel also passes the "child list" containing +/// that element's (already-visited) children. +/// 4. The [`visit_expression`] method returns the id of the list of top-level columns +/// +/// WARNING: The visitor MUST NOT retain internal references to string slices or binary data passed +/// to visitor methods +/// TODO: Visit type information in struct field and null. This will likely involve using the schema +/// visitor. Note that struct literals are currently in flux, and may change significantly. Here is the relevant +/// issue: https://github.com/delta-incubator/delta-kernel-rs/issues/412 +#[repr(C)] +pub struct EngineExpressionVisitor { + /// An opaque engine state pointer + pub data: *mut c_void, + /// Creates a new expression list, optionally reserving capacity up front + pub make_field_list: extern "C" fn(data: *mut c_void, reserve: usize) -> usize, + /// Visit a 32bit `integer` belonging to the list identified by `sibling_list_id`. + pub visit_literal_int: VisitLiteralFn, + /// Visit a 64bit `long` belonging to the list identified by `sibling_list_id`. + pub visit_literal_long: VisitLiteralFn, + /// Visit a 16bit `short` belonging to the list identified by `sibling_list_id`. + pub visit_literal_short: VisitLiteralFn, + /// Visit an 8bit `byte` belonging to the list identified by `sibling_list_id`. + pub visit_literal_byte: VisitLiteralFn, + /// Visit a 32bit `float` belonging to the list identified by `sibling_list_id`. + pub visit_literal_float: VisitLiteralFn, + /// Visit a 64bit `double` belonging to the list identified by `sibling_list_id`. + pub visit_literal_double: VisitLiteralFn, + /// Visit a `string` belonging to the list identified by `sibling_list_id`. + pub visit_literal_string: VisitLiteralFn, + /// Visit a `boolean` belonging to the list identified by `sibling_list_id`. + pub visit_literal_bool: VisitLiteralFn, + /// Visit a 64bit timestamp belonging to the list identified by `sibling_list_id`. + /// The timestamp is microsecond precision and adjusted to UTC. + pub visit_literal_timestamp: VisitLiteralFn, + /// Visit a 64bit timestamp belonging to the list identified by `sibling_list_id`. + /// The timestamp is microsecond precision with no timezone. + pub visit_literal_timestamp_ntz: VisitLiteralFn, + /// Visit a 32bit intger `date` representing days since UNIX epoch 1970-01-01. The `date` belongs + /// to the list identified by `sibling_list_id`. + pub visit_literal_date: VisitLiteralFn, + /// Visit binary data at the `buffer` with length `len` belonging to the list identified by + /// `sibling_list_id`. + pub visit_literal_binary: + extern "C" fn(data: *mut c_void, sibling_list_id: usize, buffer: *const u8, len: usize), + /// Visit a 128bit `decimal` value with the given precision and scale. The 128bit integer + /// is split into the most significant 64 bits in `value_ms`, and the least significant 64 + /// bits in `value_ls`. The `decimal` belongs to the list identified by `sibling_list_id`. + pub visit_literal_decimal: extern "C" fn( + data: *mut c_void, + sibling_list_id: usize, + value_ms: u64, + value_ls: u64, + precision: u8, + scale: u8, + ), + /// Visit a struct literal belonging to the list identified by `sibling_list_id`. + /// The field names of the struct are in a list identified by `child_field_list_id`. + /// The values of the struct are in a list identified by `child_value_list_id`. + pub visit_literal_struct: extern "C" fn( + data: *mut c_void, + sibling_list_id: usize, + child_field_list_value: usize, + child_value_list_id: usize, + ), + /// Visit an array literal belonging to the list identified by `sibling_list_id`. + /// The values of the array are in a list identified by `child_list_id`. + pub visit_literal_array: + extern "C" fn(data: *mut c_void, sibling_list_id: usize, child_list_id: usize), + /// Visits a null value belonging to the list identified by `sibling_list_id. + pub visit_literal_null: extern "C" fn(data: *mut c_void, sibling_list_id: usize), + /// Visits an `and` expression belonging to the list identified by `sibling_list_id`. + /// The sub-expressions of the array are in a list identified by `child_list_id` + pub visit_and: VisitVariadicFn, + /// Visits an `or` expression belonging to the list identified by `sibling_list_id`. + /// The sub-expressions of the array are in a list identified by `child_list_id` + pub visit_or: VisitVariadicFn, + /// Visits a `not` expression belonging to the list identified by `sibling_list_id`. + /// The sub-expression will be in a _one_ item list identified by `child_list_id` + pub visit_not: VisitUnaryFn, + /// Visits a `is_null` expression belonging to the list identified by `sibling_list_id`. + /// The sub-expression will be in a _one_ item list identified by `child_list_id` + pub visit_is_null: VisitUnaryFn, + /// Visits the `LessThan` binary operator belonging to the list identified by `sibling_list_id`. + /// The operands will be in a _two_ item list identified by `child_list_id` + pub visit_lt: VisitBinaryOpFn, + /// Visits the `LessThanOrEqual` binary operator belonging to the list identified by `sibling_list_id`. + /// The operands will be in a _two_ item list identified by `child_list_id` + pub visit_le: VisitBinaryOpFn, + /// Visits the `GreaterThan` binary operator belonging to the list identified by `sibling_list_id`. + /// The operands will be in a _two_ item list identified by `child_list_id` + pub visit_gt: VisitBinaryOpFn, + /// Visits the `GreaterThanOrEqual` binary operator belonging to the list identified by `sibling_list_id`. + /// The operands will be in a _two_ item list identified by `child_list_id` + pub visit_ge: VisitBinaryOpFn, + /// Visits the `Equal` binary operator belonging to the list identified by `sibling_list_id`. + /// The operands will be in a _two_ item list identified by `child_list_id` + pub visit_eq: VisitBinaryOpFn, + /// Visits the `NotEqual` binary operator belonging to the list identified by `sibling_list_id`. + /// The operands will be in a _two_ item list identified by `child_list_id` + pub visit_ne: VisitBinaryOpFn, + /// Visits the `Distinct` binary operator belonging to the list identified by `sibling_list_id`. + /// The operands will be in a _two_ item list identified by `child_list_id` + pub visit_distinct: VisitBinaryOpFn, + /// Visits the `In` binary operator belonging to the list identified by `sibling_list_id`. + /// The operands will be in a _two_ item list identified by `child_list_id` + pub visit_in: VisitBinaryOpFn, + /// Visits the `NotIn` binary operator belonging to the list identified by `sibling_list_id`. + /// The operands will be in a _two_ item list identified by `child_list_id` + pub visit_not_in: VisitBinaryOpFn, + /// Visits the `Add` binary operator belonging to the list identified by `sibling_list_id`. + /// The operands will be in a _two_ item list identified by `child_list_id` + pub visit_add: VisitBinaryOpFn, + /// Visits the `Minus` binary operator belonging to the list identified by `sibling_list_id`. + /// The operands will be in a _two_ item list identified by `child_list_id` + pub visit_minus: VisitBinaryOpFn, + /// Visits the `Multiply` binary operator belonging to the list identified by `sibling_list_id`. + /// The operands will be in a _two_ item list identified by `child_list_id` + pub visit_multiply: VisitBinaryOpFn, + /// Visits the `Divide` binary operator belonging to the list identified by `sibling_list_id`. + /// The operands will be in a _two_ item list identified by `child_list_id` + pub visit_divide: VisitBinaryOpFn, + /// Visits the `column` belonging to the list identified by `sibling_list_id`. + pub visit_column: + extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice), + /// Visits a `StructExpression` belonging to the list identified by `sibling_list_id`. + /// The sub-expressions of the `StructExpression` are in a list identified by `child_list_id` + pub visit_struct_expr: + extern "C" fn(data: *mut c_void, sibling_list_id: usize, child_list_id: usize), +} + +/// Visit the expression of the passed [`SharedExpression`] Handle using the provided `visitor`. +/// See the documentation of [`EngineExpressionVisitor`] for a description of how this visitor +/// works. +/// +/// This method returns the id that the engine generated for the top level expression +/// +/// # Safety +/// +/// The caller must pass a valid SharedExpression Handle and expression visitor +#[no_mangle] +pub unsafe extern "C" fn visit_expression( + expression: &Handle, + visitor: &mut EngineExpressionVisitor, +) -> usize { + macro_rules! call { + ( $visitor:ident, $visitor_fn:ident $(, $extra_args:expr) *) => { + ($visitor.$visitor_fn)($visitor.data $(, $extra_args) *) + }; + } + fn visit_expression_array( + visitor: &mut EngineExpressionVisitor, + array: &ArrayData, + sibling_list_id: usize, + ) { + #[allow(deprecated)] + let elements = array.array_elements(); + let child_list_id = call!(visitor, make_field_list, elements.len()); + for scalar in elements { + visit_expression_scalar(visitor, scalar, child_list_id); + } + call!(visitor, visit_literal_array, sibling_list_id, child_list_id); + } + fn visit_expression_struct_literal( + visitor: &mut EngineExpressionVisitor, + struct_data: &StructData, + sibling_list_id: usize, + ) { + let child_value_list_id = call!(visitor, make_field_list, struct_data.fields().len()); + let child_field_list_id = call!(visitor, make_field_list, struct_data.fields().len()); + for (field, value) in struct_data.fields().iter().zip(struct_data.values()) { + call!( + visitor, + visit_literal_string, + child_field_list_id, + field.name().into() + ); + visit_expression_scalar(visitor, value, child_value_list_id); + } + call!( + visitor, + visit_literal_struct, + sibling_list_id, + child_field_list_id, + child_value_list_id + ) + } + fn visit_expression_struct_expr( + visitor: &mut EngineExpressionVisitor, + exprs: &[Expression], + sibling_list_id: usize, + ) { + let child_list_id = call!(visitor, make_field_list, exprs.len()); + for expr in exprs { + visit_expression_impl(visitor, expr, child_list_id); + } + call!(visitor, visit_struct_expr, sibling_list_id, child_list_id) + } + fn visit_expression_variadic( + visitor: &mut EngineExpressionVisitor, + op: &VariadicOperator, + exprs: &[Expression], + sibling_list_id: usize, + ) { + let child_list_id = call!(visitor, make_field_list, exprs.len()); + for expr in exprs { + visit_expression_impl(visitor, expr, child_list_id); + } + + let visit_fn = match op { + VariadicOperator::And => &visitor.visit_and, + VariadicOperator::Or => &visitor.visit_or, + }; + visit_fn(visitor.data, sibling_list_id, child_list_id); + } + fn visit_expression_scalar( + visitor: &mut EngineExpressionVisitor, + scalar: &Scalar, + sibling_list_id: usize, + ) { + match scalar { + Scalar::Integer(val) => call!(visitor, visit_literal_int, sibling_list_id, *val), + Scalar::Long(val) => call!(visitor, visit_literal_long, sibling_list_id, *val), + Scalar::Short(val) => call!(visitor, visit_literal_short, sibling_list_id, *val), + Scalar::Byte(val) => call!(visitor, visit_literal_byte, sibling_list_id, *val), + Scalar::Float(val) => call!(visitor, visit_literal_float, sibling_list_id, *val), + Scalar::Double(val) => { + call!(visitor, visit_literal_double, sibling_list_id, *val) + } + Scalar::String(val) => { + call!(visitor, visit_literal_string, sibling_list_id, val.into()) + } + Scalar::Boolean(val) => call!(visitor, visit_literal_bool, sibling_list_id, *val), + Scalar::Timestamp(val) => { + call!(visitor, visit_literal_timestamp, sibling_list_id, *val) + } + Scalar::TimestampNtz(val) => { + call!(visitor, visit_literal_timestamp_ntz, sibling_list_id, *val) + } + Scalar::Date(val) => call!(visitor, visit_literal_date, sibling_list_id, *val), + Scalar::Binary(buf) => call!( + visitor, + visit_literal_binary, + sibling_list_id, + buf.as_ptr(), + buf.len() + ), + Scalar::Decimal(value, precision, scale) => { + let ms: u64 = (value >> 64) as u64; + let ls: u64 = *value as u64; + call!( + visitor, + visit_literal_decimal, + sibling_list_id, + ms, + ls, + *precision, + *scale + ) + } + Scalar::Null(_) => call!(visitor, visit_literal_null, sibling_list_id), + Scalar::Struct(struct_data) => { + visit_expression_struct_literal(visitor, struct_data, sibling_list_id) + } + Scalar::Array(array) => visit_expression_array(visitor, array, sibling_list_id), + } + } + fn visit_expression_impl( + visitor: &mut EngineExpressionVisitor, + expression: &Expression, + sibling_list_id: usize, + ) { + match expression { + Expression::Literal(scalar) => { + visit_expression_scalar(visitor, scalar, sibling_list_id) + } + Expression::Column(name) => { + call!(visitor, visit_column, sibling_list_id, name.as_str().into()) + } + Expression::Struct(exprs) => { + visit_expression_struct_expr(visitor, exprs, sibling_list_id) + } + Expression::BinaryOperation { op, left, right } => { + let child_list_id = call!(visitor, make_field_list, 2); + visit_expression_impl(visitor, left, child_list_id); + visit_expression_impl(visitor, right, child_list_id); + let op = match op { + BinaryOperator::Plus => visitor.visit_add, + BinaryOperator::Minus => visitor.visit_minus, + BinaryOperator::Multiply => visitor.visit_multiply, + BinaryOperator::Divide => visitor.visit_divide, + BinaryOperator::LessThan => visitor.visit_lt, + BinaryOperator::LessThanOrEqual => visitor.visit_le, + BinaryOperator::GreaterThan => visitor.visit_gt, + BinaryOperator::GreaterThanOrEqual => visitor.visit_ge, + BinaryOperator::Equal => visitor.visit_eq, + BinaryOperator::NotEqual => visitor.visit_ne, + BinaryOperator::Distinct => visitor.visit_distinct, + BinaryOperator::In => visitor.visit_in, + BinaryOperator::NotIn => visitor.visit_not_in, + }; + op(visitor.data, sibling_list_id, child_list_id); + } + Expression::UnaryOperation { op, expr } => { + let child_id_list = call!(visitor, make_field_list, 1); + visit_expression_impl(visitor, expr, child_id_list); + let op = match op { + UnaryOperator::Not => visitor.visit_not, + UnaryOperator::IsNull => visitor.visit_is_null, + }; + op(visitor.data, sibling_list_id, child_id_list); + } + Expression::VariadicOperation { op, exprs } => { + visit_expression_variadic(visitor, op, exprs, sibling_list_id) + } + } + } + let top_level = call!(visitor, make_field_list, 1); + visit_expression_impl(visitor, expression.as_ref(), top_level); + top_level +} diff --git a/ffi/src/expressions/mod.rs b/ffi/src/expressions/mod.rs new file mode 100644 index 000000000..a6756f972 --- /dev/null +++ b/ffi/src/expressions/mod.rs @@ -0,0 +1,10 @@ +//! This module holds functionality for moving expressions across the FFI boundary, both from +//! engine to kernel, and from kernel to engine. +use delta_kernel::Expression; +use delta_kernel_ffi_macros::handle_descriptor; + +pub mod engine; +pub mod kernel; + +#[handle_descriptor(target=Expression, mutable=false, sized=true)] +pub struct SharedExpression; diff --git a/ffi/src/lib.rs b/ffi/src/lib.rs index ff3d60f2e..290c3a09d 100644 --- a/ffi/src/lib.rs +++ b/ffi/src/lib.rs @@ -33,6 +33,8 @@ pub mod engine_funcs; pub mod expressions; pub mod scan; pub mod schema; +#[cfg(feature = "test-ffi")] +pub mod test_ffi; pub(crate) type NullableCvoid = Option>; diff --git a/ffi/src/scan.rs b/ffi/src/scan.rs index 8687c9a3a..d388a6e0c 100644 --- a/ffi/src/scan.rs +++ b/ffi/src/scan.rs @@ -13,7 +13,9 @@ use delta_kernel_ffi_macros::handle_descriptor; use tracing::debug; use url::Url; -use crate::expressions::{unwrap_kernel_expression, EnginePredicate, KernelExpressionVisitorState}; +use crate::expressions::engine::{ + unwrap_kernel_expression, EnginePredicate, KernelExpressionVisitorState, +}; use crate::{ AllocateStringFn, ExclusiveEngineData, ExternEngine, ExternResult, IntoExternResult, KernelBoolSlice, KernelRowIndexArray, KernelStringSlice, NullableCvoid, SharedExternEngine, diff --git a/ffi/src/test_ffi.rs b/ffi/src/test_ffi.rs new file mode 100644 index 000000000..fa7e2ef05 --- /dev/null +++ b/ffi/src/test_ffi.rs @@ -0,0 +1,95 @@ +//! Utility functions used for testing ffi code + +use std::{ops::Not, sync::Arc}; + +use crate::{expressions::SharedExpression, handle::Handle}; +use delta_kernel::{ + expressions::{column_expr, ArrayData, BinaryOperator, Expression, Scalar, StructData}, + schema::{ArrayType, DataType, StructField, StructType}, +}; + +/// Constructs a kernel expression that is passed back as a SharedExpression handle. The expected +/// output expression can be found in `ffi/tests/test_expression_visitor/expected.txt`. +/// +/// # Safety +/// The caller is responsible for freeing the retured memory, either by calling +/// [`free_kernel_predicate`], or [`Handle::drop_handle`] +#[no_mangle] +pub unsafe extern "C" fn get_testing_kernel_expression() -> Handle { + use Expression as Expr; + + let array_type = ArrayType::new( + DataType::Primitive(delta_kernel::schema::PrimitiveType::Short), + false, + ); + let array_data = ArrayData::new(array_type.clone(), vec![Scalar::Short(5), Scalar::Short(0)]); + + let nested_fields = vec![ + StructField::new("a", DataType::INTEGER, false), + StructField::new("b", array_type, false), + ]; + let nested_values = vec![Scalar::Integer(500), Scalar::Array(array_data.clone())]; + let nested_struct = StructData::try_new(nested_fields.clone(), nested_values).unwrap(); + let nested_struct_type = StructType::new(nested_fields); + + let top_level_struct = StructData::try_new( + vec![StructField::new( + "top", + DataType::Struct(Box::new(nested_struct_type)), + true, + )], + vec![Scalar::Struct(nested_struct)], + ) + .unwrap(); + + let mut sub_exprs = vec![ + Expr::literal(i8::MAX), + Expr::literal(i8::MIN), + Expr::literal(f32::MAX), + Expr::literal(f32::MIN), + Expr::literal(f64::MAX), + Expr::literal(f64::MIN), + Expr::literal(i32::MAX), + Expr::literal(i32::MIN), + Expr::literal(i64::MAX), + Expr::literal(i64::MIN), + Expr::literal("hello expressions"), + Expr::literal(true), + Expr::literal(false), + Scalar::Timestamp(50).into(), + Scalar::TimestampNtz(100).into(), + Scalar::Date(32).into(), + Scalar::Binary(0x0000deadbeefcafeu64.to_be_bytes().to_vec()).into(), + // Both the most and least significant u64 of the Decimal value will be 1 + Scalar::Decimal((1 << 64) + 1, 2, 3).into(), + Expr::null_literal(DataType::SHORT), + Scalar::Struct(top_level_struct).into(), + Scalar::Array(array_data).into(), + Expr::struct_from(vec![Expr::or_from(vec![ + Scalar::Integer(5).into(), + Scalar::Long(20).into(), + ])]), + Expr::not(Expr::is_null(column_expr!("col"))), + ]; + sub_exprs.extend( + [ + BinaryOperator::In, + BinaryOperator::Plus, + BinaryOperator::Minus, + BinaryOperator::Equal, + BinaryOperator::NotEqual, + BinaryOperator::NotIn, + BinaryOperator::Divide, + BinaryOperator::Multiply, + BinaryOperator::LessThan, + BinaryOperator::LessThanOrEqual, + BinaryOperator::GreaterThan, + BinaryOperator::GreaterThanOrEqual, + BinaryOperator::Distinct, + ] + .iter() + .map(|op| Expr::binary(*op, Scalar::Integer(0), Scalar::Long(0))), + ); + + Arc::new(Expr::and_from(sub_exprs)).into() +} diff --git a/ffi/tests/test-expression-visitor/expected.txt b/ffi/tests/test-expression-visitor/expected.txt new file mode 100644 index 000000000..4321df93d --- /dev/null +++ b/ffi/tests/test-expression-visitor/expected.txt @@ -0,0 +1,78 @@ +And + Byte(127) + Byte(-128) + Float(340282346638528859811704183484516925440.000000) + Float(-340282346638528859811704183484516925440.000000) + Double(179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000) + Double(-179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000) + Integer(2147483647) + Integer(-2147483648) + Long(9223372036854775807) + Long(-9223372036854775808) + String(hello expressions) + Boolean(1) + Boolean(0) + Timestamp(50) + TimestampNtz(100) + Date(32) + Binary(0000deadbeefcafe) + Decimal(1,1, 3, 2) + Null + Struct + Field: top + Struct + Field: a + Integer(500) + Field: b + Array + Short(5) + Short(0) + Array + Short(5) + Short(0) + StructExpression + Or + Integer(5) + Long(20) + Not + IsNull + Column(col) + In + Integer(0) + Long(0) + Add + Integer(0) + Long(0) + Minus + Integer(0) + Long(0) + Equal + Integer(0) + Long(0) + NotEqual + Integer(0) + Long(0) + NotIn + Integer(0) + Long(0) + Divide + Integer(0) + Long(0) + Multiply + Integer(0) + Long(0) + LessThan + Integer(0) + Long(0) + LessThanOrEqual + Integer(0) + Long(0) + GreaterThan + Integer(0) + Long(0) + GreaterThanOrEqual + Integer(0) + Long(0) + Distinct + Integer(0) + Long(0) diff --git a/ffi/tests/test-expression-visitor/run_test.sh b/ffi/tests/test-expression-visitor/run_test.sh new file mode 100755 index 000000000..2c00c9ea4 --- /dev/null +++ b/ffi/tests/test-expression-visitor/run_test.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +set -euxo pipefail + +OUT_FILE=$(mktemp) +./visit_expression | tee "$OUT_FILE" +diff -s "$OUT_FILE" "$1" +DIFF_EXIT_CODE=$? +echo "Diff exited with $DIFF_EXIT_CODE" +rm "$OUT_FILE" +exit "$DIFF_EXIT_CODE" + diff --git a/kernel/src/engine/parquet_stats_skipping/tests.rs b/kernel/src/engine/parquet_stats_skipping/tests.rs index b4bdd97d3..a723c4600 100644 --- a/kernel/src/engine/parquet_stats_skipping/tests.rs +++ b/kernel/src/engine/parquet_stats_skipping/tests.rs @@ -164,7 +164,7 @@ fn test_binary_scalars() { Struct(StructData::try_new(vec![], vec![]).unwrap()), Array(ArrayData::new( ArrayType::new(DataType::LONG, false), - vec![], + Vec::::new(), )), ]; let larger_values = &[ @@ -185,7 +185,7 @@ fn test_binary_scalars() { Struct(StructData::try_new(vec![], vec![]).unwrap()), Array(ArrayData::new( ArrayType::new(DataType::LONG, false), - vec![], + Vec::::new(), )), ]; diff --git a/kernel/src/expressions/column_names.rs b/kernel/src/expressions/column_names.rs index 4129abb30..ca0e2ca70 100644 --- a/kernel/src/expressions/column_names.rs +++ b/kernel/src/expressions/column_names.rs @@ -135,7 +135,7 @@ impl Hash for ColumnName { #[doc(hidden)] macro_rules! __column_name { ( $($name:tt)* ) => { - $crate::expressions::ColumnName::new(delta_kernel_derive::parse_column_name!($($name)*)) + $crate::expressions::ColumnName::new($crate::delta_kernel_derive::parse_column_name!($($name)*)) }; } #[doc(inline)] diff --git a/kernel/src/expressions/scalars.rs b/kernel/src/expressions/scalars.rs index 3578258e9..bbbabe17e 100644 --- a/kernel/src/expressions/scalars.rs +++ b/kernel/src/expressions/scalars.rs @@ -20,8 +20,9 @@ pub struct ArrayData { } impl ArrayData { - #[cfg(test)] - pub(crate) fn new(tpe: ArrayType, elements: Vec) -> Self { + #[cfg_attr(feature = "developer-visibility", visibility::make(pub))] + pub fn new(tpe: ArrayType, elements: impl IntoIterator>) -> Self { + let elements = elements.into_iter().map(Into::into).collect(); Self { tpe, elements } } pub fn array_type(&self) -> &ArrayType { diff --git a/kernel/src/lib.rs b/kernel/src/lib.rs index 6d47d9ae2..2f686a3ad 100644 --- a/kernel/src/lib.rs +++ b/kernel/src/lib.rs @@ -76,6 +76,7 @@ pub mod table; pub mod transaction; pub(crate) mod utils; +pub use delta_kernel_derive; pub use engine_data::{DataVisitor, EngineData}; pub use error::{DeltaResult, Error}; pub use expressions::{Expression, ExpressionRef};