-
Notifications
You must be signed in to change notification settings - Fork 3.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[Fix](Variant) fix some nested explode_variant_array bug and add more… #44533
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -23,12 +23,17 @@ | |
|
||
#include "common/status.h" | ||
#include "vec/columns/column.h" | ||
#include "vec/columns/column_array.h" | ||
#include "vec/columns/column_nothing.h" | ||
#include "vec/columns/column_object.h" | ||
#include "vec/core/block.h" | ||
#include "vec/core/column_with_type_and_name.h" | ||
#include "vec/data_types/data_type.h" | ||
#include "vec/data_types/data_type_array.h" | ||
#include "vec/data_types/data_type_nothing.h" | ||
#include "vec/exprs/vexpr.h" | ||
#include "vec/exprs/vexpr_context.h" | ||
#include "vec/functions/function_helpers.h" | ||
|
||
namespace doris::vectorized { | ||
#include "common/compile_check_begin.h" | ||
|
@@ -37,6 +42,34 @@ VExplodeTableFunction::VExplodeTableFunction() { | |
_fn_name = "vexplode"; | ||
} | ||
|
||
/// Prepares the explode state for a variant (ColumnObject) input column.
///
/// Marks the output as variant, then derives `_array_column` and
/// `_detail.nested_type` from the root of the variant column:
///   - non-null root: the root column itself is exploded and its array's
///     nested type is recorded;
///   - null root: an empty Nullable(Array(Nothing)) column is synthesised and
///     filled with one default row per input row.
///
/// @param block             block that holds the column to explode
/// @param value_column_idx  position of that column inside `block`
/// @return OK on success; NotSupported when the variant root is not an array;
///         InternalError when the column is not a ColumnObject.
Status VExplodeTableFunction::_process_init_variant(Block* block, int value_column_idx) {
    // explode variant array
    //
    // Keep owning handles alive for the whole function. Taking `.get()` on the
    // temporaries returned by convert_to_full_column_if_const()/remove_nullable()
    // leaves a dangling raw pointer as soon as the full expression ends whenever
    // a new column had to be materialised (const input).
    const ColumnPtr full_column =
            block->get_by_position(value_column_idx).column->convert_to_full_column_if_const();
    const ColumnPtr variant_holder = remove_nullable(full_column);
    const auto* variant_column = check_and_get_column<ColumnObject>(variant_holder.get());
    if (variant_column == nullptr) {
        // check_and_get_column yields nullptr on a type mismatch; fail cleanly
        // instead of dereferencing it.
        return Status::InternalError("explode expects a variant column but got type {}",
                                     block->get_by_position(value_column_idx).type->get_name());
    }
    _detail.output_as_variant = true;
    if (!variant_column->is_null_root()) {
        _array_column = variant_column->get_root();
        // We need to wrap the output nested column within a variant column.
        // Otherwise the type is mismatched.
        const auto* array_type = check_and_get_data_type<DataTypeArray>(
                remove_nullable(variant_column->get_root_type()).get());
        if (array_type == nullptr) {
            return Status::NotSupported("explode not support none array type {}",
                                        variant_column->get_root_type()->get_name());
        }
        _detail.nested_type = array_type->get_nested_type();
    } else {
        // null root, use nothing type
        _array_column = ColumnNullable::create(ColumnArray::create(ColumnNothing::create(0)),
                                               ColumnUInt8::create(0));
        _array_column->assume_mutable()->insert_many_defaults(variant_column->size());
        // Review discussion: wrapping this nested type in Nullable was suggested
        // (many operators make the array's nested column nullable), but it was
        // judged redundant in the column-object case, so it stays a plain
        // DataTypeNothing.
        _detail.nested_type = std::make_shared<DataTypeNothing>();
    }
    return Status::OK();
}
|
||
Status VExplodeTableFunction::process_init(Block* block, RuntimeState* state) { | ||
CHECK(_expr_context->root()->children().size() == 1) | ||
<< "VExplodeTableFunction only support 1 child but has " | ||
|
@@ -47,12 +80,7 @@ Status VExplodeTableFunction::process_init(Block* block, RuntimeState* state) { | |
&value_column_idx)); | ||
if (WhichDataType(remove_nullable(block->get_by_position(value_column_idx).type)) | ||
.is_variant_type()) { | ||
// explode variant array | ||
const auto& variant_column = check_and_get_column<ColumnObject>( | ||
remove_nullable(block->get_by_position(value_column_idx) | ||
.column->convert_to_full_column_if_const()) | ||
.get()); | ||
_array_column = variant_column->get_root(); | ||
RETURN_IF_ERROR(_process_init_variant(block, value_column_idx)); | ||
} else { | ||
_array_column = | ||
block->get_by_position(value_column_idx).column->convert_to_full_column_if_const(); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -164,13 +164,64 @@ parallel_pipeline_task_num=7,parallel_fragment_exec_instance_num=4,profile_level | |
sql """insert into var_nested2 select * from var_nested order by k limit 1024""" | ||
qt_sql """select /*+SET_VAR(batch_size=4064,broker_load_batch_size=16352,disable_streaming_preaggregations=true,enable_distinct_streaming_aggregation=true,parallel_fragment_exec_instance_num=5,parallel_pipeline_task_num=1,profile_level=1,enable_pipeline_engine=false,enable_parallel_scan=true,parallel_scan_max_scanners_count=48,parallel_scan_min_rows_per_scanner=16384,enable_fold_constant_by_be=true,enable_rewrite_element_at_to_slot=true,runtime_filter_type=12,enable_parallel_result_sink=false,enable_nereids_planner=true,rewrite_or_to_in_predicate_threshold=2,enable_function_pushdown=true,enable_common_expr_pushdown=false,enable_local_exchange=false,partitioned_hash_join_rows_threshold=1048576,partitioned_hash_agg_rows_threshold=8,partition_pruning_expand_threshold=10,enable_share_hash_table_for_broadcast_join=false,enable_two_phase_read_opt=true,enable_common_expr_pushdown_for_inverted_index=true,enable_delete_sub_predicate_v2=true,min_revocable_mem=33554432,fetch_remote_schema_timeout_seconds=120,max_fetch_remote_schema_tablet_count=512,enable_join_spill=false,enable_sort_spill=false,enable_agg_spill=false,enable_force_spill=false,data_queue_max_blocks=1,spill_streaming_agg_mem_limit=268435456,external_agg_partition_bits=5) */ * from var_nested2 order by k limit 10;""" | ||
qt_sql """select v['nested'] from var_nested2 where k < 10 order by k limit 10;""" | ||
// explode variant array | ||
// 0. normal explode variant array | ||
order_qt_explode_sql """select count(),cast(vv['xx'] as int) from var_nested lateral view explode_variant_array(v['nested']) tmp as vv where vv['xx'] = 10 group by cast(vv['xx'] as int)""" | ||
sql """truncate table var_nested2""" | ||
sql """insert into var_nested2 values(1119111, '{"eventId":1,"firstName":"Name1","lastName":"Surname1","body":{"phoneNumbers":[{"number":"5550219210","type":"GSM","callLimit":5},{"number":"02124713252","type":"HOME","callLimit":3},{"number":"05550219211","callLimit":2,"type":"WORK"}]}} | ||
')""" | ||
order_qt_explode_sql """select v['eventId'], phone_numbers from var_nested2 lateral view explode_variant_array(v['body']['phoneNumbers']) tmp1 as phone_numbers | ||
where phone_numbers['type'] = 'GSM' OR phone_numbers['type'] = 'HOME' and phone_numbers['callLimit'] > 2;""" | ||
|
||
// test array_function | ||
sql "DROP TABLE IF EXISTS var_nested_array_agg" | ||
sql """ | ||
CREATE TABLE IF NOT EXISTS var_nested_array_agg( | ||
k bigint, | ||
v variant | ||
) | ||
UNIQUE KEY(`k`) | ||
DISTRIBUTED BY HASH(k) BUCKETS 1 | ||
properties("replication_num" = "1", "disable_auto_compaction" = "false", "enable_unique_key_merge_on_write" = "true", "variant_enable_flatten_nested" = "true"); | ||
""" | ||
sql "insert into var_nested_array_agg select * from var_nested" | ||
// 1. array_contains | ||
qt_sql "select * from var_nested_array_agg where array_contains(cast(v['nested']['xx'] as array<int>), 10) order by k limit 10" | ||
// 2. array_agg scalar | ||
sql "select k, array_agg(cast(v['nested'] as text)) from var_nested_array_agg group by k limit 10" | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Should this be qt_sql? A plain sql call will not record the output in the .out file. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The output is not stable since the serialized |
||
|
||
// test explode_variant_array with abnormal cases | ||
sql "DROP TABLE IF EXISTS var_nested_explode_variant_with_abnomal" | ||
sql """ | ||
CREATE TABLE IF NOT EXISTS var_nested_explode_variant_with_abnomal( | ||
k bigint, | ||
v variant | ||
) | ||
UNIQUE KEY(`k`) | ||
DISTRIBUTED BY HASH(k) BUCKETS 1 | ||
properties("replication_num" = "1", "disable_auto_compaction" = "false", "enable_unique_key_merge_on_write" = "true", "variant_enable_flatten_nested" = "true"); | ||
""" | ||
sql "insert into var_nested_explode_variant_with_abnomal select * from var_nested" | ||
// 1. v['nested']['x'] is null root | ||
order_qt_explode_sql """select count(),cast(vv as int) from var_nested_explode_variant_with_abnomal lateral view explode_variant_array(v['nested']['x']) tmp as vv where vv = 10 group by cast(vv as int)""" | ||
// 2. v['nested']['xx'] is normal array | ||
order_qt_explode_sql """select count(),cast(vv as int) from var_nested_explode_variant_with_abnomal lateral view explode_variant_array(v['nested']['xx']) tmp as vv where vv = 10 group by cast(vv as int)""" | ||
// 3. v['xx'] is a non-array scalar type | ||
test { | ||
sql """select count(),cast(vv as int) from var_nested_explode_variant_with_abnomal lateral view explode_variant_array(v['xx']) tmp as vv where vv = 10 group by cast(vv as int)""" | ||
exception("explode not support none array type") | ||
} | ||
// 4. v['k1'] is json scalar type | ||
test { | ||
sql """select count(),cast(vv as int) from var_nested_explode_variant_with_abnomal lateral view explode_variant_array(v['k1']) tmp as vv where vv = 10 group by cast(vv as int)""" | ||
exception("explode not support none array type") | ||
} | ||
// 5. toplevel nested array | ||
sql "truncate table var_nested_explode_variant_with_abnomal" | ||
sql """insert into var_nested_explode_variant_with_abnomal values(1, '[{"a" : 10}, {"b" : "20", "c" :1024, "a" : 11}]')""" | ||
sql """insert into var_nested_explode_variant_with_abnomal values(2, '[{"a" : 10}, {"b" : "20", "a" : 150}]')""" | ||
order_qt_explode_sql """select count(),cast(vv as int) from var_nested_explode_variant_with_abnomal lateral view explode_variant_array(v['a']) tmp as vv where vv = 10 group by cast(vv as int)""" | ||
// FIXME after refactor | ||
// order_qt_explode_sql """select count(),cast(vv as int) from var_nested_explode_variant_with_abnomal lateral view explode_variant_array(v) tmp as vv where vv['a'] = 10 group by cast(vv as int)""" | ||
} finally { | ||
// reset flags | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There is a bug here which will lead to a core dump.