From 705012ef0d728f67afd6edf9d82265620f5e8086 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 28 Nov 2024 22:53:02 +0800 Subject: [PATCH] branch-3.0: [enhance](orc) Optimize ORC Predicate Pushdown for OR-connected Predicate #43255 (#44436) Cherry-picked from #43255 Co-authored-by: Socrates --- be/src/runtime/exec_env.h | 1 + be/src/vec/exec/format/orc/vorc_reader.cpp | 478 ++++++++++++------ be/src/vec/exec/format/orc/vorc_reader.h | 37 +- be/test/exec/test_data/orc_scanner/orders.orc | Bin 0 -> 1293 bytes be/test/testutil/desc_tbl_builder.cpp | 29 +- be/test/testutil/desc_tbl_builder.h | 17 +- be/test/vec/exec/orc_reader_test.cpp | 155 ++++++ .../external_table_p0/hive/test_hive_orc.out | 48 ++ .../hive/test_hive_orc.groovy | 12 + 9 files changed, 595 insertions(+), 182 deletions(-) create mode 100644 be/test/exec/test_data/orc_scanner/orders.orc create mode 100644 be/test/vec/exec/orc_reader_test.cpp diff --git a/be/src/runtime/exec_env.h b/be/src/runtime/exec_env.h index 031595a9c41b1d..1b92a6b3659e3f 100644 --- a/be/src/runtime/exec_env.h +++ b/be/src/runtime/exec_env.h @@ -291,6 +291,7 @@ class ExecEnv { static void set_tracking_memory(bool tracking_memory) { _s_tracking_memory.store(tracking_memory, std::memory_order_release); } + void set_orc_memory_pool(orc::MemoryPool* pool) { _orc_memory_pool = pool; } #endif LoadStreamMapPool* load_stream_map_pool() { return _load_stream_map_pool.get(); } diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp b/be/src/vec/exec/format/orc/vorc_reader.cpp index e2161d8a6dc48a..c649ef68617ff6 100644 --- a/be/src/vec/exec/format/orc/vorc_reader.cpp +++ b/be/src/vec/exec/format/orc/vorc_reader.cpp @@ -18,13 +18,14 @@ #include "vorc_reader.h" #include -#include #include +#include #include #include #include #include +#include // IWYU pragma: no_include #include // IWYU pragma: keep #include @@ -33,12 +34,10 @@ #include #include #include -#include #include "cctz/civil_time.h" #include "cctz/time_zone.h" #include "common/exception.h" -#include "exec/olap_utils.h" #include "exprs/create_predicate_function.h" #include "exprs/hybrid_set.h" #include "gutil/strings/substitute.h" @@ -55,6 +54,7 @@ #include "runtime/descriptors.h" #include "runtime/primitive_type.h" #include "runtime/thread_context.h" +#include "util/runtime_profile.h" #include "util/slice.h" #include "util/timezone_utils.h" #include "vec/columns/column.h" @@ -71,15 +71,13 @@ #include "vec/data_types/data_type_map.h" #include "vec/data_types/data_type_nullable.h" #include "vec/data_types/data_type_struct.h" -#include "vec/exec/format/orc/orc_memory_pool.h" #include "vec/exec/format/table/transactional_hive_common.h" #include "vec/exprs/vbloom_predicate.h" #include "vec/exprs/vdirect_in_predicate.h" #include "vec/exprs/vectorized_fn_call.h" +#include "vec/exprs/vexpr_fwd.h" #include "vec/exprs/vin_predicate.h" -#include "vec/exprs/vliteral.h" #include "vec/exprs/vruntimefilter_wrapper.h" -#include "vec/exprs/vslot_ref.h" #include "vec/runtime/vdatetime_value.h" namespace doris { @@ -237,6 +235,10 @@ void OrcReader::_init_profile() { ADD_CHILD_TIMER_WITH_LEVEL(_profile, "DecodeNullMapTime", orc_profile, 1); _orc_profile.filter_block_time = ADD_CHILD_TIMER_WITH_LEVEL(_profile, "FilterBlockTime", orc_profile, 1); + _orc_profile.selected_row_group_count = + ADD_COUNTER_WITH_LEVEL(_profile, "SelectedRowGroupCount", TUnit::UNIT, 1); + _orc_profile.evaluated_row_group_count = + ADD_COUNTER_WITH_LEVEL(_profile, "EvaluatedRowGroupCount", TUnit::UNIT, 1); } } @@ -259,6 +261,7 @@ Status OrcReader::_create_file_reader() { try { orc::ReaderOptions options; options.setMemoryPool(*ExecEnv::GetInstance()->orc_memory_pool()); + options.setReaderMetrics(&_reader_metrics); _reader = orc::createReader( std::unique_ptr(_file_input_stream.release()), options); } catch (std::exception& e) { @@ -388,6 +391,9 @@ Status OrcReader::_init_read_columns() { } _col_name_to_file_col_name[col_name] = read_col; + // TODO: refactor this + std::transform(read_col.begin(), read_col.end(), read_col.begin(), ::tolower); + _col_name_to_file_col_name_low_case[col_name] = read_col; } } return Status::OK(); @@ -461,8 +467,10 @@ static std::unordered_map TYPEKIND_TO_PRE {orc::TypeKind::BOOLEAN, orc::PredicateDataType::BOOLEAN}}; template -std::tuple convert_to_orc_literal(const orc::Type* type, const void* value, - int precision, int scale) { +std::tuple convert_to_orc_literal(const orc::Type* type, + StringRef& literal_data, int precision, + int scale) { + const auto* value = literal_data.data; try { switch (type->getKind()) { case orc::TypeKind::BOOLEAN: @@ -486,8 +494,7 @@ std::tuple convert_to_orc_literal(const orc::Type* type, con case orc::TypeKind::CHAR: [[fallthrough]]; case orc::TypeKind::VARCHAR: { - StringRef* string_value = (StringRef*)value; - return std::make_tuple(true, orc::Literal(string_value->data, string_value->size)); + return std::make_tuple(true, orc::Literal(literal_data.data, literal_data.size)); } case orc::TypeKind::DECIMAL: { int128_t decimal_value; @@ -558,190 +565,353 @@ std::tuple convert_to_orc_literal(const orc::Type* type, con } } -template -std::vector value_range_to_predicate( - const ColumnValueRange& col_val_range, const orc::Type* type, - std::vector* unsupported_pushdown_types) { - std::vector predicates; - - PrimitiveType src_type = OrcReader::convert_to_doris_type(type).type; - if (src_type != primitive_type) { - if (!(is_string_type(src_type) && is_string_type(primitive_type))) { - // not support schema change - return predicates; - } +std::tuple OrcReader::_make_orc_literal( + const VSlotRef* slot_ref, const VLiteral* literal) { + auto file_col_name_low_case = _col_name_to_file_col_name_low_case[slot_ref->expr_name()]; + if (!_type_map.contains(file_col_name_low_case)) { + // TODO: this is for acid table + LOG(WARNING) << "Column " << slot_ref->expr_name() << " not found in _type_map"; + return std::make_tuple(false, orc::Literal(false), orc::PredicateDataType::LONG); + } + const auto* orc_type = _type_map[file_col_name_low_case]; + if (!TYPEKIND_TO_PREDICATE_TYPE.contains(orc_type->getKind())) { + LOG(WARNING) << "Unsupported Push Down Orc Type [TypeKind=" << orc_type->getKind() << "]"; + return std::make_tuple(false, orc::Literal(false), orc::PredicateDataType::LONG); + } + const auto predicate_type = TYPEKIND_TO_PREDICATE_TYPE[orc_type->getKind()]; + if (literal == nullptr) { + // only get the predicate_type + return std::make_tuple(true, orc::Literal(true), predicate_type); + } + auto literal_data = literal->get_column_ptr()->get_data_at(0); + auto* slot = _tuple_descriptor->slots()[slot_ref->column_id()]; + auto slot_type = slot->type(); + switch (slot_type.type) { +#define M(NAME) \ + case TYPE_##NAME: { \ + auto [valid, orc_literal] = convert_to_orc_literal( \ + orc_type, literal_data, slot_type.precision, slot_type.scale); \ + return std::make_tuple(valid, orc_literal, predicate_type); \ + } +#define APPLY_FOR_PRIMITIVE_TYPE(M) \ + M(TINYINT) \ + M(SMALLINT) \ + M(INT) \ + M(BIGINT) \ + M(LARGEINT) \ + M(CHAR) \ + M(DATE) \ + M(DATETIME) \ + M(DATEV2) \ + M(DATETIMEV2) \ + M(VARCHAR) \ + M(STRING) \ + M(HLL) \ + M(DECIMAL32) \ + M(DECIMAL64) \ + M(DECIMAL128I) \ + M(DECIMAL256) \ + M(DECIMALV2) \ + M(BOOLEAN) \ + M(IPV4) \ + M(IPV6) + APPLY_FOR_PRIMITIVE_TYPE(M) +#undef M + default: { + VLOG_CRITICAL << "Unsupported Convert Orc Literal [ColName=" << slot->col_name() << "]"; + return std::make_tuple(false, orc::Literal(false), predicate_type); + } } +} - if (unsupported_pushdown_types != nullptr) { - for (vector::iterator it = unsupported_pushdown_types->begin(); - it != unsupported_pushdown_types->end(); ++it) { - if (*it == type->getKind()) { - // Unsupported type - return predicates; - } - } +// check if the slot of expr can be pushed down to orc reader +bool OrcReader::_check_slot_can_push_down(const VExprSPtr& expr) { + if (!expr->children()[0]->is_slot_ref()) { + return false; } + const auto* slot_ref = static_cast(expr->children()[0].get()); + // check if the slot exists in orc file and not partition column + return _col_name_to_file_col_name.contains(slot_ref->expr_name()) && + !_lazy_read_ctx.predicate_partition_columns.contains(slot_ref->expr_name()); +} - orc::PredicateDataType predicate_data_type; - auto type_it = TYPEKIND_TO_PREDICATE_TYPE.find(type->getKind()); - if (type_it == TYPEKIND_TO_PREDICATE_TYPE.end()) { - // Unsupported type - return predicates; - } else { - predicate_data_type = type_it->second; +// check if there are rest children of expr can be pushed down to orc reader +bool OrcReader::_check_rest_children_can_push_down(const VExprSPtr& expr) { + if (expr->children().size() < 2) { + return false; } - if (col_val_range.is_fixed_value_range()) { - OrcPredicate in_predicate; - in_predicate.col_name = col_val_range.column_name(); - in_predicate.data_type = predicate_data_type; - in_predicate.op = SQLFilterOp::FILTER_IN; - for (const auto& value : col_val_range.get_fixed_value_set()) { - auto [valid, literal] = convert_to_orc_literal( - type, &value, col_val_range.precision(), col_val_range.scale()); - if (valid) { - in_predicate.literals.push_back(literal); - } + for (size_t i = 1; i < expr->children().size(); ++i) { + if (!expr->children()[i]->is_literal()) { + return false; } - if (!in_predicate.literals.empty()) { - predicates.emplace_back(in_predicate); + } + return true; +} + +// check if the expr can be pushed down to orc reader +bool OrcReader::_check_expr_can_push_down(const VExprSPtr& expr) { + DCHECK(expr != nullptr); + switch (expr->op()) { + case TExprOpcode::COMPOUND_AND: + // at least one child can be pushed down + return std::ranges::any_of(expr->children(), [this](const auto& child) { + return _check_expr_can_push_down(child); + }); + case TExprOpcode::COMPOUND_OR: + // all children must be pushed down + return std::ranges::all_of(expr->children(), [this](const auto& child) { + return _check_expr_can_push_down(child); + }); + case TExprOpcode::COMPOUND_NOT: + DCHECK_EQ(expr->children().size(), 1); + return _check_expr_can_push_down(expr->children()[0]); + + case TExprOpcode::GE: + case TExprOpcode::GT: + case TExprOpcode::LE: + case TExprOpcode::LT: + case TExprOpcode::EQ: + case TExprOpcode::NE: + case TExprOpcode::FILTER_IN: + case TExprOpcode::FILTER_NOT_IN: + return _check_slot_can_push_down(expr) && _check_rest_children_can_push_down(expr); + + case TExprOpcode::INVALID_OPCODE: + if (expr->node_type() == TExprNodeType::FUNCTION_CALL) { + auto fn_name = expr->fn().name.function_name; + // only support is_null_pred and is_not_null_pred + if (fn_name == "is_null_pred" || fn_name == "is_not_null_pred") { + return _check_slot_can_push_down(expr); + } + VLOG_CRITICAL << "Unsupported function [funciton=" << fn_name << "]"; } - return predicates; + return false; + default: + VLOG_CRITICAL << "Unsupported Opcode [OpCode=" << expr->op() << "]"; + return false; + } +} + +bool OrcReader::_build_less_than(const VExprSPtr& expr, + std::unique_ptr& builder) { + DCHECK(expr->children().size() == 2); + DCHECK(expr->children()[0]->is_slot_ref()); + DCHECK(expr->children()[1]->is_literal()); + const auto* slot_ref = static_cast(expr->children()[0].get()); + const auto* literal = static_cast(expr->children()[1].get()); + auto [valid, orc_literal, predicate_type] = _make_orc_literal(slot_ref, literal); + if (!valid) { + return false; } + builder->lessThan(slot_ref->expr_name(), predicate_type, orc_literal); + return true; +} - const auto& high_value = col_val_range.get_range_max_value(); - const auto& low_value = col_val_range.get_range_min_value(); - const auto& high_op = col_val_range.get_range_high_op(); - const auto& low_op = col_val_range.get_range_low_op(); +bool OrcReader::_build_less_than_equals(const VExprSPtr& expr, + std::unique_ptr& builder) { + DCHECK(expr->children().size() == 2); + DCHECK(expr->children()[0]->is_slot_ref()); + DCHECK(expr->children()[1]->is_literal()); + const auto* slot_ref = static_cast(expr->children()[0].get()); + const auto* literal = static_cast(expr->children()[1].get()); + auto [valid, orc_literal, predicate_type] = _make_orc_literal(slot_ref, literal); + if (!valid) { + return false; + } + builder->lessThanEquals(slot_ref->expr_name(), predicate_type, orc_literal); + return true; +} - // orc can only push down is_null. When col_value_range._contain_null = true, only indicating that - // value can be null, not equals null, so ignore _contain_null in col_value_range - if (col_val_range.is_high_value_maximum() && high_op == SQLFilterOp::FILTER_LESS_OR_EQUAL && - col_val_range.is_low_value_mininum() && low_op == SQLFilterOp::FILTER_LARGER_OR_EQUAL) { - return predicates; +bool OrcReader::_build_equals(const VExprSPtr& expr, + std::unique_ptr& builder) { + DCHECK(expr->children().size() == 2); + DCHECK(expr->children()[0]->is_slot_ref()); + DCHECK(expr->children()[1]->is_literal()); + const auto* slot_ref = static_cast(expr->children()[0].get()); + const auto* literal = static_cast(expr->children()[1].get()); + auto [valid, orc_literal, predicate_type] = _make_orc_literal(slot_ref, literal); + if (!valid) { + return false; } + builder->equals(slot_ref->expr_name(), predicate_type, orc_literal); + return true; +} - if (low_value < high_value) { - if (!col_val_range.is_low_value_mininum() || - SQLFilterOp::FILTER_LARGER_OR_EQUAL != low_op) { - auto [valid, low_literal] = convert_to_orc_literal( - type, &low_value, col_val_range.precision(), col_val_range.scale()); - if (valid) { - OrcPredicate low_predicate; - low_predicate.col_name = col_val_range.column_name(); - low_predicate.data_type = predicate_data_type; - low_predicate.op = low_op; - low_predicate.literals.emplace_back(low_literal); - predicates.emplace_back(low_predicate); - } - } - if (!col_val_range.is_high_value_maximum() || - SQLFilterOp::FILTER_LESS_OR_EQUAL != high_op) { - auto [valid, high_literal] = convert_to_orc_literal( - type, &high_value, col_val_range.precision(), col_val_range.scale()); - if (valid) { - OrcPredicate high_predicate; - high_predicate.col_name = col_val_range.column_name(); - high_predicate.data_type = predicate_data_type; - high_predicate.op = high_op; - high_predicate.literals.emplace_back(high_literal); - predicates.emplace_back(high_predicate); - } +bool OrcReader::_build_filter_in(const VExprSPtr& expr, + std::unique_ptr& builder) { + DCHECK(expr->children().size() >= 2); + DCHECK(expr->children()[0]->is_slot_ref()); + const auto* slot_ref = static_cast(expr->children()[0].get()); + std::vector literals; + orc::PredicateDataType predicate_type = orc::PredicateDataType::LONG; + for (size_t i = 1; i < expr->children().size(); ++i) { + DCHECK(expr->children()[i]->is_literal()); + const auto* literal = static_cast(expr->children()[i].get()); + auto [valid, orc_literal, type] = _make_orc_literal(slot_ref, literal); + if (!valid) { + return false; } + literals.emplace_back(orc_literal); + predicate_type = type; } - return predicates; + DCHECK(!literals.empty()); + builder->in(slot_ref->expr_name(), predicate_type, literals); + return true; } -bool static build_search_argument(std::vector& predicates, int index, - std::unique_ptr& builder) { - if (index >= predicates.size()) { +bool OrcReader::_build_is_null(const VExprSPtr& expr, + std::unique_ptr& builder) { + DCHECK(expr->children().size() == 1); + DCHECK(expr->children()[0]->is_slot_ref()); + const auto* slot_ref = static_cast(expr->children()[0].get()); + auto [valid, _, predicate_type] = _make_orc_literal(slot_ref, nullptr); + builder->isNull(slot_ref->expr_name(), predicate_type); + return true; +} + +bool OrcReader::_build_search_argument(const VExprSPtr& expr, + std::unique_ptr& builder) { + if (expr == nullptr) { return false; } - if (index < predicates.size() - 1) { - builder->startAnd(); + + // if expr can not be pushed down, skip it and continue to next expr + if (!_check_expr_can_push_down(expr)) { + return false; } - OrcPredicate& predicate = predicates[index]; - switch (predicate.op) { - case SQLFilterOp::FILTER_IN: { - if (predicate.literals.size() == 1) { - builder->equals(predicate.col_name, predicate.data_type, predicate.literals[0]); - } else { - builder->in(predicate.col_name, predicate.data_type, predicate.literals); + + switch (expr->op()) { + case TExprOpcode::COMPOUND_AND: { + bool at_least_one_can_push_down = false; + builder->startAnd(); + for (const auto& child : expr->children()) { + if (_build_search_argument(child, builder)) { + at_least_one_can_push_down = true; + } + } + if (!at_least_one_can_push_down) { + // if all exprs can not be pushed down, builder->end() will throw exception + return false; } + builder->end(); break; } - case SQLFilterOp::FILTER_LESS: - builder->lessThan(predicate.col_name, predicate.data_type, predicate.literals[0]); + case TExprOpcode::COMPOUND_OR: + builder->startOr(); + for (const auto& child : expr->children()) { + if (!_build_search_argument(child, builder)) { + return false; + } + } + builder->end(); + break; + case TExprOpcode::COMPOUND_NOT: + builder->startNot(); + DCHECK_EQ(expr->children().size(), 1); + if (!_build_search_argument(expr->children()[0], builder)) { + return false; + } + builder->end(); break; - case SQLFilterOp::FILTER_LESS_OR_EQUAL: - builder->lessThanEquals(predicate.col_name, predicate.data_type, predicate.literals[0]); + case TExprOpcode::GE: + builder->startNot(); + if (!_build_less_than(expr, builder)) { + return false; + } + builder->end(); break; - case SQLFilterOp::FILTER_LARGER: { + case TExprOpcode::GT: builder->startNot(); - builder->lessThanEquals(predicate.col_name, predicate.data_type, predicate.literals[0]); + if (!_build_less_than_equals(expr, builder)) { + return false; + } builder->end(); break; - } - case SQLFilterOp::FILTER_LARGER_OR_EQUAL: { + case TExprOpcode::LE: + if (!_build_less_than_equals(expr, builder)) { + return false; + } + break; + case TExprOpcode::LT: + if (!_build_less_than(expr, builder)) { + return false; + } + break; + case TExprOpcode::EQ: + if (!_build_equals(expr, builder)) { + return false; + } + break; + case TExprOpcode::NE: builder->startNot(); - builder->lessThan(predicate.col_name, predicate.data_type, predicate.literals[0]); + if (!_build_equals(expr, builder)) { + return false; + } builder->end(); break; - } - default: - return false; - } - if (index < predicates.size() - 1) { - bool can_build = build_search_argument(predicates, index + 1, builder); - if (!can_build) { + case TExprOpcode::FILTER_IN: + if (!_build_filter_in(expr, builder)) { + return false; + } + break; + case TExprOpcode::FILTER_NOT_IN: + builder->startNot(); + if (!_build_filter_in(expr, builder)) { return false; } builder->end(); + break; + // is null and is not null is represented as function call + case TExprOpcode::INVALID_OPCODE: { + DCHECK(expr->node_type() == TExprNodeType::FUNCTION_CALL); + if (expr->fn().name.function_name == "is_null_pred") { + if (!_build_is_null(expr, builder)) { + return false; + } + } else if (expr->fn().name.function_name == "is_not_null_pred") { + builder->startNot(); + if (!_build_is_null(expr, builder)) { + return false; + } + builder->end(); + } else { + __builtin_unreachable(); + } + break; + } + default: { + // should not reach here, because _check_expr_can_push_down has already checked + __builtin_unreachable(); + } } return true; } -bool OrcReader::_init_search_argument( - std::unordered_map* colname_to_value_range) { - if ((!_enable_filter_by_min_max) || colname_to_value_range->empty()) { +bool OrcReader::_init_search_argument(const VExprContextSPtrs& conjuncts) { + if (!_enable_filter_by_min_max) { return false; } - std::vector predicates; - auto& root_type = _reader->getType(); - std::unordered_map type_map; - for (int i = 0; i < root_type.getSubtypeCount(); ++i) { - type_map.emplace(get_field_name_lower_case(&root_type, i), root_type.getSubtype(i)); - } - for (auto& col_name : _lazy_read_ctx.all_read_columns) { - auto iter = colname_to_value_range->find(col_name); - if (iter == colname_to_value_range->end()) { - continue; - } - auto type_it = type_map.find(_col_name_to_file_col_name[col_name]); - if (type_it == type_map.end()) { - continue; + + // build search argument, if any expr can not be pushed down, return false + auto builder = orc::SearchArgumentFactory::newBuilder(); + bool at_least_one_can_push_down = false; + builder->startAnd(); + for (const auto& expr_ctx : conjuncts) { + if (_build_search_argument(expr_ctx->root(), builder)) { + at_least_one_can_push_down = true; } - std::visit( - [&](auto& range) { - std::vector value_predicates = value_range_to_predicate( - range, type_it->second, _unsupported_pushdown_types); - for (auto& range_predicate : value_predicates) { - predicates.emplace_back(range_predicate); - } - }, - iter->second); } - if (predicates.empty()) { - return false; - } - std::unique_ptr builder = orc::SearchArgumentFactory::newBuilder(); - if (build_search_argument(predicates, 0, builder)) { - std::unique_ptr sargs = builder->build(); - _row_reader_options.searchArgument(std::move(sargs)); - return true; - } else { + if (!at_least_one_can_push_down) { + // if all exprs can not be pushed down, builder->end() will throw exception return false; } + builder->end(); + + auto sargs = builder->build(); + _profile->add_info_string("OrcReader SearchArgument: ", sargs->toString()); + _row_reader_options.searchArgument(std::move(sargs)); + return true; } Status OrcReader::set_fill_columns( @@ -854,7 +1024,7 @@ Status OrcReader::set_fill_columns( _lazy_read_ctx.can_lazy_read = true; } - if (_colname_to_value_range == nullptr || !_init_search_argument(_colname_to_value_range)) { + if (_lazy_read_ctx.conjuncts.empty() || !_init_search_argument(_lazy_read_ctx.conjuncts)) { _lazy_read_ctx.can_lazy_read = false; } try { @@ -1619,6 +1789,12 @@ std::string OrcReader::get_field_name_lower_case(const orc::Type* orc_type, int Status OrcReader::get_next_block(Block* block, size_t* read_rows, bool* eof) { RETURN_IF_ERROR(get_next_block_impl(block, read_rows, eof)); + if (*eof) { + COUNTER_UPDATE(_orc_profile.selected_row_group_count, + _reader_metrics.SelectedRowGroupCount); + COUNTER_UPDATE(_orc_profile.evaluated_row_group_count, + _reader_metrics.EvaluatedRowGroupCount); + } if (_orc_filter) { RETURN_IF_ERROR(_orc_filter->get_status()); } diff --git a/be/src/vec/exec/format/orc/vorc_reader.h b/be/src/vec/exec/format/orc/vorc_reader.h index 0807f4949e5850..8c73957e79e4e0 100644 --- a/be/src/vec/exec/format/orc/vorc_reader.h +++ b/be/src/vec/exec/format/orc/vorc_reader.h @@ -18,9 +18,9 @@ #pragma once #include -#include -#include +#include +#include #include #include #include @@ -51,6 +51,8 @@ #include "vec/exec/format/format_common.h" #include "vec/exec/format/generic_reader.h" #include "vec/exec/format/table/transactional_hive_reader.h" +#include "vec/exprs/vliteral.h" +#include "vec/exprs/vslot_ref.h" namespace doris { class RuntimeState; @@ -80,13 +82,6 @@ namespace doris::vectorized { class ORCFileInputStream; -struct OrcPredicate { - std::string col_name; - orc::PredicateDataType data_type; - std::vector literals; - SQLFilterOp op; -}; - struct LazyReadContext { VExprContextSPtrs conjuncts; bool can_lazy_read = false; @@ -228,6 +223,8 @@ class OrcReader : public GenericReader { RuntimeProfile::Counter* decode_value_time = nullptr; RuntimeProfile::Counter* decode_null_map_time = nullptr; RuntimeProfile::Counter* filter_block_time = nullptr; + RuntimeProfile::Counter* selected_row_group_count = nullptr; + RuntimeProfile::Counter* evaluated_row_group_count = nullptr; }; class ORCFilterImpl : public orc::ORCFilter { @@ -291,8 +288,23 @@ class OrcReader : public GenericReader { bool* is_hive1_orc); static bool _check_acid_schema(const orc::Type& type); static const orc::Type& _remove_acid(const orc::Type& type); - bool _init_search_argument( - std::unordered_map* colname_to_value_range); + std::tuple _make_orc_literal( + const VSlotRef* slot_ref, const VLiteral* literal); + bool _check_slot_can_push_down(const VExprSPtr& expr); + bool _check_rest_children_can_push_down(const VExprSPtr& expr); + bool _check_expr_can_push_down(const VExprSPtr& expr); + bool _build_less_than(const VExprSPtr& expr, + std::unique_ptr& builder); + bool _build_less_than_equals(const VExprSPtr& expr, + std::unique_ptr& builder); + bool _build_equals(const VExprSPtr& expr, std::unique_ptr& builder); + bool _build_filter_in(const VExprSPtr& expr, + std::unique_ptr& builder); + bool _build_is_null(const VExprSPtr& expr, + std::unique_ptr& builder); + bool _build_search_argument(const VExprSPtr& expr, + std::unique_ptr& builder); + bool _init_search_argument(const VExprContextSPtrs& conjuncts); void _init_bloom_filter( std::unordered_map* colname_to_value_range); void _init_system_properties(); @@ -578,11 +590,14 @@ class OrcReader : public GenericReader { bool _is_hive1_orc_or_use_idx = false; std::unordered_map _col_name_to_file_col_name; + // TODO: check if we can remove _col_name_to_file_col_name_low_case + std::unordered_map _col_name_to_file_col_name_low_case; std::unordered_map _type_map; std::vector _col_orc_type; std::unique_ptr _file_input_stream; Statistics _statistics; OrcProfile _orc_profile; + orc::ReaderMetrics _reader_metrics; std::unique_ptr _batch; std::unique_ptr _reader; diff --git a/be/test/exec/test_data/orc_scanner/orders.orc b/be/test/exec/test_data/orc_scanner/orders.orc new file mode 100644 index 0000000000000000000000000000000000000000..6fad5043288d430f6f158fd3efedf5050ad185c3 GIT binary patch literal 1293 zcmbtT&ubGw6n?WmXD7SqbWL@#ly;2~V_R+dBW(;Q!KjoX&C-hz#TeSC!8BWv&7mG7 zSn%Mosri|?xy;4cc{?6SJNruyCXs5W-w26vlzwJz`auSPAn%{Wn+AosGd(8tK;3{+? z01Y7SB7VstB6;W-0B?VO;<&g0d0gCs#cA9X-XH|MQGnf}rh5+_HpIJOJzPh|Kb``& zEj!;P(wR8u7DM-X zRw+f<#Bcl&>-nQ3tZg28Lfzh4{%ac~o+lyJ^Fe8RLWZu~y-Q`@RU8XnUnH?f+&3ge z5}SmYwm~FWQ8k?!4)e=?a}|AB-Ea)QywYw3y{WYew}ML0ZaJo34*Z}}UvJh{s!jxx zWHz>1398P3HFh6SzgY`5932UIo_C^tx%H^FJ`^YAq`$USZ3GF4%9fJX6B>=C%(R5- z0UWxe5O+sUlUyiD?vNre#pOc$7jX&ZivR!s literal 0 HcmV?d00001 diff --git a/be/test/testutil/desc_tbl_builder.cpp b/be/test/testutil/desc_tbl_builder.cpp index 4cba9a44a4b0d1..6404d1c5449165 100644 --- a/be/test/testutil/desc_tbl_builder.cpp +++ b/be/test/testutil/desc_tbl_builder.cpp @@ -17,20 +17,9 @@ #include "testutil/desc_tbl_builder.h" -#include -#include -#include +#include -#include - -#include "common/object_pool.h" #include "common/status.h" -#include "gtest/gtest_pred_impl.h" -#include "runtime/define_primitive_type.h" -#include "runtime/descriptors.h" -#include "util/bit_util.h" - -using std::vector; namespace doris { @@ -44,7 +33,7 @@ TupleDescBuilder& DescriptorTblBuilder::declare_tuple() { // item_id of -1 indicates no itemTupleId static TSlotDescriptor make_slot_descriptor(int id, int parent_id, const TypeDescriptor& type, - int slot_idx, int item_id) { + const std::string& name, int slot_idx, int item_id) { int null_byte = slot_idx / 8; int null_bit = slot_idx % 8; TSlotDescriptor slot_desc; @@ -58,6 +47,7 @@ static TSlotDescriptor make_slot_descriptor(int id, int parent_id, const TypeDes slot_desc.__set_nullIndicatorBit(null_bit); slot_desc.__set_slotIdx(slot_idx); slot_desc.__set_isMaterialized(true); + slot_desc.__set_colName(name); // if (item_id != -1) { // slot_desc.__set_itemTupleId(item_id); // } @@ -78,8 +68,9 @@ DescriptorTbl* DescriptorTblBuilder::build() { int tuple_id = 0; int slot_id = 0; - for (int i = 0; i < _tuples_descs.size(); ++i) { - build_tuple(_tuples_descs[i]->slot_types(), &thrift_desc_tbl, &tuple_id, &slot_id); + for (auto& _tuples_desc : _tuples_descs) { + build_tuple(_tuples_desc->slot_types(), _tuples_desc->slot_names(), &thrift_desc_tbl, + &tuple_id, &slot_id); } Status status = DescriptorTbl::create(_obj_pool, thrift_desc_tbl, &desc_tbl); @@ -87,7 +78,8 @@ DescriptorTbl* DescriptorTblBuilder::build() { return desc_tbl; } -TTupleDescriptor DescriptorTblBuilder::build_tuple(const vector& slot_types, +TTupleDescriptor DescriptorTblBuilder::build_tuple(const std::vector& slot_types, + const std::vector& slot_names, TDescriptorTable* thrift_desc_tbl, int* next_tuple_id, int* slot_id) { // We never materialize struct slots (there's no in-memory representation of structs, @@ -95,7 +87,8 @@ TTupleDescriptor DescriptorTblBuilder::build_tuple(const vector& // still have a struct item type. In this case, the array item tuple contains the // "inlined" struct fields. if (slot_types.size() == 1 && slot_types[0].type == TYPE_STRUCT) { - return build_tuple(slot_types[0].children, thrift_desc_tbl, next_tuple_id, slot_id); + return build_tuple(slot_types[0].children, slot_types[0].field_names, thrift_desc_tbl, + next_tuple_id, slot_id); } int tuple_id = *next_tuple_id; @@ -111,7 +104,7 @@ TTupleDescriptor DescriptorTblBuilder::build_tuple(const vector& // } thrift_desc_tbl->slotDescriptors.push_back( - make_slot_descriptor(*slot_id, tuple_id, slot_types[i], i, item_id)); + make_slot_descriptor(*slot_id, tuple_id, slot_types[i], slot_names[i], i, item_id)); thrift_desc_tbl->__isset.slotDescriptors = true; ++(*slot_id); } diff --git a/be/test/testutil/desc_tbl_builder.h b/be/test/testutil/desc_tbl_builder.h index c29ef9acd43eb5..968b29bd00143c 100644 --- a/be/test/testutil/desc_tbl_builder.h +++ b/be/test/testutil/desc_tbl_builder.h @@ -20,15 +20,16 @@ #include +#include #include +#include "common/object_pool.h" +#include "runtime/descriptors.h" #include "runtime/types.h" namespace doris { -class ObjectPool; class TupleDescBuilder; -class DescriptorTbl; // Aids in the construction of a DescriptorTbl by declaring tuples and slots // associated with those tuples. @@ -40,6 +41,7 @@ class DescriptorTbl; // DescriptorTblBuilder builder; // builder.declare_tuple() << TYPE_TINYINT << TYPE_TIMESTAMP; // gets TupleId 0 // builder.declare_tuple() << TYPE_FLOAT; // gets TupleId 1 +// builder.declare_tuple() << std::make_tuple(TYPE_INT, "col1") << std::make_tuple(TYPE_STRING, "col2"); // gets Tuple with type and name // DescriptorTbl desc_tbl = builder.build(); class DescriptorTblBuilder { public: @@ -57,20 +59,31 @@ class DescriptorTblBuilder { std::vector _tuples_descs; TTupleDescriptor build_tuple(const std::vector& slot_types, + const std::vector& slot_names, TDescriptorTable* thrift_desc_tbl, int* tuple_id, int* slot_id); }; class TupleDescBuilder { public: + using SlotType = std::tuple; + TupleDescBuilder& operator<<(const SlotType& slot) { + _slot_types.push_back(std::get<0>(slot)); + _slot_names.push_back(std::get<1>(slot)); + return *this; + } + TupleDescBuilder& operator<<(const TypeDescriptor& slot_type) { _slot_types.push_back(slot_type); + _slot_names.emplace_back(""); return *this; } std::vector slot_types() const { return _slot_types; } + std::vector slot_names() const { return _slot_names; } private: std::vector _slot_types; + std::vector _slot_names; }; } // end namespace doris diff --git a/be/test/vec/exec/orc_reader_test.cpp b/be/test/vec/exec/orc_reader_test.cpp new file mode 100644 index 00000000000000..ec5bd9b519de63 --- /dev/null +++ b/be/test/vec/exec/orc_reader_test.cpp @@ -0,0 +1,155 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include + +#include +#include +#include +#include + +#include "orc/sargs/SearchArgument.hh" +#include "runtime/define_primitive_type.h" +#include "runtime/exec_env.h" +#include "runtime/runtime_state.h" +#include "testutil/desc_tbl_builder.h" +#include "vec/exec/format/orc/orc_memory_pool.h" +#include "vec/exec/format/orc/vorc_reader.h" +#include "vec/exprs/vexpr_context.h" +#include "vec/exprs/vexpr_fwd.h" +#include "vec/utils/util.hpp" +namespace doris::vectorized { +class OrcReaderTest : public testing::Test { +public: + OrcReaderTest() = default; + ~OrcReaderTest() override = default; + +private: + static constexpr const char* CANNOT_PUSH_DOWN_ERROR = "can't push down"; + std::string build_search_argument(const std::string& expr) { + // build orc_reader for table orders + std::vector column_names = { + "o_orderkey", "o_custkey", "o_orderstatus", "o_totalprice", "o_orderdate", + "o_orderpriority", "o_clerk", "o_shippriority", "o_comment"}; + ObjectPool object_pool; + DescriptorTblBuilder builder(&object_pool); + builder.declare_tuple() << std::make_tuple(TYPE_INT, "o_orderkey") + << std::make_tuple(TYPE_INT, "o_custkey") + << std::make_tuple(TYPE_STRING, "o_orderstatus") + << std::make_tuple(TYPE_DOUBLE, "o_totalprice") + << std::make_tuple(TYPE_DATE, "o_orderdate") + << std::make_tuple(TYPE_STRING, "o_orderpriority") + << std::make_tuple(TYPE_STRING, "o_clerk") + << std::make_tuple(TYPE_INT, "o_shippriority") + << std::make_tuple(TYPE_STRING, "o_comment"); + DescriptorTbl* desc_tbl = builder.build(); + auto* tuple_desc = const_cast(desc_tbl->get_tuple_descriptor(0)); + RowDescriptor row_desc(tuple_desc, false); + TFileScanRangeParams params; + TFileRangeDesc range; + range.path = "./be/test/exec/test_data/orc_scanner/orders.orc"; + range.start_offset = 0; + range.size = 1293; + auto reader = OrcReader::create_unique(params, range, "", nullptr, true); + auto status = reader->init_reader(&column_names, nullptr, {}, false, tuple_desc, &row_desc, + nullptr, nullptr); + EXPECT_TRUE(status.ok()); + + // deserialize expr + auto exprx = apache::thrift::from_json_string(expr); + VExprContextSPtr context; + status = VExpr::create_expr_tree(exprx, context); + EXPECT_TRUE(status.ok()); + + // prepare expr context + RuntimeState state; + state.set_desc_tbl(desc_tbl); + status = context->prepare(&state, row_desc); + EXPECT_TRUE(status.ok()); + + // build search argument + auto sarg_builder = orc::SearchArgumentFactory::newBuilder(); + auto res = reader->_build_search_argument(context->root(), sarg_builder); + if (!res) { + return CANNOT_PUSH_DOWN_ERROR; + } + return sarg_builder->build()->toString(); + } +}; + +TEST_F(OrcReaderTest, test_build_search_argument) { + ExecEnv::GetInstance()->set_orc_memory_pool(new ORCMemoryPool()); + std:: + vector + exprs = + { + // select count(o_orderkey) from tpch1_orc.orders where o_orderkey < 100 or o_orderkey > 5999900 or o_orderkey in (1000000, 2000000, 3000000); + R"|({"1":{"lst":["rec",13,{"1":{"i32":6},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":3},"4":{"i32":2},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":6},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":3},"4":{"i32":2},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":11},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"lt"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}},{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}]},"4":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"5":{"tf":0},"7":{"str":"lt(int, int)"},"11":{"i64":0},"13":{"tf":1},"14":{"tf":0},"15":{"tf":0},"16":{"i64":360}}},"28":{"i32":5},"29":{"tf":1}},{"1":{"i32":16},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"15":{"rec":{"1":{"i32":0},"2":{"i32":0},"3":{"i32":-1}}},"20":{"i32":-1},"29":{"tf":1},"36":{"str":"o_orderkey"}},{"1":{"i32":9},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"10":{"rec":{"1":{"i64":100}}},"20":{"i32":-1},"29":{"tf":0}},{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":13},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"gt"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}},{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}]},"4":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"5":{"tf":0},"7":{"str":"gt(int, int)"},"11":{"i64":0},"13":{"tf":1},"14":{"tf":0},"15":{"tf":0},"16":{"i64":360}}},"28":{"i32":5},"29":{"tf":1}},{"1":{"i32":16},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"15":{"rec":{"1":{"i32":0},"2":{"i32":0},"3":{"i32":-1}}},"20":{"i32":-1},"29":{"tf":1},"36":{"str":"o_orderkey"}},{"1":{"i32":9},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"10":{"rec":{"1":{"i64":5999900}}},"20":{"i32":-1},"29":{"tf":0}},{"1":{"i32":11},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":5},"4":{"i32":4},"11":{"rec":{"1":{"tf":0}}},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":16},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"15":{"rec":{"1":{"i32":0},"2":{"i32":0},"3":{"i32":-1}}},"20":{"i32":-1},"29":{"tf":1},"36":{"str":"o_orderkey"}},{"1":{"i32":9},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"10":{"rec":{"1":{"i64":1000000}}},"20":{"i32":-1},"29":{"tf":0}},{"1":{"i32":9},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"10":{"rec":{"1":{"i64":2000000}}},"20":{"i32":-1},"29":{"tf":0}},{"1":{"i32":9},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"10":{"rec":{"1":{"i64":3000000}}},"20":{"i32":-1},"29":{"tf":0}}]}})|", + // select count(o_orderkey) from tpch1_orc.orders where o_orderkey is null or (o_orderkey between 100 and 1000 and o_orderkey not in (200, 300, 400)); + R"|({"1":{"lst":["rec",16,{"1":{"i32":6},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":3},"4":{"i32":2},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":20},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"4":{"i32":1},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"is_null_pred"}}},"2":{"i32":0},"3":{"lst":["rec",1,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}]},"4":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"5":{"tf":0},"7":{"str":"is_null_pred(int)"},"11":{"i64":0},"13":{"tf":1},"14":{"tf":0},"15":{"tf":0},"16":{"i64":360}}},"29":{"tf":0}},{"1":{"i32":16},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"15":{"rec":{"1":{"i32":0},"2":{"i32":0},"3":{"i32":-1}}},"20":{"i32":-1},"29":{"tf":1},"36":{"str":"o_orderkey"}},{"1":{"i32":6},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":2},"4":{"i32":2},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":6},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":2},"4":{"i32":2},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":14},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"ge"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}},{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}]},"4":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"5":{"tf":0},"7":{"str":"ge(int, int)"},"11":{"i64":0},"13":{"tf":1},"14":{"tf":0},"15":{"tf":0},"16":{"i64":360}}},"28":{"i32":5},"29":{"tf":1}},{"1":{"i32":16},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"15":{"rec":{"1":{"i32":0},"2":{"i32":0},"3":{"i32":-1}}},"20":{"i32":-1},"29":{"tf":1},"36":{"str":"o_orderkey"}},{"1":{"i32":9},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"10":{"rec":{"1":{"i64":100}}},"20":{"i32":-1},"29":{"tf":0}},{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":12},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"le"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}},{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}]},"4":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"5":{"tf":0},"7":{"str":"le(int, int)"},"11":{"i64":0},"13":{"tf":1},"14":{"tf":0},"15":{"tf":0},"16":{"i64":360}}},"28":{"i32":5},"29":{"tf":1}},{"1":{"i32":16},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"15":{"rec":{"1":{"i32":0},"2":{"i32":0},"3":{"i32":-1}}},"20":{"i32":-1},"29":{"tf":1},"36":{"str":"o_orderkey"}},{"1":{"i32":9},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"10":{"rec":{"1":{"i64":1000}}},"20":{"i32":-1},"29":{"tf":0}},{"1":{"i32":11},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":6},"4":{"i32":4},"11":{"rec":{"1":{"tf":1}}},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":16},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"15":{"rec":{"1":{"i32":0},"2":{"i32":0},"3":{"i32":-1}}},"20":{"i32":-1},"29":{"tf":1},"36":{"str":"o_orderkey"}},{"1":{"i32":9},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"10":{"rec":{"1":{"i64":200}}},"20":{"i32":-1},"29":{"tf":0}},{"1":{"i32":9},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"10":{"rec":{"1":{"i64":300}}},"20":{"i32":-1},"29":{"tf":0}},{"1":{"i32":9},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"10":{"rec":{"1":{"i64":400}}},"20":{"i32":-1},"29":{"tf":0}}]}})|", + // select count(o_orderkey) from tpch1_orc.orders where o_orderkey is null or (o_orderkey between 1000000 and 1200000 and o_orderkey != 1100000); + R"|({"1":{"lst":["rec",14,{"1":{"i32":6},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":3},"4":{"i32":2},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":20},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"4":{"i32":1},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"is_null_pred"}}},"2":{"i32":0},"3":{"lst":["rec",1,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}]},"4":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"5":{"tf":0},"7":{"str":"is_null_pred(int)"},"11":{"i64":0},"13":{"tf":1},"14":{"tf":0},"15":{"tf":0},"16":{"i64":360}}},"29":{"tf":0}},{"1":{"i32":16},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"15":{"rec":{"1":{"i32":0},"2":{"i32":0},"3":{"i32":-1}}},"20":{"i32":-1},"29":{"tf":1},"36":{"str":"o_orderkey"}},{"1":{"i32":6},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":2},"4":{"i32":2},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":6},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":2},"4":{"i32":2},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":14},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"ge"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}},{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}]},"4":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"5":{"tf":0},"7":{"str":"ge(int, int)"},"11":{"i64":0},"13":{"tf":1},"14":{"tf":0},"15":{"tf":0},"16":{"i64":360}}},"28":{"i32":5},"29":{"tf":1}},{"1":{"i32":16},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"15":{"rec":{"1":{"i32":0},"2":{"i32":0},"3":{"i32":-1}}},"20":{"i32":-1},"29":{"tf":1},"36":{"str":"o_orderkey"}},{"1":{"i32":9},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"10":{"rec":{"1":{"i64":1000000}}},"20":{"i32":-1},"29":{"tf":0}},{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":12},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"le"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}},{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}]},"4":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"5":{"tf":0},"7":{"str":"le(int, int)"},"11":{"i64":0},"13":{"tf":1},"14":{"tf":0},"15":{"tf":0},"16":{"i64":360}}},"28":{"i32":5},"29":{"tf":1}},{"1":{"i32":16},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"15":{"rec":{"1":{"i32":0},"2":{"i32":0},"3":{"i32":-1}}},"20":{"i32":-1},"29":{"tf":1},"36":{"str":"o_orderkey"}},{"1":{"i32":9},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"10":{"rec":{"1":{"i64":1200000}}},"20":{"i32":-1},"29":{"tf":0}},{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":10},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"ne"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}},{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}]},"4":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"5":{"tf":0},"7":{"str":"ne(int, int)"},"11":{"i64":0},"13":{"tf":1},"14":{"tf":0},"15":{"tf":0},"16":{"i64":360}}},"28":{"i32":5},"29":{"tf":1}},{"1":{"i32":16},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"15":{"rec":{"1":{"i32":0},"2":{"i32":0},"3":{"i32":-1}}},"20":{"i32":-1},"29":{"tf":1},"36":{"str":"o_orderkey"}},{"1":{"i32":9},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"10":{"rec":{"1":{"i64":1100000}}},"20":{"i32":-1},"29":{"tf":0}}]}})|", + // SELECT count(o_orderkey) FROM tpch1_orc.orders WHERE o_orderkey IN (1000000, 2000000, 3000000) OR (o_orderdate >= '1994-01-01' AND o_orderdate <= '1994-12-31'); + R"|({"1":{"lst":["rec",13,{"1":{"i32":6},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":3},"4":{"i32":2},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":11},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":5},"4":{"i32":4},"11":{"rec":{"1":{"tf":0}}},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":16},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"15":{"rec":{"1":{"i32":0},"2":{"i32":0},"3":{"i32":-1}}},"20":{"i32":-1},"29":{"tf":1},"36":{"str":"o_orderkey"}},{"1":{"i32":9},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"10":{"rec":{"1":{"i64":1000000}}},"20":{"i32":-1},"29":{"tf":0}},{"1":{"i32":9},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"10":{"rec":{"1":{"i64":2000000}}},"20":{"i32":-1},"29":{"tf":0}},{"1":{"i32":9},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"10":{"rec":{"1":{"i64":3000000}}},"20":{"i32":-1},"29":{"tf":0}},{"1":{"i32":6},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":2},"4":{"i32":2},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":14},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"ge"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":26}}}}]},"3":{"i64":-1}},{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":26}}}}]},"3":{"i64":-1}}]},"4":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"5":{"tf":0},"7":{"str":"ge(datev2, datev2)"},"11":{"i64":0},"13":{"tf":1},"14":{"tf":0},"15":{"tf":0},"16":{"i64":360}}},"28":{"i32":26},"29":{"tf":1}},{"1":{"i32":16},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":26}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"15":{"rec":{"1":{"i32":4},"2":{"i32":0},"3":{"i32":-1}}},"20":{"i32":-1},"29":{"tf":1},"36":{"str":"o_orderdate"}},{"1":{"i32":7},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":26}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"8":{"rec":{"1":{"str":"1994-01-01"}}},"20":{"i32":-1},"29":{"tf":0}},{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":12},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"le"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":26}}}}]},"3":{"i64":-1}},{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":26}}}}]},"3":{"i64":-1}}]},"4":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"5":{"tf":0},"7":{"str":"le(datev2, datev2)"},"11":{"i64":0},"13":{"tf":1},"14":{"tf":0},"15":{"tf":0},"16":{"i64":360}}},"28":{"i32":26},"29":{"tf":1}},{"1":{"i32":16},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":26}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"15":{"rec":{"1":{"i32":4},"2":{"i32":0},"3":{"i32":-1}}},"20":{"i32":-1},"29":{"tf":1},"36":{"str":"o_orderdate"}},{"1":{"i32":7},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":26}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"8":{"rec":{"1":{"str":"1994-12-31"}}},"20":{"i32":-1},"29":{"tf":0}}]}})|", + // select count(o_orderkey) from tpch1_orc.orders where o_orderkey < 2 or (o_comment like '%delayed%' and o_orderpriority = '1-URGENT'); + R"|({"1":{"lst":["rec",11,{"1":{"i32":6},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":3},"4":{"i32":2},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":11},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"lt"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}},{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}]},"4":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"5":{"tf":0},"7":{"str":"lt(int, int)"},"11":{"i64":0},"13":{"tf":1},"14":{"tf":0},"15":{"tf":0},"16":{"i64":360}}},"28":{"i32":5},"29":{"tf":1}},{"1":{"i32":16},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"15":{"rec":{"1":{"i32":0},"2":{"i32":0},"3":{"i32":-1}}},"20":{"i32":-1},"29":{"tf":1},"36":{"str":"o_orderkey"}},{"1":{"i32":9},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"10":{"rec":{"1":{"i64":2}}},"20":{"i32":-1},"29":{"tf":0}},{"1":{"i32":6},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":2},"4":{"i32":2},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":20},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"like"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":23},"2":{"i32":2147483643}}}}]},"3":{"i64":-1}},{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":15},"2":{"i32":9}}}}]},"3":{"i64":9}}]},"4":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"5":{"tf":0},"7":{"str":"like(text, varchar(9))"},"9":{"rec":{"1":{"str":""}}},"11":{"i64":0},"13":{"tf":1},"14":{"tf":0},"15":{"tf":0},"16":{"i64":360}}},"29":{"tf":1}},{"1":{"i32":16},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":23},"2":{"i32":2147483643}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"15":{"rec":{"1":{"i32":8},"2":{"i32":0},"3":{"i32":-1}}},"20":{"i32":-1},"29":{"tf":1},"36":{"str":"o_comment"}},{"1":{"i32":17},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":15},"2":{"i32":65533}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"16":{"rec":{"1":{"str":"%delayed%"}}},"20":{"i32":-1},"29":{"tf":0}},{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":9},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"eq"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":23},"2":{"i32":2147483643}}}}]},"3":{"i64":-1}},{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":15},"2":{"i32":65533}}}}]},"3":{"i64":-1}}]},"4":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"5":{"tf":0},"7":{"str":"eq(text, varchar(65533))"},"11":{"i64":0},"13":{"tf":1},"14":{"tf":0},"15":{"tf":0},"16":{"i64":360}}},"28":{"i32":23},"29":{"tf":1}},{"1":{"i32":16},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":23},"2":{"i32":2147483643}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"15":{"rec":{"1":{"i32":5},"2":{"i32":0},"3":{"i32":-1}}},"20":{"i32":-1},"29":{"tf":1},"36":{"str":"o_orderpriority"}},{"1":{"i32":17},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":15},"2":{"i32":65533}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"16":{"rec":{"1":{"str":"1-URGENT"}}},"20":{"i32":-1},"29":{"tf":0}}]}})|", + // select count(o_orderkey) from tpch1_orc.orders where o_orderkey < 1 + 1; + R"|({"1":{"lst":["rec",3,{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":11},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"lt"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}},{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}]},"4":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"5":{"tf":0},"7":{"str":"lt(int, int)"},"11":{"i64":0},"13":{"tf":1},"14":{"tf":0},"15":{"tf":0},"16":{"i64":360}}},"28":{"i32":5},"29":{"tf":1}},{"1":{"i32":16},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"15":{"rec":{"1":{"i32":0},"2":{"i32":0},"3":{"i32":-1}}},"20":{"i32":-1},"29":{"tf":1},"36":{"str":"o_orderkey"}},{"1":{"i32":9},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"10":{"rec":{"1":{"i64":2}}},"20":{"i32":-1},"29":{"tf":0}}]}})|", + // SELECT count(o_orderkey) FROM tpch1_orc.orders WHERE o_comment LIKE '%delayed%' OR o_orderpriority = '1-URGENT'; + R"|({"1":{"lst":["rec",7,{"1":{"i32":6},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":3},"4":{"i32":2},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":20},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"like"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":23},"2":{"i32":2147483643}}}}]},"3":{"i64":-1}},{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":15},"2":{"i32":9}}}}]},"3":{"i64":9}}]},"4":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"5":{"tf":0},"7":{"str":"like(text, varchar(9))"},"9":{"rec":{"1":{"str":""}}},"11":{"i64":0},"13":{"tf":1},"14":{"tf":0},"15":{"tf":0},"16":{"i64":360}}},"29":{"tf":1}},{"1":{"i32":16},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":23},"2":{"i32":2147483643}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"15":{"rec":{"1":{"i32":8},"2":{"i32":0},"3":{"i32":-1}}},"20":{"i32":-1},"29":{"tf":1},"36":{"str":"o_comment"}},{"1":{"i32":17},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":15},"2":{"i32":65533}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"16":{"rec":{"1":{"str":"%delayed%"}}},"20":{"i32":-1},"29":{"tf":0}},{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":9},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"eq"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":23},"2":{"i32":2147483643}}}}]},"3":{"i64":-1}},{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":15},"2":{"i32":65533}}}}]},"3":{"i64":-1}}]},"4":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"5":{"tf":0},"7":{"str":"eq(text, varchar(65533))"},"11":{"i64":0},"13":{"tf":1},"14":{"tf":0},"15":{"tf":0},"16":{"i64":360}}},"28":{"i32":23},"29":{"tf":1}},{"1":{"i32":16},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":23},"2":{"i32":2147483643}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"15":{"rec":{"1":{"i32":5},"2":{"i32":0},"3":{"i32":-1}}},"20":{"i32":-1},"29":{"tf":1},"36":{"str":"o_orderpriority"}},{"1":{"i32":17},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":15},"2":{"i32":65533}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"16":{"rec":{"1":{"str":"1-URGENT"}}},"20":{"i32":-1},"29":{"tf":0}}]}})|", + // select count(o_orderkey) from tpch1_orc.orders where o_orderkey between 1 and 100 or random() > 0.5; + R"|({"1":{"lst":["rec",11,{"1":{"i32":6},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":3},"4":{"i32":2},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":6},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":2},"4":{"i32":2},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":14},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"ge"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}},{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}]},"4":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"5":{"tf":0},"7":{"str":"ge(int, int)"},"11":{"i64":0},"13":{"tf":1},"14":{"tf":0},"15":{"tf":0},"16":{"i64":360}}},"28":{"i32":5},"29":{"tf":1}},{"1":{"i32":16},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"15":{"rec":{"1":{"i32":0},"2":{"i32":0},"3":{"i32":-1}}},"20":{"i32":-1},"29":{"tf":1},"36":{"str":"o_orderkey"}},{"1":{"i32":9},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"10":{"rec":{"1":{"i64":1}}},"20":{"i32":-1},"29":{"tf":0}},{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":12},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"le"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}},{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}]},"4":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"5":{"tf":0},"7":{"str":"le(int, int)"},"11":{"i64":0},"13":{"tf":1},"14":{"tf":0},"15":{"tf":0},"16":{"i64":360}}},"28":{"i32":5},"29":{"tf":1}},{"1":{"i32":16},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"15":{"rec":{"1":{"i32":0},"2":{"i32":0},"3":{"i32":-1}}},"20":{"i32":-1},"29":{"tf":1},"36":{"str":"o_orderkey"}},{"1":{"i32":9},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"10":{"rec":{"1":{"i64":100}}},"20":{"i32":-1},"29":{"tf":0}},{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":13},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"gt"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":8}}}}]},"3":{"i64":-1}},{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":8}}}}]},"3":{"i64":-1}}]},"4":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"5":{"tf":0},"7":{"str":"gt(double, double)"},"11":{"i64":0},"13":{"tf":1},"14":{"tf":0},"15":{"tf":0},"16":{"i64":360}}},"28":{"i32":8},"29":{"tf":0}},{"1":{"i32":20},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":8}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"random"}}},"2":{"i32":0},"3":{"lst":["rec",0]},"4":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":8}}}}]},"3":{"i64":-1}}},"5":{"tf":0},"7":{"str":"random()"},"9":{"rec":{"1":{"str":""}}},"11":{"i64":0},"13":{"tf":1},"14":{"tf":0},"15":{"tf":0},"16":{"i64":360}}},"29":{"tf":0}},{"1":{"i32":8},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":8}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"9":{"rec":{"1":{"dbl":0.5}}},"20":{"i32":-1},"29":{"tf":0}}]}})|", + // select count(o_orderkey) from tpch1_orc.orders where lower(o_orderpriority) = '1-urgent'; + R"|({"1":{"lst":["rec",4,{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":9},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"eq"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":15},"2":{"i32":65533}}}}]},"3":{"i64":-1}},{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":15},"2":{"i32":65533}}}}]},"3":{"i64":-1}}]},"4":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"5":{"tf":0},"7":{"str":"eq(varchar(65533), varchar(65533))"},"11":{"i64":0},"13":{"tf":1},"14":{"tf":0},"15":{"tf":0},"16":{"i64":360}}},"28":{"i32":15},"29":{"tf":1}},{"1":{"i32":20},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":15},"2":{"i32":65533}}}}]},"3":{"i64":-1}}},"4":{"i32":1},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"lower"}}},"2":{"i32":0},"3":{"lst":["rec",1,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":23},"2":{"i32":2147483643}}}}]},"3":{"i64":-1}}]},"4":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":15},"2":{"i32":65533}}}}]},"3":{"i64":-1}}},"5":{"tf":0},"7":{"str":"lower(text)"},"9":{"rec":{"1":{"str":""}}},"11":{"i64":0},"13":{"tf":1},"14":{"tf":0},"15":{"tf":0},"16":{"i64":360}}},"29":{"tf":1}},{"1":{"i32":16},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":23},"2":{"i32":2147483643}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"15":{"rec":{"1":{"i32":5},"2":{"i32":0},"3":{"i32":-1}}},"20":{"i32":-1},"29":{"tf":1},"36":{"str":"o_orderpriority"}},{"1":{"i32":17},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":15},"2":{"i32":65533}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"16":{"rec":{"1":{"str":"1-urgent"}}},"20":{"i32":-1},"29":{"tf":0}}]}})|", + // select count(o_orderkey) from tpch1_orc.orders where o_orderkey * 2 < 60; + R"|({"1":{"lst":["rec",5,{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":11},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"lt"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":6}}}}]},"3":{"i64":-1}},{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":6}}}}]},"3":{"i64":-1}}]},"4":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"5":{"tf":0},"7":{"str":"lt(bigint, bigint)"},"11":{"i64":0},"13":{"tf":1},"14":{"tf":0},"15":{"tf":0},"16":{"i64":360}}},"28":{"i32":6},"29":{"tf":1}},{"1":{"i32":1},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":6}}}}]},"3":{"i64":-1}}},"3":{"i32":55},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"multiply"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}},{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":3}}}}]},"3":{"i64":-1}}]},"4":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":6}}}}]},"3":{"i64":-1}}},"5":{"tf":0},"7":{"str":"multiply(int, tinyint)"},"11":{"i64":0},"13":{"tf":1},"14":{"tf":0},"15":{"tf":0},"16":{"i64":360}}},"29":{"tf":1}},{"1":{"i32":16},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"15":{"rec":{"1":{"i32":0},"2":{"i32":0},"3":{"i32":-1}}},"20":{"i32":-1},"29":{"tf":1},"36":{"str":"o_orderkey"}},{"1":{"i32":9},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":3}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"10":{"rec":{"1":{"i64":2}}},"20":{"i32":-1},"29":{"tf":0}},{"1":{"i32":9},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":6}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"10":{"rec":{"1":{"i64":60}}},"20":{"i32":-1},"29":{"tf":0}}]}})|", + // select count(o_orderkey) from tpch1_orc.orders where o_orderdate is not null; + R"|({"1":{"lst":["rec",4,{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":9},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"eq"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":27},"3":{"i32":18},"4":{"i32":0}}}}]},"3":{"i64":-1}},{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":27},"3":{"i32":18},"4":{"i32":0}}}}]},"3":{"i64":-1}}]},"4":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"5":{"tf":0},"7":{"str":"eq(datetimev2(0), datetimev2(0))"},"11":{"i64":0},"13":{"tf":1},"14":{"tf":0},"15":{"tf":0},"16":{"i64":360}}},"28":{"i32":27},"29":{"tf":1}},{"1":{"i32":5},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":27},"3":{"i32":18},"4":{"i32":0}}}}]},"3":{"i64":-1}}},"3":{"i32":4},"4":{"i32":1},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"casttodatetimev2"}}},"2":{"i32":0},"3":{"lst":["rec",1,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":26}}}}]},"3":{"i64":-1}}]},"4":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":27},"3":{"i32":18},"4":{"i32":0}}}}]},"3":{"i64":-1}}},"5":{"tf":0},"7":{"str":"casttodatetimev2(datev2)"},"11":{"i64":0},"13":{"tf":1},"14":{"tf":0},"15":{"tf":0},"16":{"i64":360}}},"29":{"tf":1}},{"1":{"i32":16},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":26}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"15":{"rec":{"1":{"i32":4},"2":{"i32":0},"3":{"i32":-1}}},"20":{"i32":-1},"29":{"tf":1},"36":{"str":"o_orderdate"}},{"1":{"i32":7},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":27},"3":{"i32":18},"4":{"i32":0}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"8":{"rec":{"1":{"str":"2024-11-12 21:13:02"}}},"20":{"i32":-1},"29":{"tf":0}}]}})|", + }; + std::vector result_search_arguments = { + "leaf-0 = (o_orderkey < 100), leaf-1 = (o_orderkey <= 5999900), leaf-2 " + "= (o_orderkey " + "in " + "[1000000, 2000000, 3000000]), expr = (or leaf-0 (not leaf-1) leaf-2)", + "leaf-0 = (o_orderkey is null), leaf-1 = (o_orderkey < 100), leaf-2 = " + "(o_orderkey <= " + "1000), leaf-3 = (o_orderkey in [200, 300, 400]), expr = (and (or " + "leaf-0 (not leaf-1)) " + "(or leaf-0 leaf-2) (or leaf-0 (not leaf-3)))", + "leaf-0 = (o_orderkey is null), leaf-1 = (o_orderkey < 1000000), leaf-2 = (o_orderkey " + "<= 1200000), leaf-3 = (o_orderkey = 1100000), expr = (and (or leaf-0 (not leaf-1)) " + "(or leaf-0 leaf-2) (or leaf-0 (not leaf-3)))", + "leaf-0 = (o_orderkey in [1000000, 2000000, 3000000]), leaf-1 = (o_orderdate < " + "17121205), leaf-2 = (o_orderdate <= 17121205), expr = (and (or leaf-0 (not leaf-1)) " + "(or leaf-0 leaf-2))", + "leaf-0 = (o_orderkey < 2), leaf-1 = (o_orderpriority = 1-URGENT), expr = (or leaf-0 " + "leaf-1)", + "leaf-0 = (o_orderkey < 2), expr = leaf-0", + CANNOT_PUSH_DOWN_ERROR, + CANNOT_PUSH_DOWN_ERROR, + CANNOT_PUSH_DOWN_ERROR, + CANNOT_PUSH_DOWN_ERROR, + CANNOT_PUSH_DOWN_ERROR, + }; + for (int i = 0; i < exprs.size(); i++) { + auto search_argument = build_search_argument(exprs[i]); + ASSERT_EQ(search_argument, result_search_arguments[i]); + } +} + +} // namespace doris::vectorized diff --git a/regression-test/data/external_table_p0/hive/test_hive_orc.out b/regression-test/data/external_table_p0/hive/test_hive_orc.out index 066c5d4b4d3b5f..b34f276020c7cc 100644 --- a/regression-test/data/external_table_p0/hive/test_hive_orc.out +++ b/regression-test/data/external_table_p0/hive/test_hive_orc.out @@ -110,6 +110,30 @@ tablets tinyint_col 179 182 182 187 183 181 177 183 177 187 183 202 202 186 528 -- !string_col_dict_plain_mixed3 -- 10240 +-- !predicate_pushdown1 -- +55 + +-- !predicate_pushdown2 -- +228 + +-- !predicate_pushdown3 -- +53 + +-- !predicate_pushdown4 -- +50000 + +-- !predicate_pushdown5 -- +90425 + +-- !predicate_pushdown6 -- +279428 + +-- !predicate_pushdown7 -- +300343 + +-- !predicate_pushdown8 -- +1533 + -- !select_top50 -- 4 55 999742610 400899305488827731 false 6.5976813E8 7.8723304616937395E17 \N base tennis pit vertical friday 2022-08-19T07:29:58 \N tablets smallint_col 2019-02-07 [7.53124931825377e+17] ["NbSSBtwzpxNSkkwga"] tablets smallint_col 2 49 999613702 105493714032727452 \N 6.3322381E8 9.8642324410240179E17 Unveil bright recruit participate. Suspect impression camera mathematical revelation. Fault live2 elbow debt west hydrogen current. how literary 2022-09-03T17:20:21 481707.1065 tablets boolean_col 2020-01-12 [] ["HoMrAnn", "wteEFvIwoZsVpVQdscMb", null, "zcGFmv", "kGEBBckbMtX", "hrEtCGFdPWZK"] tablets boolean_col @@ -273,6 +297,30 @@ tablets tinyint_col 179 182 182 187 183 181 177 183 177 187 183 202 202 186 528 -- !string_col_dict_plain_mixed3 -- 10240 +-- !predicate_pushdown1 -- +55 + +-- !predicate_pushdown2 -- +228 + +-- !predicate_pushdown3 -- +53 + +-- !predicate_pushdown4 -- +50000 + +-- !predicate_pushdown5 -- +90425 + +-- !predicate_pushdown6 -- +279428 + +-- !predicate_pushdown7 -- +300343 + +-- !predicate_pushdown8 -- +1533 + -- !select_top50 -- 4 55 999742610 400899305488827731 false 6.5976813E8 7.8723304616937395E17 \N base tennis pit vertical friday 2022-08-19T07:29:58 \N tablets smallint_col 2019-02-07 [7.53124931825377e+17] ["NbSSBtwzpxNSkkwga"] tablets smallint_col 2 49 999613702 105493714032727452 \N 6.3322381E8 9.8642324410240179E17 Unveil bright recruit participate. Suspect impression camera mathematical revelation. Fault live2 elbow debt west hydrogen current. how literary 2022-09-03T17:20:21 481707.1065 tablets boolean_col 2020-01-12 [] ["HoMrAnn", "wteEFvIwoZsVpVQdscMb", null, "zcGFmv", "kGEBBckbMtX", "hrEtCGFdPWZK"] tablets boolean_col diff --git a/regression-test/suites/external_table_p0/hive/test_hive_orc.groovy b/regression-test/suites/external_table_p0/hive/test_hive_orc.groovy index 0f837c0abd3088..6457d2b3edd5d8 100644 --- a/regression-test/suites/external_table_p0/hive/test_hive_orc.groovy +++ b/regression-test/suites/external_table_p0/hive/test_hive_orc.groovy @@ -81,6 +81,17 @@ suite("test_hive_orc", "all_types,p0,external,hive,external_docker,external_dock qt_string_col_dict_plain_mixed3 """select count(col2) from string_col_dict_plain_mixed_orc where col1 like '%Test%';""" } + def predicate_pushdown = { + qt_predicate_pushdown1 """ select count(o_orderkey) from tpch1_orc.orders where o_orderkey is not null and (o_orderkey < 100 or o_orderkey > 5999900 or o_orderkey in (1000000, 2000000, 3000000)); """ + qt_predicate_pushdown2 """ select count(o_orderkey) from tpch1_orc.orders where o_orderkey is null or (o_orderkey between 100 and 1000 and o_orderkey not in (200, 300, 400)); """ + qt_predicate_pushdown3 """ select count(o_orderkey) from tpch1_orc.orders where o_orderkey is not null and (o_orderkey < 100 or o_orderkey > 5999900 or o_orderkey = 3000000); """ + qt_predicate_pushdown4 """ select count(o_orderkey) from tpch1_orc.orders where o_orderkey is null or (o_orderkey between 1000000 and 1200000 and o_orderkey != 1100000); """ + qt_predicate_pushdown5 """ SELECT count(o_orderkey) FROM tpch1_orc.orders WHERE (o_orderdate >= '1994-01-01' AND o_orderdate <= '1994-12-31') AND (o_orderpriority = '5-LOW' OR o_orderpriority = '3-MEDIUM') AND o_totalprice > 2000;""" + qt_predicate_pushdown6 """ SELECT count(o_orderkey) FROM tpch1_orc.orders WHERE o_orderstatus <> 'F' AND o_custkey < 54321; """ + qt_predicate_pushdown7 """ SELECT count(o_orderkey) FROM tpch1_orc.orders WHERE o_comment LIKE '%delayed%' OR o_orderpriority = '1-URGENT'; """ + qt_predicate_pushdown8 """ SELECT count(o_orderkey) FROM tpch1_orc.orders WHERE o_orderkey IN (1000000, 2000000, 3000000) OR o_clerk = 'Clerk#000000470'; """ + } + String enabled = context.config.otherConfigs.get("enableHiveTest") if (enabled == null || !enabled.equalsIgnoreCase("true")) { logger.info("diable Hive test.") @@ -108,6 +119,7 @@ suite("test_hive_orc", "all_types,p0,external,hive,external_docker,external_dock only_partition_col() decimals() string_col_dict_plain_mixed() + predicate_pushdown() sql """drop catalog if exists ${catalog_name}"""