Skip to content

Commit

Permalink
Revert #43255 & #44615 (#45096)
Browse files Browse the repository at this point in the history
Revert "branch-2.1: [enhance](orc) Optimize ORC Predicate Pushdown for
OR-connected Predicate #43255 (#44438)"
Revert "[fix](orc) check all the cases before build_search_argument
(#44615) (#44801)"
  • Loading branch information
morningman authored Dec 6, 2024
1 parent 4f45fef commit d4a6fd1
Show file tree
Hide file tree
Showing 9 changed files with 186 additions and 610 deletions.
1 change: 0 additions & 1 deletion be/src/runtime/exec_env.h
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,6 @@ class ExecEnv {
static void set_tracking_memory(bool tracking_memory) {
_s_tracking_memory.store(tracking_memory, std::memory_order_acquire);
}
void set_orc_memory_pool(orc::MemoryPool* pool) { _orc_memory_pool = pool; }
#endif
LoadStreamMapPool* load_stream_map_pool() { return _load_stream_map_pool.get(); }

Expand Down
486 changes: 155 additions & 331 deletions be/src/vec/exec/format/orc/vorc_reader.cpp

Large diffs are not rendered by default.

45 changes: 11 additions & 34 deletions be/src/vec/exec/format/orc/vorc_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@
#pragma once

#include <cctz/time_zone.h>
#include <stddef.h>
#include <stdint.h>

#include <cstddef>
#include <cstdint>
#include <list>
#include <memory>
#include <orc/OrcFile.hh>
Expand All @@ -41,7 +41,6 @@
#include "orc/Reader.hh"
#include "orc/Type.hh"
#include "orc/Vector.hh"
#include "orc/sargs/Literal.hh"
#include "runtime/types.h"
#include "util/runtime_profile.h"
#include "vec/aggregate_functions/aggregate_function.h"
Expand All @@ -52,8 +51,6 @@
#include "vec/exec/format/format_common.h"
#include "vec/exec/format/generic_reader.h"
#include "vec/exec/format/table/transactional_hive_reader.h"
#include "vec/exprs/vliteral.h"
#include "vec/exprs/vslot_ref.h"

namespace doris {
class RuntimeState;
Expand Down Expand Up @@ -83,6 +80,13 @@ namespace doris::vectorized {

class ORCFileInputStream;

// Plain aggregate bundling the pieces of one column predicate: a column name,
// an ORC predicate data type, a list of ORC literals and a SQL filter operator.
// NOTE(review): presumably consumed when building the ORC search argument in
// vorc_reader.cpp; that file's diff is not rendered here, so confirm there.
struct OrcPredicate {
// Column name associated with this predicate.
std::string col_name;
// ORC-side data type tag for the predicate.
orc::PredicateDataType data_type;
// ORC literal value(s) attached to the predicate (may hold more than one).
std::vector<orc::Literal> literals;
// SQL filter operator of the predicate.
SQLFilterOp op;
};

struct LazyReadContext {
VExprContextSPtrs conjuncts;
bool can_lazy_read = false;
Expand Down Expand Up @@ -224,8 +228,6 @@ class OrcReader : public GenericReader {
RuntimeProfile::Counter* decode_value_time = nullptr;
RuntimeProfile::Counter* decode_null_map_time = nullptr;
RuntimeProfile::Counter* filter_block_time = nullptr;
RuntimeProfile::Counter* selected_row_group_count = nullptr;
RuntimeProfile::Counter* evaluated_row_group_count = nullptr;
};

class ORCFilterImpl : public orc::ORCFilter {
Expand Down Expand Up @@ -289,27 +291,8 @@ class OrcReader : public GenericReader {
bool* is_hive1_orc);
static bool _check_acid_schema(const orc::Type& type);
static const orc::Type& _remove_acid(const orc::Type& type);

// functions for building search argument until _init_search_argument
std::tuple<bool, orc::Literal, orc::PredicateDataType> _make_orc_literal(
const VSlotRef* slot_ref, const VLiteral* literal);
bool _check_slot_can_push_down(const VExprSPtr& expr);
bool _check_literal_can_push_down(const VExprSPtr& expr, uint16_t child_id);
bool _check_rest_children_can_push_down(const VExprSPtr& expr);
bool _check_expr_can_push_down(const VExprSPtr& expr);
void _build_less_than(const VExprSPtr& expr,
std::unique_ptr<orc::SearchArgumentBuilder>& builder);
void _build_less_than_equals(const VExprSPtr& expr,
std::unique_ptr<orc::SearchArgumentBuilder>& builder);
void _build_equals(const VExprSPtr& expr, std::unique_ptr<orc::SearchArgumentBuilder>& builder);
void _build_filter_in(const VExprSPtr& expr,
std::unique_ptr<orc::SearchArgumentBuilder>& builder);
void _build_is_null(const VExprSPtr& expr,
std::unique_ptr<orc::SearchArgumentBuilder>& builder);
bool _build_search_argument(const VExprSPtr& expr,
std::unique_ptr<orc::SearchArgumentBuilder>& builder);
bool _init_search_argument(const VExprContextSPtrs& conjuncts);

bool _init_search_argument(
std::unordered_map<std::string, ColumnValueRangeType>* colname_to_value_range);
void _init_bloom_filter(
std::unordered_map<std::string, ColumnValueRangeType>* colname_to_value_range);
void _init_system_properties();
Expand Down Expand Up @@ -595,14 +578,11 @@ class OrcReader : public GenericReader {
bool _is_hive1_orc_or_use_idx = false;

std::unordered_map<std::string, std::string> _col_name_to_file_col_name;
// TODO: check if we can remove _col_name_to_file_col_name_low_case
std::unordered_map<std::string, std::string> _col_name_to_file_col_name_low_case;
std::unordered_map<std::string, const orc::Type*> _type_map;
std::vector<const orc::Type*> _col_orc_type;
std::unique_ptr<ORCFileInputStream> _file_input_stream;
Statistics _statistics;
OrcProfile _orc_profile;
orc::ReaderMetrics _reader_metrics;

std::unique_ptr<orc::ColumnVectorBatch> _batch;
std::unique_ptr<orc::Reader> _reader;
Expand Down Expand Up @@ -649,9 +629,6 @@ class OrcReader : public GenericReader {
std::unordered_map<std::string, std::string> _table_col_to_file_col;
// Supports Iceberg position deletes.
std::vector<int64_t>* _position_delete_ordered_rowids = nullptr;
std::unordered_map<const VSlotRef*, orc::PredicateDataType>
_vslot_ref_to_orc_predicate_data_type;
std::unordered_map<const VLiteral*, orc::Literal> _vliteral_to_orc_literal;
};

class ORCFileInputStream : public orc::InputStream, public ProfileCollector {
Expand Down
Binary file removed be/test/exec/test_data/orc_scanner/orders.orc
Binary file not shown.
29 changes: 18 additions & 11 deletions be/test/testutil/desc_tbl_builder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,20 @@

#include "testutil/desc_tbl_builder.h"

#include <gtest/gtest.h>
#include <glog/logging.h>
#include <gtest/gtest-message.h>
#include <gtest/gtest-test-part.h>

#include <vector>

#include "common/object_pool.h"
#include "common/status.h"
#include "gtest/gtest_pred_impl.h"
#include "runtime/define_primitive_type.h"
#include "runtime/descriptors.h"
#include "util/bit_util.h"

using std::vector;

namespace doris {

Expand All @@ -33,7 +44,7 @@ TupleDescBuilder& DescriptorTblBuilder::declare_tuple() {

// item_id of -1 indicates no itemTupleId
static TSlotDescriptor make_slot_descriptor(int id, int parent_id, const TypeDescriptor& type,
const std::string& name, int slot_idx, int item_id) {
int slot_idx, int item_id) {
int null_byte = slot_idx / 8;
int null_bit = slot_idx % 8;
TSlotDescriptor slot_desc;
Expand All @@ -47,7 +58,6 @@ static TSlotDescriptor make_slot_descriptor(int id, int parent_id, const TypeDes
slot_desc.__set_nullIndicatorBit(null_bit);
slot_desc.__set_slotIdx(slot_idx);
slot_desc.__set_isMaterialized(true);
slot_desc.__set_colName(name);
// if (item_id != -1) {
// slot_desc.__set_itemTupleId(item_id);
// }
Expand All @@ -68,27 +78,24 @@ DescriptorTbl* DescriptorTblBuilder::build() {
int tuple_id = 0;
int slot_id = 0;

for (auto& _tuples_desc : _tuples_descs) {
build_tuple(_tuples_desc->slot_types(), _tuples_desc->slot_names(), &thrift_desc_tbl,
&tuple_id, &slot_id);
for (int i = 0; i < _tuples_descs.size(); ++i) {
build_tuple(_tuples_descs[i]->slot_types(), &thrift_desc_tbl, &tuple_id, &slot_id);
}

Status status = DescriptorTbl::create(_obj_pool, thrift_desc_tbl, &desc_tbl);
EXPECT_TRUE(status.ok());
return desc_tbl;
}

TTupleDescriptor DescriptorTblBuilder::build_tuple(const std::vector<TypeDescriptor>& slot_types,
const std::vector<std::string>& slot_names,
TTupleDescriptor DescriptorTblBuilder::build_tuple(const vector<TypeDescriptor>& slot_types,
TDescriptorTable* thrift_desc_tbl,
int* next_tuple_id, int* slot_id) {
// We never materialize struct slots (there's no in-memory representation of structs,
// instead the materialized fields appear directly in the tuple), but array types can
// still have a struct item type. In this case, the array item tuple contains the
// "inlined" struct fields.
if (slot_types.size() == 1 && slot_types[0].type == TYPE_STRUCT) {
return build_tuple(slot_types[0].children, slot_types[0].field_names, thrift_desc_tbl,
next_tuple_id, slot_id);
return build_tuple(slot_types[0].children, thrift_desc_tbl, next_tuple_id, slot_id);
}

int tuple_id = *next_tuple_id;
Expand All @@ -104,7 +111,7 @@ TTupleDescriptor DescriptorTblBuilder::build_tuple(const std::vector<TypeDescrip
// }

thrift_desc_tbl->slotDescriptors.push_back(
make_slot_descriptor(*slot_id, tuple_id, slot_types[i], slot_names[i], i, item_id));
make_slot_descriptor(*slot_id, tuple_id, slot_types[i], i, item_id));
thrift_desc_tbl->__isset.slotDescriptors = true;
++(*slot_id);
}
Expand Down
17 changes: 2 additions & 15 deletions be/test/testutil/desc_tbl_builder.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,16 +20,15 @@

#include <gen_cpp/Descriptors_types.h>

#include <tuple>
#include <vector>

#include "common/object_pool.h"
#include "runtime/descriptors.h"
#include "runtime/types.h"

namespace doris {

class ObjectPool;
class TupleDescBuilder;
class DescriptorTbl;

// Aids in the construction of a DescriptorTbl by declaring tuples and slots
// associated with those tuples.
Expand All @@ -41,7 +40,6 @@ class TupleDescBuilder;
// DescriptorTblBuilder builder;
// builder.declare_tuple() << TYPE_TINYINT << TYPE_TIMESTAMP; // gets TupleId 0
// builder.declare_tuple() << TYPE_FLOAT; // gets TupleId 1
// builder.declare_tuple() << std::make_tuple(TYPE_INT, "col1") << std::make_tuple(TYPE_STRING, "col2"); // gets Tuple with type and name
// DescriptorTbl desc_tbl = builder.build();
class DescriptorTblBuilder {
public:
Expand All @@ -59,31 +57,20 @@ class DescriptorTblBuilder {
std::vector<TupleDescBuilder*> _tuples_descs;

TTupleDescriptor build_tuple(const std::vector<TypeDescriptor>& slot_types,
const std::vector<std::string>& slot_names,
TDescriptorTable* thrift_desc_tbl, int* tuple_id, int* slot_id);
};

// Accumulates the slots of a single tuple as two parallel vectors: slot types
// and slot names. Every operator<< overload pushes to both vectors, so index i
// in _slot_types corresponds to index i in _slot_names.
class TupleDescBuilder {
public:
// A slot given as (type, name): element 0 is the TypeDescriptor, element 1 the name.
using SlotType = std::tuple<TypeDescriptor, std::string>;
// Appends a named slot. Returns *this so that appends can be chained.
TupleDescBuilder& operator<<(const SlotType& slot) {
_slot_types.push_back(std::get<0>(slot));
_slot_names.push_back(std::get<1>(slot));
return *this;
}

// Appends a slot with only a type; an empty string is stored as its name to
// keep both vectors the same length. Returns *this so that appends can be chained.
TupleDescBuilder& operator<<(const TypeDescriptor& slot_type) {
_slot_types.push_back(slot_type);
_slot_names.emplace_back("");
return *this;
}

// Returns a copy of the accumulated slot types (returned by value).
std::vector<TypeDescriptor> slot_types() const { return _slot_types; }
// Returns a copy of the accumulated slot names (returned by value).
std::vector<std::string> slot_names() const { return _slot_names; }

private:
// Slot types in insertion order.
std::vector<TypeDescriptor> _slot_types;
// Slot names in insertion order; "" for slots appended without a name.
std::vector<std::string> _slot_names;
};

} // end namespace doris
Expand Down
Loading

0 comments on commit d4a6fd1

Please sign in to comment.