Skip to content

Commit

Permalink
[fix](inverted index) add in list to fast execute logic after hit index
Browse files Browse the repository at this point in the history
  • Loading branch information
zzzxl1993 committed Jun 11, 2024
1 parent f5569dc commit cdcb75e
Show file tree
Hide file tree
Showing 13 changed files with 173 additions and 45 deletions.
4 changes: 2 additions & 2 deletions be/src/olap/column_predicate.h
Original file line number Diff line number Diff line change
Expand Up @@ -249,9 +249,9 @@ class ColumnPredicate {
case PredicateType::GE:
return "ge";
case PredicateType::IN_LIST:
return "in_list";
return "in";
case PredicateType::NOT_IN_LIST:
return "not_in_list";
return "not_in";
case PredicateType::IS_NULL:
return "is_null";
case PredicateType::IS_NOT_NULL:
Expand Down
15 changes: 8 additions & 7 deletions be/src/olap/rowset/segment_v2/segment_iterator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -698,16 +698,16 @@ Status SegmentIterator::_execute_predicates_except_leafnode_of_andnode(
_column_predicate_info->column_name = expr->expr_name();
} else if (_is_literal_node(node_type)) {
auto v_literal_expr = std::dynamic_pointer_cast<doris::vectorized::VLiteral>(expr);
_column_predicate_info->query_values.push_back(v_literal_expr->value());
_column_predicate_info->query_values.insert(v_literal_expr->value());
} else if (node_type == TExprNodeType::BINARY_PRED || node_type == TExprNodeType::MATCH_PRED ||
node_type == TExprNodeType::IN_PRED) {
if (node_type == TExprNodeType::MATCH_PRED) {
_column_predicate_info->query_op = "match";
} else if (node_type == TExprNodeType::IN_PRED) {
if (expr->op() == TExprOpcode::type::FILTER_IN) {
_column_predicate_info->query_op = "in_list";
_column_predicate_info->query_op = "in";
} else {
_column_predicate_info->query_op = "not_in_list";
_column_predicate_info->query_op = "not_in";
}
} else {
_column_predicate_info->query_op = expr->fn().name.function_name;
Expand Down Expand Up @@ -920,7 +920,8 @@ std::string SegmentIterator::_gen_predicate_result_sign(ColumnPredicate* predica
std::string SegmentIterator::_gen_predicate_result_sign(ColumnPredicateInfo* predicate_info) {
std::string pred_result_sign;
pred_result_sign = BeConsts::BLOCK_TEMP_COLUMN_PREFIX + predicate_info->column_name + "_" +
predicate_info->query_op + "_" + join(predicate_info->query_values, ",");
predicate_info->query_op + "_" +
boost::join(predicate_info->query_values, ",");
return pred_result_sign;
}

Expand Down Expand Up @@ -2470,15 +2471,15 @@ void SegmentIterator::_calculate_pred_in_remaining_conjunct_root(
}
} else if (_is_literal_node(node_type)) {
auto v_literal_expr = static_cast<const doris::vectorized::VLiteral*>(expr.get());
_column_predicate_info->query_values.push_back(v_literal_expr->value());
_column_predicate_info->query_values.insert(v_literal_expr->value());
} else {
if (node_type == TExprNodeType::MATCH_PRED) {
_column_predicate_info->query_op = "match";
} else if (node_type == TExprNodeType::IN_PRED) {
if (expr->op() == TExprOpcode::type::FILTER_IN) {
_column_predicate_info->query_op = "in_list";
_column_predicate_info->query_op = "in";
} else {
_column_predicate_info->query_op = "not_in_list";
_column_predicate_info->query_op = "not_in";
}
} else if (node_type != TExprNodeType::COMPOUND_PRED) {
_column_predicate_info->query_op = expr->fn().name.function_name;
Expand Down
4 changes: 2 additions & 2 deletions be/src/olap/rowset/segment_v2/segment_iterator.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ struct ColumnPredicateInfo {
std::string debug_string() const {
std::stringstream ss;
ss << "column_name=" << column_name << ", query_op=" << query_op
<< ", query_value=" << join(query_values, ",");
<< ", query_value=" << boost::join(query_values, ",");
return ss.str();
}

Expand All @@ -97,7 +97,7 @@ struct ColumnPredicateInfo {
}

std::string column_name;
std::vector<std::string> query_values;
std::set<std::string> query_values;
std::string query_op;
};

Expand Down
30 changes: 2 additions & 28 deletions be/src/vec/exprs/vectorized_fn_call.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -148,8 +148,8 @@ Status VectorizedFnCall::execute(VExprContext* context, vectorized::Block* block
// prepare a column to save result
block->insert({nullptr, _data_type, _expr_name});
if (_can_fast_execute) {
auto can_fast_execute = fast_execute(context->fn_context(_fn_context_index), *block,
arguments, num_columns_without_result, block->rows());
auto can_fast_execute = fast_execute(*block, arguments, num_columns_without_result,
block->rows(), _function->get_name());
if (can_fast_execute) {
*result_column_id = num_columns_without_result;
return Status::OK();
Expand All @@ -163,32 +163,6 @@ Status VectorizedFnCall::execute(VExprContext* context, vectorized::Block* block
return Status::OK();
}

// Shortcut path: when the block already carries a precomputed filter column for this
// predicate (per the original note, built while applying an index in segment_iterator),
// reuse that column as the result instead of evaluating the function.
// Returns true when the result slot was filled from the precomputed column, false when
// no such column exists and the caller has to evaluate the function normally.
bool VectorizedFnCall::fast_execute(FunctionContext* context, Block& block,
                                    const ColumnNumbers& arguments, size_t result,
                                    size_t input_rows_count) {
    // Lookup key: temp-column prefix + column name + "_" + function name + "_" + the
    // string form of row 0 of the second argument.
    const auto literal_text = block.get_by_position(arguments[1]).to_string(0);
    const std::string source_column = block.get_by_position(arguments[0]).name;
    const auto cached_name = BeConsts::BLOCK_TEMP_COLUMN_PREFIX + source_column + "_" +
                             _function->get_name() + "_" + literal_text;

    if (block.has(cached_name)) {
        auto cached_column =
                block.get_by_name(cached_name).column->convert_to_full_column_if_const();
        if (block.get_by_position(result).type->is_nullable()) {
            // A nullable result slot needs a nullable column: pair the cached values with
            // a UInt8 column holding input_rows_count zeros.
            block.replace_by_position(
                    result, ColumnNullable::create(std::move(cached_column),
                                                   ColumnUInt8::create(input_rows_count, 0)));
        } else {
            block.replace_by_position(result, std::move(cached_column));
        }
        return true;
    }

    return false;
}

const std::string& VectorizedFnCall::expr_name() const {
return _expr_name;
}
Expand Down
3 changes: 0 additions & 3 deletions be/src/vec/exprs/vectorized_fn_call.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,6 @@ class VectorizedFnCall : public VExpr {
}
static std::string debug_string(const std::vector<VectorizedFnCall*>& exprs);

bool fast_execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
size_t result, size_t input_rows_count);

protected:
FunctionBasePtr _function;
bool _can_fast_execute = false;
Expand Down
39 changes: 39 additions & 0 deletions be/src/vec/exprs/vexpr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -449,4 +449,43 @@ Status VExpr::check_constant(const Block& block, ColumnNumbers arguments) const
return Status::OK();
}

// Tries to satisfy a predicate from a column that is already present in `block` under
// the name produced by gen_predicate_result_sign().
//   block            - block holding the inputs and the (already inserted) result slot
//   arguments        - positions of the predicate's argument columns
//   result           - position of the result slot to overwrite
//   input_rows_count - row count used to size the null map for nullable results
//   function_name    - function name embedded in the lookup key
// Returns false (leaving the block untouched) when no matching column is found, and
// true after the result slot has been replaced.
bool VExpr::fast_execute(Block& block, const ColumnNumbers& arguments, size_t result,
                         size_t input_rows_count, const std::string& function_name) {
    const std::string cached_name = gen_predicate_result_sign(block, arguments, function_name);

    if (block.has(cached_name)) {
        auto cached_column =
                block.get_by_name(cached_name).column->convert_to_full_column_if_const();
        const bool wants_nullable = block.get_by_position(result).type->is_nullable();
        if (!wants_nullable) {
            block.replace_by_position(result, std::move(cached_column));
            return true;
        }
        // Nullable result type: wrap the cached values together with a UInt8 column of
        // input_rows_count zeros.
        block.replace_by_position(
                result, ColumnNullable::create(std::move(cached_column),
                                               ColumnUInt8::create(input_rows_count, 0)));
        return true;
    }

    return false;
}

// Builds the column name under which a precomputed predicate result is looked up:
//   BLOCK_TEMP_COLUMN_PREFIX + <name of arguments[0]> + "_" + <function_name> + "_" + <values>
// For "in", <values> is the string form (row 0) of every argument after the first,
// collected in a std::set (so de-duplicated and ordered) and joined with ",".
// For any other function, <values> is the string form (row 0) of arguments[1] alone.
std::string VExpr::gen_predicate_result_sign(Block& block, const ColumnNumbers& arguments,
                                             const std::string& function_name) {
    const std::string column_name = block.get_by_position(arguments[0]).name;
    std::string sign =
            BeConsts::BLOCK_TEMP_COLUMN_PREFIX + column_name + "_" + function_name + "_";

    if (function_name != "in") {
        sign += block.get_by_position(arguments[1]).to_string(0);
        return sign;
    }

    std::set<std::string> literals;
    for (size_t idx = 1; idx < arguments.size(); ++idx) {
        literals.insert(block.get_by_position(arguments[idx]).to_string(0));
    }
    sign += boost::join(literals, ",");
    return sign;
}

} // namespace doris::vectorized
7 changes: 7 additions & 0 deletions be/src/vec/exprs/vexpr.h
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,13 @@ class VExpr {
return nullptr;
}

// fast_execute copies an already-computed filter result into the result slot instead of
// evaluating the expression (the original note says this result is built when the index
// is applied in segment_iterator).
// It looks up the column named by gen_predicate_result_sign(); if the block has no such
// column it returns false. Otherwise it replaces the column at position `result` with it
// (wrapped in a ColumnNullable with an all-zero UInt8 column of `input_rows_count` rows
// when the result type is nullable) and returns true.
bool fast_execute(Block& block, const ColumnNumbers& arguments, size_t result,
size_t input_rows_count, const std::string& function_name);

// Returns the lookup name for a precomputed predicate result:
// BLOCK_TEMP_COLUMN_PREFIX + name of arguments[0] + "_" + function_name + "_" + values.
// For "in" the values are the row-0 strings of arguments[1..], de-duplicated and ordered
// via std::set and joined with ","; for other functions it is the row-0 string of
// arguments[1].
std::string gen_predicate_result_sign(Block& block, const ColumnNumbers& arguments,
const std::string& function_name);

protected:
/// Simple debug string that provides no expr subclass-specific information
std::string debug_string(const std::string& expr_name) const {
Expand Down
13 changes: 13 additions & 0 deletions be/src/vec/exprs/vin_predicate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,9 @@ Status VInPredicate::prepare(RuntimeState* state, const RowDescriptor& desc,
}

VExpr::register_function_context(state, context);

_can_fast_execute = _function->can_fast_execute();

return Status::OK();
}

Expand Down Expand Up @@ -100,6 +103,16 @@ Status VInPredicate::execute(VExprContext* context, Block* block, int* result_co
size_t num_columns_without_result = block->columns();
// prepare a column to save result
block->insert({nullptr, _data_type, _expr_name});

if (_can_fast_execute) {
auto can_fast_execute = fast_execute(*block, arguments, num_columns_without_result,
block->rows(), _function->get_name());
if (can_fast_execute) {
*result_column_id = num_columns_without_result;
return Status::OK();
}
}

RETURN_IF_ERROR(_function->execute(context->fn_context(_fn_context_index), *block, arguments,
num_columns_without_result, block->rows(), false));
*result_column_id = num_columns_without_result;
Expand Down
2 changes: 2 additions & 0 deletions be/src/vec/exprs/vin_predicate.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,5 +64,7 @@ class VInPredicate final : public VExpr {

const bool _is_not_in;
static const constexpr char* function_name = "in";

bool _can_fast_execute = false;
};
} // namespace doris::vectorized
3 changes: 2 additions & 1 deletion be/src/vec/functions/function.h
Original file line number Diff line number Diff line change
Expand Up @@ -594,7 +594,8 @@ class DefaultFunction final : public IFunctionBase {
bool can_fast_execute() const override {
auto function_name = function->get_name();
return function_name == "eq" || function_name == "ne" || function_name == "lt" ||
function_name == "gt" || function_name == "le" || function_name == "ge";
function_name == "gt" || function_name == "le" || function_name == "ge" ||
function_name == "in";
}

bool is_deterministic_in_scope_of_query() const override {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1417,7 +1417,7 @@ public void setEnableLeftZigZag(boolean enableLeftZigZag) {
@VariableMgr.VarAttr(name = ENABLE_INVERTED_INDEX_COMPOUND_INLIST,
description = {"让compound inlist条件可以使用倒排索引",
"Let the compound inlist condition use an inverted index"})
public boolean enableInvertedIndexCompoundInlist = false;
public boolean enableInvertedIndexCompoundInlist = true;

public Set<Integer> getIgnoredRuntimeFilterIds() {
return Arrays.stream(ignoreRuntimeFilterIds.split(",[\\s]*"))
Expand Down
60 changes: 60 additions & 0 deletions regression-test/data/inverted_index_p0/test_compound_inlist.out
Original file line number Diff line number Diff line change
Expand Up @@ -59,3 +59,63 @@
-- !sql --
68

-- !sql --
17

-- !sql --
17

-- !sql --
8

-- !sql --
8

-- !sql --
8

-- !sql --
8

-- !sql --
17

-- !sql --
17

-- !sql --
160

-- !sql --
160

-- !sql --
861

-- !sql --
861

-- !sql --
77

-- !sql --
77

-- !sql --
68

-- !sql --
68

-- !sql --
68

-- !sql --
68

-- !sql --
68

-- !sql --
68

Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
// under the License.


suite("test_compound_inlist", "p0"){
suite("test_compound_inlist", "nonConcurrent"){
def indexTbName1 = "test_compound_inlist_1"
def indexTbName2 = "test_compound_inlist_2"

Expand Down Expand Up @@ -134,6 +134,40 @@ suite("test_compound_inlist", "p0"){
qt_sql """ select /*+ SET_VAR(inverted_index_skip_threshold = 0) */ count() from ${indexTbName1} where ((request match_phrase 'hm' and clientip match_phrase '1') or (request match_phrase 'jpg' and clientip match_phrase '2')) or (((request match_phrase 'images' and clientip match_phrase '3') or (request match_phrase 'english' and clientip match_phrase '4')) and status in (1, 2, 304, 200) and status not in (1, 2, 304)); """
qt_sql """ select /*+ SET_VAR(inverted_index_skip_threshold = 0) */ count() from ${indexTbName2} where ((request match_phrase 'hm' and clientip match_phrase '1') or (request match_phrase 'jpg' and clientip match_phrase '2')) or (((request match_phrase 'images' and clientip match_phrase '3') or (request match_phrase 'english' and clientip match_phrase '4')) and status in (1, 2, 304, 200) and status not in (1, 2, 304)); """

sql """ set enable_common_expr_pushdown = false; """

qt_sql """ select /*+ SET_VAR(inverted_index_skip_threshold = 0) */ count() from ${indexTbName1} where (((request match_phrase 'images' and clientip match_phrase '3') or (request match_phrase 'english' and clientip match_phrase '4')) and status in (1, 2, 304)); """
qt_sql """ select /*+ SET_VAR(inverted_index_skip_threshold = 0) */ count() from ${indexTbName2} where (((request match_phrase 'images' and clientip match_phrase '3') or (request match_phrase 'english' and clientip match_phrase '4')) and status in (1, 2, 304)); """

qt_sql """ select /*+ SET_VAR(inverted_index_skip_threshold = 0) */ count() from ${indexTbName1} where (((request match_phrase 'images' and clientip match_phrase '3') or (request match_phrase 'english' and clientip match_phrase '4')) and status in (1, 2, 200)); """
qt_sql """ select /*+ SET_VAR(inverted_index_skip_threshold = 0) */ count() from ${indexTbName2} where (((request match_phrase 'images' and clientip match_phrase '3') or (request match_phrase 'english' and clientip match_phrase '4')) and status in (1, 2, 200)); """

qt_sql """ select /*+ SET_VAR(inverted_index_skip_threshold = 0) */ count() from ${indexTbName1} where (((request match_phrase 'images' and clientip match_phrase '3') or (request match_phrase 'english' and clientip match_phrase '4')) and status not in (1, 2, 304)); """
qt_sql """ select /*+ SET_VAR(inverted_index_skip_threshold = 0) */ count() from ${indexTbName2} where (((request match_phrase 'images' and clientip match_phrase '3') or (request match_phrase 'english' and clientip match_phrase '4')) and status not in (1, 2, 304)); """

qt_sql """ select /*+ SET_VAR(inverted_index_skip_threshold = 0) */ count() from ${indexTbName1} where (((request match_phrase 'images' and clientip match_phrase '3') or (request match_phrase 'english' and clientip match_phrase '4')) and status not in (1, 2, 200)); """
qt_sql """ select /*+ SET_VAR(inverted_index_skip_threshold = 0) */ count() from ${indexTbName2} where (((request match_phrase 'images' and clientip match_phrase '3') or (request match_phrase 'english' and clientip match_phrase '4')) and status not in (1, 2, 200)); """

qt_sql """ select /*+ SET_VAR(inverted_index_skip_threshold = 0) */ count() from ${indexTbName1} where (((request match_phrase 'images' and clientip match_phrase '3') or (request match_phrase 'english' and clientip match_phrase '4')) or status in (1, 2, 304)); """
qt_sql """ select /*+ SET_VAR(inverted_index_skip_threshold = 0) */ count() from ${indexTbName2} where (((request match_phrase 'images' and clientip match_phrase '3') or (request match_phrase 'english' and clientip match_phrase '4')) or status in (1, 2, 304)); """

qt_sql """ select /*+ SET_VAR(inverted_index_skip_threshold = 0) */ count() from ${indexTbName1} where (((request match_phrase 'images' and clientip match_phrase '3') or (request match_phrase 'english' and clientip match_phrase '4')) or status in (1, 2, 200)); """
qt_sql """ select /*+ SET_VAR(inverted_index_skip_threshold = 0) */ count() from ${indexTbName2} where (((request match_phrase 'images' and clientip match_phrase '3') or (request match_phrase 'english' and clientip match_phrase '4')) or status in (1, 2, 200)); """

qt_sql """ select /*+ SET_VAR(inverted_index_skip_threshold = 0) */ count() from ${indexTbName1} where ((request match_phrase 'hm' and clientip match_phrase '1') or (request match_phrase 'jpg' and clientip match_phrase '2')) or (((request match_phrase 'images' and clientip match_phrase '3') or (request match_phrase 'english' and clientip match_phrase '4')) and status in (1, 2, 304)); """
qt_sql """ select /*+ SET_VAR(inverted_index_skip_threshold = 0) */ count() from ${indexTbName2} where ((request match_phrase 'hm' and clientip match_phrase '1') or (request match_phrase 'jpg' and clientip match_phrase '2')) or (((request match_phrase 'images' and clientip match_phrase '3') or (request match_phrase 'english' and clientip match_phrase '4')) and status in (1, 2, 304)); """

qt_sql """ select /*+ SET_VAR(inverted_index_skip_threshold = 0) */ count() from ${indexTbName1} where ((request match_phrase 'hm' and clientip match_phrase '1') or (request match_phrase 'jpg' and clientip match_phrase '2')) or (((request match_phrase 'images' and clientip match_phrase '3') or (request match_phrase 'english' and clientip match_phrase '4')) and status in (1, 2, 200)); """
qt_sql """ select /*+ SET_VAR(inverted_index_skip_threshold = 0) */ count() from ${indexTbName2} where ((request match_phrase 'hm' and clientip match_phrase '1') or (request match_phrase 'jpg' and clientip match_phrase '2')) or (((request match_phrase 'images' and clientip match_phrase '3') or (request match_phrase 'english' and clientip match_phrase '4')) and status in (1, 2, 200)); """

qt_sql """ select /*+ SET_VAR(inverted_index_skip_threshold = 0) */ count() from ${indexTbName1} where ((request match_phrase 'hm' and clientip match_phrase '1') or (request match_phrase 'jpg' and clientip match_phrase '2')) or (((request match_phrase 'images' and clientip match_phrase '3') or (request match_phrase 'english' and clientip match_phrase '4')) and status not in (1, 2, 304)); """
qt_sql """ select /*+ SET_VAR(inverted_index_skip_threshold = 0) */ count() from ${indexTbName2} where ((request match_phrase 'hm' and clientip match_phrase '1') or (request match_phrase 'jpg' and clientip match_phrase '2')) or (((request match_phrase 'images' and clientip match_phrase '3') or (request match_phrase 'english' and clientip match_phrase '4')) and status not in (1, 2, 304)); """

qt_sql """ select /*+ SET_VAR(inverted_index_skip_threshold = 0) */ count() from ${indexTbName1} where ((request match_phrase 'hm' and clientip match_phrase '1') or (request match_phrase 'jpg' and clientip match_phrase '2')) or (((request match_phrase 'images' and clientip match_phrase '3') or (request match_phrase 'english' and clientip match_phrase '4')) and status in (1, 2, 304, 200) and status not in (1, 2, 304)); """
qt_sql """ select /*+ SET_VAR(inverted_index_skip_threshold = 0) */ count() from ${indexTbName2} where ((request match_phrase 'hm' and clientip match_phrase '1') or (request match_phrase 'jpg' and clientip match_phrase '2')) or (((request match_phrase 'images' and clientip match_phrase '3') or (request match_phrase 'english' and clientip match_phrase '4')) and status in (1, 2, 304, 200) and status not in (1, 2, 304)); """

sql """ set enable_common_expr_pushdown = true; """

} finally {
//try_sql("DROP TABLE IF EXISTS ${testTable}")
}
Expand Down

0 comments on commit cdcb75e

Please sign in to comment.