Skip to content

Commit

Permalink
[opt](inverted index) Add Inverted Index Cache Toggle (apache#45718)
Browse files Browse the repository at this point in the history
Problem Summary:
1. Adding an inverted index cache toggle can help with debugging.
  • Loading branch information
zzzxl1993 authored Dec 23, 2024
1 parent cccc9bb commit a032ece
Show file tree
Hide file tree
Showing 6 changed files with 260 additions and 38 deletions.
74 changes: 57 additions & 17 deletions be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -164,16 +164,48 @@ Status InvertedIndexReader::read_null_bitmap(const io::IOContext* io_ctx,
return Status::OK();
}

// Tries to serve an inverted index query from the query-result cache.
//
// The cache is consulted only when the session's query options have
// `enable_inverted_index_query_cache` set; with the toggle off, the lookup is
// skipped entirely and the call is accounted for as a cache miss.
//
// @param runtime_state  source of the per-query options (dereferenced unconditionally)
// @param cache          query cache to probe
// @param cache_key      key identifying the query result
// @param cache_handler  receives the cache entry on a hit
// @param stats          hit/miss counters and the bitmap-copy timer are updated here
// @param bit_map        set to the cached bitmap on a hit; left untouched otherwise
// @return Status::OK() on a hit, KEY_NOT_FOUND("cache miss") otherwise. The
//         debug points below may instead return INTERNAL_ERROR (presumably
//         used for fault injection in tests -- confirm against the debug
//         point framework).
Status InvertedIndexReader::handle_query_cache(RuntimeState* runtime_state,
                                               InvertedIndexQueryCache* cache,
                                               const InvertedIndexQueryCache::CacheKey& cache_key,
                                               InvertedIndexQueryCacheHandle* cache_handler,
                                               OlapReaderStatistics* stats,
                                               std::shared_ptr<roaring::Roaring>& bit_map) {
    const bool cache_enabled = runtime_state->query_options().enable_inverted_index_query_cache;
    // Short-circuit: the cache is never probed while the toggle is off.
    const bool cache_hit = cache_enabled && cache->lookup(cache_key, cache_handler);

    if (!cache_hit) {
        DBUG_EXECUTE_IF("InvertedIndexReader.handle_query_cache_miss", {
            return Status::Error<ErrorCode::INTERNAL_ERROR>("handle query cache miss");
        });
        stats->inverted_index_query_cache_miss++;
        return Status::Error<ErrorCode::KEY_NOT_FOUND>("cache miss");
    }

    DBUG_EXECUTE_IF("InvertedIndexReader.handle_query_cache_hit", {
        return Status::Error<ErrorCode::INTERNAL_ERROR>("handle query cache hit");
    });
    stats->inverted_index_query_cache_hit++;
    // The timer is declared after the counter update so it spans only the
    // bitmap handoff from the cache handle.
    SCOPED_RAW_TIMER(&stats->inverted_index_query_bitmap_copy_timer);
    bit_map = cache_handler->get_bitmap();
    return Status::OK();
}

Status InvertedIndexReader::handle_searcher_cache(
InvertedIndexCacheHandle* inverted_index_cache_handle, const io::IOContext* io_ctx,
OlapReaderStatistics* stats) {
RuntimeState* runtime_state, InvertedIndexCacheHandle* inverted_index_cache_handle,
const io::IOContext* io_ctx, OlapReaderStatistics* stats) {
auto index_file_key = _inverted_index_file_reader->get_index_file_cache_key(&_index_meta);
InvertedIndexSearcherCache::CacheKey searcher_cache_key(index_file_key);
if (InvertedIndexSearcherCache::instance()->lookup(searcher_cache_key,
const auto& query_options = runtime_state->query_options();
if (query_options.enable_inverted_index_searcher_cache &&
InvertedIndexSearcherCache::instance()->lookup(searcher_cache_key,
inverted_index_cache_handle)) {
DBUG_EXECUTE_IF("InvertedIndexReader.handle_searcher_cache_hit", {
return Status::Error<ErrorCode::INTERNAL_ERROR>("handle searcher cache hit");
});
stats->inverted_index_searcher_cache_hit++;
return Status::OK();
} else {
DBUG_EXECUTE_IF("InvertedIndexReader.handle_searcher_cache_miss", {
return Status::Error<ErrorCode::INTERNAL_ERROR>("handle searcher cache miss");
});
// searcher cache miss
stats->inverted_index_searcher_cache_miss++;
auto mem_tracker = std::make_unique<MemTracker>("InvertedIndexSearcherCacheWithRead");
Expand Down Expand Up @@ -311,14 +343,16 @@ Status FullTextIndexReader::query(const io::IOContext* io_ctx, OlapReaderStatist
InvertedIndexQueryCacheHandle cache_handler;

std::shared_ptr<roaring::Roaring> term_match_bitmap = nullptr;
auto cache_status = handle_query_cache(cache, cache_key, &cache_handler, stats, bit_map);
auto cache_status =
handle_query_cache(runtime_state, cache, cache_key, &cache_handler, stats, bit_map);
if (cache_status.ok()) {
return Status::OK();
}
FulltextIndexSearcherPtr* searcher_ptr = nullptr;

InvertedIndexCacheHandle inverted_index_cache_handle;
RETURN_IF_ERROR(handle_searcher_cache(&inverted_index_cache_handle, io_ctx, stats));
RETURN_IF_ERROR(
handle_searcher_cache(runtime_state, &inverted_index_cache_handle, io_ctx, stats));
auto searcher_variant = inverted_index_cache_handle.get_index_searcher();
searcher_ptr = std::get_if<FulltextIndexSearcherPtr>(&searcher_variant);
if (searcher_ptr != nullptr) {
Expand Down Expand Up @@ -379,7 +413,8 @@ Status StringTypeInvertedIndexReader::query(const io::IOContext* io_ctx,
search_str};
auto* cache = InvertedIndexQueryCache::instance();
InvertedIndexQueryCacheHandle cache_handler;
auto cache_status = handle_query_cache(cache, cache_key, &cache_handler, stats, bit_map);
auto cache_status =
handle_query_cache(runtime_state, cache, cache_key, &cache_handler, stats, bit_map);
if (cache_status.ok()) {
return Status::OK();
}
Expand All @@ -393,7 +428,8 @@ Status StringTypeInvertedIndexReader::query(const io::IOContext* io_ctx,
auto result = std::make_shared<roaring::Roaring>();
FulltextIndexSearcherPtr* searcher_ptr = nullptr;
InvertedIndexCacheHandle inverted_index_cache_handle;
RETURN_IF_ERROR(handle_searcher_cache(&inverted_index_cache_handle, io_ctx, stats));
RETURN_IF_ERROR(
handle_searcher_cache(runtime_state, &inverted_index_cache_handle, io_ctx, stats));
auto searcher_variant = inverted_index_cache_handle.get_index_searcher();
searcher_ptr = std::get_if<FulltextIndexSearcherPtr>(&searcher_variant);
if (searcher_ptr != nullptr) {
Expand Down Expand Up @@ -609,11 +645,12 @@ Status BkdIndexReader::invoke_bkd_query(const void* query_value, InvertedIndexQu
}

Status BkdIndexReader::try_query(const io::IOContext* io_ctx, OlapReaderStatistics* stats,
const std::string& column_name, const void* query_value,
InvertedIndexQueryType query_type, uint32_t* count) {
RuntimeState* runtime_state, const std::string& column_name,
const void* query_value, InvertedIndexQueryType query_type,
uint32_t* count) {
try {
std::shared_ptr<lucene::util::bkd::bkd_reader> r;
auto st = get_bkd_reader(r, io_ctx, stats);
auto st = get_bkd_reader(r, io_ctx, stats, runtime_state);
if (!st.ok()) {
LOG(WARNING) << "get bkd reader for "
<< _inverted_index_file_reader->get_index_file_path(&_index_meta)
Expand All @@ -629,7 +666,8 @@ Status BkdIndexReader::try_query(const io::IOContext* io_ctx, OlapReaderStatisti
auto* cache = InvertedIndexQueryCache::instance();
InvertedIndexQueryCacheHandle cache_handler;
std::shared_ptr<roaring::Roaring> bit_map;
auto cache_status = handle_query_cache(cache, cache_key, &cache_handler, stats, bit_map);
auto cache_status =
handle_query_cache(runtime_state, cache, cache_key, &cache_handler, stats, bit_map);
if (cache_status.ok()) {
*count = bit_map->cardinality();
return Status::OK();
Expand All @@ -653,7 +691,7 @@ Status BkdIndexReader::query(const io::IOContext* io_ctx, OlapReaderStatistics*

try {
std::shared_ptr<lucene::util::bkd::bkd_reader> r;
auto st = get_bkd_reader(r, io_ctx, stats);
auto st = get_bkd_reader(r, io_ctx, stats, runtime_state);
if (!st.ok()) {
LOG(WARNING) << "get bkd reader for "
<< _inverted_index_file_reader->get_index_file_path(&_index_meta)
Expand All @@ -668,7 +706,8 @@ Status BkdIndexReader::query(const io::IOContext* io_ctx, OlapReaderStatistics*
query_str};
auto* cache = InvertedIndexQueryCache::instance();
InvertedIndexQueryCacheHandle cache_handler;
auto cache_status = handle_query_cache(cache, cache_key, &cache_handler, stats, bit_map);
auto cache_status =
handle_query_cache(runtime_state, cache, cache_key, &cache_handler, stats, bit_map);
if (cache_status.ok()) {
return Status::OK();
}
Expand All @@ -690,10 +729,11 @@ Status BkdIndexReader::query(const io::IOContext* io_ctx, OlapReaderStatistics*
}

Status BkdIndexReader::get_bkd_reader(BKDIndexSearcherPtr& bkd_reader, const io::IOContext* io_ctx,
OlapReaderStatistics* stats) {
OlapReaderStatistics* stats, RuntimeState* runtime_state) {
BKDIndexSearcherPtr* bkd_searcher = nullptr;
InvertedIndexCacheHandle inverted_index_cache_handle;
RETURN_IF_ERROR(handle_searcher_cache(&inverted_index_cache_handle, io_ctx, stats));
RETURN_IF_ERROR(
handle_searcher_cache(runtime_state, &inverted_index_cache_handle, io_ctx, stats));
auto searcher_variant = inverted_index_cache_handle.get_index_searcher();
bkd_searcher = std::get_if<BKDIndexSearcherPtr>(&searcher_variant);
if (bkd_searcher) {
Expand Down Expand Up @@ -1138,8 +1178,8 @@ Status InvertedIndexIterator::try_read_from_inverted_index(const std::string& co
query_type == InvertedIndexQueryType::LESS_EQUAL_QUERY ||
query_type == InvertedIndexQueryType::LESS_THAN_QUERY ||
query_type == InvertedIndexQueryType::EQUAL_QUERY) {
RETURN_IF_ERROR(
_reader->try_query(&_io_ctx, _stats, column_name, query_value, query_type, count));
RETURN_IF_ERROR(_reader->try_query(&_io_ctx, _stats, _runtime_state, column_name,
query_value, query_type, count));
}
return Status::OK();
}
Expand Down
38 changes: 17 additions & 21 deletions be/src/olap/rowset/segment_v2/inverted_index_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -190,8 +190,9 @@ class InvertedIndexReader : public std::enable_shared_from_this<InvertedIndexRea
const void* query_value, InvertedIndexQueryType query_type,
std::shared_ptr<roaring::Roaring>& bit_map) = 0;
virtual Status try_query(const io::IOContext* io_ctx, OlapReaderStatistics* stats,
const std::string& column_name, const void* query_value,
InvertedIndexQueryType query_type, uint32_t* count) = 0;
RuntimeState* runtime_state, const std::string& column_name,
const void* query_value, InvertedIndexQueryType query_type,
uint32_t* count) = 0;

Status read_null_bitmap(const io::IOContext* io_ctx, OlapReaderStatistics* stats,
InvertedIndexQueryCacheHandle* cache_handle,
Expand All @@ -208,22 +209,14 @@ class InvertedIndexReader : public std::enable_shared_from_this<InvertedIndexRea
[[nodiscard]] bool has_null() const { return _has_null; }
void set_has_null(bool has_null) { _has_null = has_null; }

virtual Status handle_query_cache(InvertedIndexQueryCache* cache,
virtual Status handle_query_cache(RuntimeState* runtime_state, InvertedIndexQueryCache* cache,
const InvertedIndexQueryCache::CacheKey& cache_key,
InvertedIndexQueryCacheHandle* cache_handler,
OlapReaderStatistics* stats,
std::shared_ptr<roaring::Roaring>& bit_map) {
if (cache->lookup(cache_key, cache_handler)) {
stats->inverted_index_query_cache_hit++;
SCOPED_RAW_TIMER(&stats->inverted_index_query_bitmap_copy_timer);
bit_map = cache_handler->get_bitmap();
return Status::OK();
}
stats->inverted_index_query_cache_miss++;
return Status::Error<ErrorCode::KEY_NOT_FOUND>("cache miss");
}
std::shared_ptr<roaring::Roaring>& bit_map);

virtual Status handle_searcher_cache(InvertedIndexCacheHandle* inverted_index_cache_handle,
virtual Status handle_searcher_cache(RuntimeState* runtime_state,
InvertedIndexCacheHandle* inverted_index_cache_handle,
const io::IOContext* io_ctx, OlapReaderStatistics* stats);
std::string get_index_file_path();
static Status create_index_searcher(lucene::store::Directory* dir, IndexSearcherPtr* searcher,
Expand Down Expand Up @@ -262,8 +255,9 @@ class FullTextIndexReader : public InvertedIndexReader {
const void* query_value, InvertedIndexQueryType query_type,
std::shared_ptr<roaring::Roaring>& bit_map) override;
Status try_query(const io::IOContext* io_ctx, OlapReaderStatistics* stats,
const std::string& column_name, const void* query_value,
InvertedIndexQueryType query_type, uint32_t* count) override {
RuntimeState* runtime_state, const std::string& column_name,
const void* query_value, InvertedIndexQueryType query_type,
uint32_t* count) override {
return Status::Error<ErrorCode::NOT_IMPLEMENTED_ERROR>(
"FullTextIndexReader not support try_query");
}
Expand All @@ -289,8 +283,9 @@ class StringTypeInvertedIndexReader : public InvertedIndexReader {
const void* query_value, InvertedIndexQueryType query_type,
std::shared_ptr<roaring::Roaring>& bit_map) override;
Status try_query(const io::IOContext* io_ctx, OlapReaderStatistics* stats,
const std::string& column_name, const void* query_value,
InvertedIndexQueryType query_type, uint32_t* count) override {
RuntimeState* runtime_state, const std::string& column_name,
const void* query_value, InvertedIndexQueryType query_type,
uint32_t* count) override {
return Status::Error<ErrorCode::NOT_IMPLEMENTED_ERROR>(
"StringTypeInvertedIndexReader not support try_query");
}
Expand Down Expand Up @@ -350,8 +345,9 @@ class BkdIndexReader : public InvertedIndexReader {
const void* query_value, InvertedIndexQueryType query_type,
std::shared_ptr<roaring::Roaring>& bit_map) override;
Status try_query(const io::IOContext* io_ctx, OlapReaderStatistics* stats,
const std::string& column_name, const void* query_value,
InvertedIndexQueryType query_type, uint32_t* count) override;
RuntimeState* runtime_state, const std::string& column_name,
const void* query_value, InvertedIndexQueryType query_type,
uint32_t* count) override;
Status invoke_bkd_try_query(const void* query_value, InvertedIndexQueryType query_type,
std::shared_ptr<lucene::util::bkd::bkd_reader> r, uint32_t* count);
Status invoke_bkd_query(const void* query_value, InvertedIndexQueryType query_type,
Expand All @@ -364,7 +360,7 @@ class BkdIndexReader : public InvertedIndexReader {

InvertedIndexReaderType type() override;
Status get_bkd_reader(BKDIndexSearcherPtr& reader, const io::IOContext* io_ctx,
OlapReaderStatistics* stats);
OlapReaderStatistics* stats, RuntimeState* runtime_state);

private:
const TypeInfo* _type_info {};
Expand Down
16 changes: 16 additions & 0 deletions fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
Original file line number Diff line number Diff line change
Expand Up @@ -679,6 +679,8 @@ public class SessionVariable implements Serializable, Writable {

public static final String ENABLE_MATCH_WITHOUT_INVERTED_INDEX = "enable_match_without_inverted_index";
public static final String ENABLE_FALLBACK_ON_MISSING_INVERTED_INDEX = "enable_fallback_on_missing_inverted_index";
public static final String ENABLE_INVERTED_INDEX_SEARCHER_CACHE = "enable_inverted_index_searcher_cache";
public static final String ENABLE_INVERTED_INDEX_QUERY_CACHE = "enable_inverted_index_query_cache";

public static final String IN_LIST_VALUE_COUNT_THRESHOLD = "in_list_value_count_threshold";

Expand Down Expand Up @@ -2304,6 +2306,18 @@ public void setIgnoreShapePlanNodes(String ignoreShapePlanNodes) {
})
public boolean enableFallbackOnMissingInvertedIndex = true;

@VariableMgr.VarAttr(name = ENABLE_INVERTED_INDEX_SEARCHER_CACHE, description = {
"开启后会缓存倒排索引searcher",
"Enabling this will cache the inverted index searcher."
})
public boolean enableInvertedIndexSearcherCache = true;

@VariableMgr.VarAttr(name = ENABLE_INVERTED_INDEX_QUERY_CACHE, description = {
"开启后会缓存倒排索引查询结果",
"Enabling this will cache the results of inverted index queries."
})
public boolean enableInvertedIndexQueryCache = true;

@VariableMgr.VarAttr(name = IN_LIST_VALUE_COUNT_THRESHOLD, description = {
"in条件value数量大于这个threshold后将不会走fast_execute",
"When the number of values in the IN condition exceeds this threshold,"
Expand Down Expand Up @@ -3990,6 +4004,8 @@ public TQueryOptions toThrift() {

tResult.setEnableMatchWithoutInvertedIndex(enableMatchWithoutInvertedIndex);
tResult.setEnableFallbackOnMissingInvertedIndex(enableFallbackOnMissingInvertedIndex);
tResult.setEnableInvertedIndexSearcherCache(enableInvertedIndexSearcherCache);
tResult.setEnableInvertedIndexQueryCache(enableInvertedIndexQueryCache);
tResult.setHiveOrcUseColumnNames(hiveOrcUseColumnNames);
tResult.setHiveParquetUseColumnNames(hiveParquetUseColumnNames);
tResult.setKeepCarriageReturn(keepCarriageReturn);
Expand Down
4 changes: 4 additions & 0 deletions gensrc/thrift/PaloInternalService.thrift
Original file line number Diff line number Diff line change
Expand Up @@ -360,6 +360,10 @@ struct TQueryOptions {
141: optional bool ignore_runtime_filter_error = false;
142: optional bool enable_fixed_len_to_uint32_v2 = false;
143: optional bool enable_shared_exchange_sink_buffer = true;

144: optional bool enable_inverted_index_searcher_cache = true;
145: optional bool enable_inverted_index_query_cache = true;

// For cloud, to control if the content would be written into file cache
// In write path, to control if the content would be written into file cache.
// In read path, read from file cache or remote storage when execute query.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !sql --
863

-- !sql --
863

-- !sql --
863

-- !sql --
863

-- !sql --
350

-- !sql --
863

-- !sql --
350

Loading

0 comments on commit a032ece

Please sign in to comment.