Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[fix](memory) Fix metadata memory tracking and profile #44739

Merged
merged 3 commits into from
Dec 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 7 additions & 4 deletions be/src/common/daemon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -500,15 +500,18 @@ void Daemon::cache_adjust_capacity_thread() {
void Daemon::cache_prune_stale_thread() {
int32_t interval = config::cache_periodic_prune_stale_sweep_sec;
while (!_stop_background_threads_latch.wait_for(std::chrono::seconds(interval))) {
if (interval <= 0) {
LOG(WARNING) << "config of cache clean interval is illegal: [" << interval
<< "], force set to 3600 ";
interval = 3600;
if (config::cache_periodic_prune_stale_sweep_sec <= 0) {
LOG(WARNING) << "config of cache clean interval is: [" << interval
<< "], it means the cache prune stale thread is disabled, will wait 3s "
"and check again.";
interval = 3;
continue;
}
if (config::disable_memory_gc) {
continue;
}
CacheManager::instance()->for_each_cache_prune_stale();
interval = config::cache_periodic_prune_stale_sweep_sec;
}
}

Expand Down
1 change: 0 additions & 1 deletion be/src/olap/delta_writer_v2.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@ namespace doris {

class FlushToken;
class MemTable;
class MemTracker;
class Schema;
class StorageEngine;
class TupleDescriptor;
Expand Down
1 change: 0 additions & 1 deletion be/src/olap/memtable_writer.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,6 @@ namespace doris {

class FlushToken;
class MemTable;
class MemTracker;
class StorageEngine;
class TupleDescriptor;
class SlotDescriptor;
Expand Down
73 changes: 52 additions & 21 deletions be/src/olap/metadata_adder.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,17 @@
#include <bvar/bvar.h>
#include <stdint.h>

#include "runtime/exec_env.h"
#include "runtime/memory/mem_tracker_limiter.h"
#include "util/runtime_profile.h"

namespace doris {

inline bvar::Adder<int64_t> g_rowset_meta_mem_bytes("doris_rowset_meta_mem_bytes");
inline bvar::Adder<int64_t> g_rowset_meta_num("doris_rowset_meta_num");

inline bvar::Adder<int64_t> g_all_rowsets_mem_bytes("doris_all_rowsets_mem_bytes");
inline bvar::Adder<int64_t> g_all_rowsets_num("doris_all_rowsets_num");
inline bvar::Adder<int64_t> g_rowset_mem_bytes("doris_rowset_mem_bytes");
inline bvar::Adder<int64_t> g_rowset_num("doris_rowset_num");

inline bvar::Adder<int64_t> g_tablet_meta_mem_bytes("doris_tablet_meta_mem_bytes");
inline bvar::Adder<int64_t> g_tablet_meta_num("doris_tablet_meta_num");
Expand All @@ -42,8 +44,9 @@ inline bvar::Adder<int64_t> g_tablet_index_num("doris_tablet_index_num");
inline bvar::Adder<int64_t> g_tablet_schema_mem_bytes("doris_tablet_schema_mem_bytes");
inline bvar::Adder<int64_t> g_tablet_schema_num("doris_tablet_schema_num");

inline bvar::Adder<int64_t> g_all_segments_mem_bytes("doris_all_segments_mem_bytes");
inline bvar::Adder<int64_t> g_all_segments_num("doris_all_segments_num");
inline bvar::Adder<int64_t> g_segment_mem_bytes("doris_segment_mem_bytes");
inline bvar::Adder<int64_t> g_segment_num("doris_segment_num");
inline bvar::Adder<int64_t> g_segment_estimate_mem_bytes("doris_segment_estimate_mem_bytes");

inline bvar::Adder<int64_t> g_column_reader_mem_bytes("doris_column_reader_mem_bytes");
inline bvar::Adder<int64_t> g_column_reader_num("doris_column_reader_num");
Expand Down Expand Up @@ -96,6 +99,10 @@ class ZoneMapIndexReader;
When a derived Class extends MetadataAdder, then the Class's number and fixed length field's memory can be counted automatically.
But if the Class has variable length field, then you should overwrite get_metadata_size and call update_metadata_size when the Class's memory changes.

get_metadata_size is only the memory of the metadata object itself, not include child objects,
for example, TabletMeta::get_metadata_size does not include the memory of TabletSchema.
Note, the memory allocated by Doris Allocator is not included.

There are some special situations that need to be noted:
1. when the derived Class override copy constructor, you'd better update memory size(call update_metadata_size) if derived class's
memory changed in its copy constructor or you not call MetadataAdder's copy constructor.
Expand All @@ -111,6 +118,31 @@ class MetadataAdder {

static void dump_metadata_object(RuntimeProfile* object_heap_dump_snapshot);

static int64_t get_all_tablets_size() {
return g_tablet_meta_mem_bytes.get_value() + g_tablet_column_mem_bytes.get_value() +
g_tablet_index_mem_bytes.get_value() + g_tablet_schema_mem_bytes.get_value();
}

static int64_t get_all_rowsets_size() {
return g_rowset_meta_mem_bytes.get_value() + g_rowset_mem_bytes.get_value();
}

static int64_t get_all_segments_size() {
return g_segment_mem_bytes.get_value() + g_column_reader_mem_bytes.get_value() +
g_bitmap_index_reader_mem_bytes.get_value() +
g_bloom_filter_index_reader_mem_bytes.get_value() +
g_index_page_reader_mem_bytes.get_value() +
g_indexed_column_reader_mem_bytes.get_value() +
g_inverted_index_reader_mem_bytes.get_value() +
g_ordinal_index_reader_mem_bytes.get_value() +
g_zone_map_index_reader_mem_bytes.get_value();
}

// Doris currently uses the estimated segments memory as the basis, maybe it is more realistic.
static int64_t get_all_segments_estimate_size() {
return g_segment_estimate_mem_bytes.get_value();
}

protected:
MetadataAdder(const MetadataAdder& other);

Expand All @@ -122,7 +154,6 @@ class MetadataAdder {

MetadataAdder<T>& operator=(const MetadataAdder<T>& other) = default;

private:
int64_t _current_meta_size {0};

void add_mem_size(int64_t val);
Expand Down Expand Up @@ -167,7 +198,7 @@ void MetadataAdder<T>::add_mem_size(int64_t val) {
if constexpr (std::is_same_v<T, RowsetMeta>) {
g_rowset_meta_mem_bytes << val;
} else if constexpr (std::is_same_v<T, Rowset>) {
g_all_rowsets_mem_bytes << val;
g_rowset_mem_bytes << val;
} else if constexpr (std::is_same_v<T, TabletMeta>) {
g_tablet_meta_mem_bytes << val;
} else if constexpr (std::is_same_v<T, TabletColumn>) {
Expand All @@ -177,7 +208,7 @@ void MetadataAdder<T>::add_mem_size(int64_t val) {
} else if constexpr (std::is_same_v<T, TabletSchema>) {
g_tablet_schema_mem_bytes << val;
} else if constexpr (std::is_same_v<T, segment_v2::Segment>) {
g_all_segments_mem_bytes << val;
g_segment_mem_bytes << val;
} else if constexpr (std::is_same_v<T, segment_v2::ColumnReader>) {
g_column_reader_mem_bytes << val;
} else if constexpr (std::is_same_v<T, segment_v2::BitmapIndexReader>) {
Expand Down Expand Up @@ -208,7 +239,7 @@ void MetadataAdder<T>::add_num(int64_t val) {
if constexpr (std::is_same_v<T, RowsetMeta>) {
g_rowset_meta_num << val;
} else if constexpr (std::is_same_v<T, Rowset>) {
g_all_rowsets_num << val;
g_rowset_num << val;
} else if constexpr (std::is_same_v<T, TabletMeta>) {
g_tablet_meta_num << val;
} else if constexpr (std::is_same_v<T, TabletColumn>) {
Expand All @@ -218,7 +249,7 @@ void MetadataAdder<T>::add_num(int64_t val) {
} else if constexpr (std::is_same_v<T, TabletSchema>) {
g_tablet_schema_num << val;
} else if constexpr (std::is_same_v<T, segment_v2::Segment>) {
g_all_segments_num << val;
g_segment_num << val;
} else if constexpr (std::is_same_v<T, segment_v2::ColumnReader>) {
g_column_reader_num << val;
} else if constexpr (std::is_same_v<T, segment_v2::BitmapIndexReader>) {
Expand Down Expand Up @@ -250,12 +281,12 @@ void MetadataAdder<T>::dump_metadata_object(RuntimeProfile* object_heap_dump_sna
COUNTER_SET(rowset_meta_mem_bytes_counter, g_rowset_meta_mem_bytes.get_value());
COUNTER_SET(rowset_meta_num_counter, g_rowset_meta_num.get_value());

RuntimeProfile::Counter* all_rowsets_mem_bytes_counter =
ADD_COUNTER(object_heap_dump_snapshot, "AllRowsetsMemBytes", TUnit::BYTES);
RuntimeProfile::Counter* all_rowsets_num_counter =
ADD_COUNTER(object_heap_dump_snapshot, "AllRowsetsNum", TUnit::UNIT);
COUNTER_SET(all_rowsets_mem_bytes_counter, g_all_rowsets_mem_bytes.get_value());
COUNTER_SET(all_rowsets_num_counter, g_all_rowsets_num.get_value());
RuntimeProfile::Counter* rowset_mem_bytes_counter =
ADD_COUNTER(object_heap_dump_snapshot, "RowsetMemBytes", TUnit::BYTES);
RuntimeProfile::Counter* rowset_num_counter =
ADD_COUNTER(object_heap_dump_snapshot, "RowsetNum", TUnit::UNIT);
COUNTER_SET(rowset_mem_bytes_counter, g_rowset_mem_bytes.get_value());
COUNTER_SET(rowset_num_counter, g_rowset_num.get_value());

RuntimeProfile::Counter* tablet_meta_mem_bytes_counter =
ADD_COUNTER(object_heap_dump_snapshot, "TabletMetaMemBytes", TUnit::BYTES);
Expand Down Expand Up @@ -285,12 +316,12 @@ void MetadataAdder<T>::dump_metadata_object(RuntimeProfile* object_heap_dump_sna
COUNTER_SET(tablet_schema_mem_bytes_counter, g_tablet_schema_mem_bytes.get_value());
COUNTER_SET(tablet_schema_num_counter, g_tablet_schema_num.get_value());

RuntimeProfile::Counter* all_segments_mem_bytes_counter =
ADD_COUNTER(object_heap_dump_snapshot, "AllSegmentsMemBytes", TUnit::BYTES);
RuntimeProfile::Counter* all_segments_num_counter =
ADD_COUNTER(object_heap_dump_snapshot, "AllSegmentsNum", TUnit::UNIT);
COUNTER_SET(all_segments_mem_bytes_counter, g_all_segments_mem_bytes.get_value());
COUNTER_SET(all_segments_num_counter, g_all_segments_num.get_value());
RuntimeProfile::Counter* segment_mem_bytes_counter =
ADD_COUNTER(object_heap_dump_snapshot, "SegmentMemBytes", TUnit::BYTES);
RuntimeProfile::Counter* segment_num_counter =
ADD_COUNTER(object_heap_dump_snapshot, "SegmentNum", TUnit::UNIT);
COUNTER_SET(segment_mem_bytes_counter, g_segment_mem_bytes.get_value());
COUNTER_SET(segment_num_counter, g_segment_num.get_value());

RuntimeProfile::Counter* column_reader_mem_bytes_counter =
ADD_COUNTER(object_heap_dump_snapshot, "ColumnReaderMemBytes", TUnit::BYTES);
Expand Down
5 changes: 3 additions & 2 deletions be/src/olap/rowset/segment_v2/indexed_column_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,8 @@ Status IndexedColumnReader::load(bool use_page_cache, bool kept_in_memory,
_sole_data_page = PagePointer(_meta.ordinal_index_meta().root_page());
} else {
RETURN_IF_ERROR(load_index_page(_meta.ordinal_index_meta().root_page(),
&_ordinal_index_page_handle, &_ordinal_index_reader));
&_ordinal_index_page_handle,
_ordinal_index_reader.get()));
_has_index_page = true;
}
}
Expand All @@ -92,7 +93,7 @@ Status IndexedColumnReader::load(bool use_page_cache, bool kept_in_memory,
_sole_data_page = PagePointer(_meta.value_index_meta().root_page());
} else {
RETURN_IF_ERROR(load_index_page(_meta.value_index_meta().root_page(),
&_value_index_page_handle, &_value_index_reader));
&_value_index_page_handle, _value_index_reader.get()));
_has_index_page = true;
}
}
Expand Down
15 changes: 9 additions & 6 deletions be/src/olap/rowset/segment_v2/indexed_column_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,12 @@ class EncodingInfo;
class IndexedColumnReader : public MetadataAdder<IndexedColumnReader> {
public:
explicit IndexedColumnReader(io::FileReaderSPtr file_reader, const IndexedColumnMetaPB& meta)
: _file_reader(std::move(file_reader)), _meta(meta) {}
: _file_reader(std::move(file_reader)), _meta(meta) {
_ordinal_index_reader = std::make_unique<IndexPageReader>();
_value_index_reader = std::make_unique<IndexPageReader>();
}

~IndexedColumnReader();
~IndexedColumnReader() override;

Status load(bool use_page_cache, bool kept_in_memory,
OlapReaderStatistics* index_load_stats = nullptr);
Expand Down Expand Up @@ -90,8 +93,8 @@ class IndexedColumnReader : public MetadataAdder<IndexedColumnReader> {
bool _has_index_page = false;
// valid only when the column contains only one data page
PagePointer _sole_data_page;
IndexPageReader _ordinal_index_reader;
IndexPageReader _value_index_reader;
std::unique_ptr<IndexPageReader> _ordinal_index_reader;
std::unique_ptr<IndexPageReader> _value_index_reader;
PageHandle _ordinal_index_page_handle;
PageHandle _value_index_page_handle;

Expand All @@ -108,8 +111,8 @@ class IndexedColumnIterator {
explicit IndexedColumnIterator(const IndexedColumnReader* reader,
OlapReaderStatistics* stats = nullptr)
: _reader(reader),
_ordinal_iter(&reader->_ordinal_index_reader),
_value_iter(&reader->_value_index_reader),
_ordinal_iter(reader->_ordinal_index_reader.get()),
_value_iter(reader->_value_index_reader.get()),
_stats(stats) {}

// Seek to the given ordinal entry. Entry 0 is the first entry.
Expand Down
12 changes: 6 additions & 6 deletions be/src/olap/rowset/segment_v2/page_handle.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@
#include "util/slice.h" // for Slice

namespace doris {

// After disable page cache, sometimes we need to know the percentage of data pages in query memory.
inline bvar::Adder<int64_t> g_page_no_cache_mem_bytes("doris_page_no_cache_mem_bytes");

namespace segment_v2 {

// When a column page is read into memory, we use this to store it.
Expand All @@ -37,8 +41,7 @@ class PageHandle {
// This class will take the ownership of input data's memory. It will
// free it when deconstructs.
PageHandle(DataPage* data) : _is_data_owner(true), _data(data) {
_page_tracker = ExecEnv::GetInstance()->page_no_cache_mem_tracker();
_page_tracker->consume(_data->capacity());
g_page_no_cache_mem_bytes << _data->capacity();
}

// This class will take the content of cache data, and will make input
Expand All @@ -51,20 +54,18 @@ class PageHandle {
// we can use std::exchange if we switch c++14 on
std::swap(_is_data_owner, other._is_data_owner);
std::swap(_data, other._data);
_page_tracker = ExecEnv::GetInstance()->page_no_cache_mem_tracker();
}

PageHandle& operator=(PageHandle&& other) noexcept {
std::swap(_is_data_owner, other._is_data_owner);
std::swap(_data, other._data);
_cache_data = std::move(other._cache_data);
_page_tracker = ExecEnv::GetInstance()->page_no_cache_mem_tracker();
return *this;
}

~PageHandle() {
if (_is_data_owner) {
_page_tracker->release(_data->capacity());
g_page_no_cache_mem_bytes << -_data->capacity();
delete _data;
} else {
DCHECK(_data == nullptr);
Expand All @@ -85,7 +86,6 @@ class PageHandle {
// otherwise _cache_data is valid, and data is belong to cache.
bool _is_data_owner = false;
DataPage* _data = nullptr;
std::shared_ptr<MemTracker> _page_tracker;
PageCacheHandle _cache_data;

// Don't allow copy and assign
Expand Down
17 changes: 14 additions & 3 deletions be/src/olap/rowset/segment_v2/segment.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,11 @@ Segment::Segment(uint32_t segment_id, RowsetId rowset_id, TabletSchemaSPtr table
_tablet_schema(std::move(tablet_schema)),
_idx_file_info(idx_file_info) {}

Segment::~Segment() = default;
Segment::~Segment() {
g_segment_estimate_mem_bytes << -_tracked_meta_mem_usage;
// if failed, fix `_tracked_meta_mem_usage` accuracy
DCHECK(_tracked_meta_mem_usage == meta_mem_usage());
}

io::UInt128Wrapper Segment::file_cache_key(std::string_view rowset_id, uint32_t seg_id) {
return io::BlockFileCache::hash(fmt::format("{}_{}.dat", rowset_id, seg_id));
Expand All @@ -174,6 +178,12 @@ int64_t Segment::get_metadata_size() const {
(_pk_index_meta ? _pk_index_meta->ByteSizeLong() : 0);
}

void Segment::update_metadata_size() {
xinyiZzz marked this conversation as resolved.
Show resolved Hide resolved
MetadataAdder::update_metadata_size();
g_segment_estimate_mem_bytes << _meta_mem_usage - _tracked_meta_mem_usage;
_tracked_meta_mem_usage = _meta_mem_usage;
}

Status Segment::_open() {
_footer_pb = std::make_unique<SegmentFooterPB>();
RETURN_IF_ERROR(_parse_footer(_footer_pb.get()));
Expand All @@ -191,8 +201,6 @@ Status Segment::_open() {
_meta_mem_usage += _pk_index_meta->ByteSizeLong();
}

update_metadata_size();

_meta_mem_usage += sizeof(*this);
_meta_mem_usage += _tablet_schema->num_columns() * config::estimated_mem_per_column_reader;

Expand All @@ -201,6 +209,8 @@ Status Segment::_open() {
// 0.01 comes from PrimaryKeyIndexBuilder::init
_meta_mem_usage += BloomFilter::optimal_bit_num(_num_rows, 0.01) / 8;

update_metadata_size();

return Status::OK();
}

Expand Down Expand Up @@ -467,6 +477,7 @@ Status Segment::_load_pk_bloom_filter() {
// for BE UT "segment_cache_test"
return _load_pk_bf_once.call([this] {
_meta_mem_usage += 100;
update_metadata_size();
return Status::OK();
});
}
Expand Down
7 changes: 4 additions & 3 deletions be/src/olap/rowset/segment_v2/segment.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,6 @@ class IDataType;
class ShortKeyIndexDecoder;
class Schema;
class StorageReadOptions;
class MemTracker;
class PrimaryKeyIndexReader;
class RowwiseIterator;
struct RowLocation;
Expand Down Expand Up @@ -93,6 +92,7 @@ class Segment : public std::enable_shared_from_this<Segment>, public MetadataAdd
~Segment();

int64_t get_metadata_size() const override;
void update_metadata_size();

Status new_iterator(SchemaSPtr schema, const StorageReadOptions& read_options,
std::unique_ptr<RowwiseIterator>* iter);
Expand Down Expand Up @@ -163,6 +163,8 @@ class Segment : public std::enable_shared_from_this<Segment>, public MetadataAdd

io::FileReaderSPtr file_reader() { return _file_reader; }

// Including the column reader memory.
// another method `get_metadata_size` not include the column reader, only the segment object itself.
int64_t meta_mem_usage() const { return _meta_mem_usage; }

// Identify the column by unique id or path info
Expand Down Expand Up @@ -249,9 +251,8 @@ class Segment : public std::enable_shared_from_this<Segment>, public MetadataAdd
// 1. Tracking memory use by segment meta data such as footer or index page.
// 2. Tracking memory use by segment column reader
// The memory consumed by querying is tracked in segment iterator.
// TODO: Segment::_meta_mem_usage Unknown value overflow, causes the value of SegmentMeta mem tracker
// is similar to `-2912341218700198079`. So, temporarily put it in experimental type tracker.
int64_t _meta_mem_usage;
int64_t _tracked_meta_mem_usage = 0;

RowsetId _rowset_id;
TabletSchemaSPtr _tablet_schema;
Expand Down
1 change: 0 additions & 1 deletion be/src/olap/rowset_builder.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ namespace doris {
class CalcDeleteBitmapToken;
class FlushToken;
class MemTable;
class MemTracker;
class StorageEngine;
class TupleDescriptor;
class SlotDescriptor;
Expand Down
Loading
Loading