Skip to content

Commit

Permalink
branch-2.1: [fix](memory) Fix compatibility with CgroupV2 #44579 (#44934)
Browse files Browse the repository at this point in the history

Cherry-picked from #44579

Co-authored-by: Xinyi Zou <[email protected]>
  • Loading branch information
github-actions[bot] and xinyiZzz authored Dec 4, 2024
1 parent dcf3eb3 commit c4bd0e8
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 7 deletions.
23 changes: 19 additions & 4 deletions be/src/common/cgroup_memory_ctl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@

#include "common/status.h"
#include "util/cgroup_util.h"
#include "util/error_util.h"

namespace doris {

Expand Down Expand Up @@ -84,14 +85,23 @@ struct CgroupsV2Reader : CGroupMemoryCtl::ICgroupsReader {
: _mount_file_dir(std::move(mount_file_dir)) {}

// Reads the cgroup v2 memory limit from `memory.max` under the mount directory.
//
// On success `*value` holds the limit. When the first line of the file is the
// literal "max" (cgroup v2's spelling for "no limit configured"), `*value` is
// set to std::numeric_limits<int64_t>::max().
//
// Returns Status::CgroupError if the first line cannot be read, otherwise
// whatever CGroupUtil::read_int_line_from_cgroup_file returns for the file.
Status read_memory_limit(int64_t* value) override {
    const std::filesystem::path file_path = _mount_file_dir / "memory.max";

    // Look at the raw first line before attempting any integer parsing, so the
    // non-numeric "max" sentinel is recognized instead of being handed to the
    // integer reader first.
    std::string line;
    std::ifstream file_stream(file_path, std::ios::in);
    std::getline(file_stream, line);
    // fail() already covers badbit; an unopened or empty file also lands here.
    if (file_stream.fail()) {
        return Status::CgroupError("Error reading {}: {}", file_path.string(),
                                   get_str_err_msg());
    }

    if (line == "max") {
        *value = std::numeric_limits<int64_t>::max();
        return Status::OK();
    }

    // Numeric limit: delegate parsing to the shared cgroup helper.
    RETURN_IF_ERROR(CGroupUtil::read_int_line_from_cgroup_file(file_path, value));
    return Status::OK();
}

Status read_memory_usage(int64_t* value) override {
// memory.current contains a single number
// the reason why we subtract it described here: https://github.com/ClickHouse/ClickHouse/issues/64652#issuecomment-2149630667
RETURN_IF_ERROR(CGroupUtil::read_int_line_from_cgroup_file(
(_mount_file_dir / "memory.current"), value));
std::unordered_map<std::string, int64_t> metrics_map;
Expand All @@ -100,7 +110,12 @@ struct CgroupsV2Reader : CGroupMemoryCtl::ICgroupsReader {
if (*value < metrics_map["inactive_file"]) {
return Status::CgroupError("CgroupsV2Reader read_memory_usage negative memory usage");
}
// The reason why we subtract inactive_file is described here:
// https://github.com/ClickHouse/ClickHouse/issues/64652#issuecomment-2149630667
*value -= metrics_map["inactive_file"];
// Part of "slab" that might be reclaimed, such as dentries and inodes.
// https://arthurchiao.art/blog/cgroupv2-zh/
*value -= metrics_map["slab_reclaimable"];
return Status::OK();
}

Expand Down
18 changes: 15 additions & 3 deletions be/src/util/mem_info.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -196,9 +196,10 @@ void MemInfo::refresh_proc_meminfo() {
_s_cgroup_mem_limit = std::numeric_limits<int64_t>::max();
// find cgroup limit failed, wait 300s, 3000 * 100ms.
_s_cgroup_mem_refresh_wait_times = -3000;
LOG(INFO) << "Refresh cgroup memory limit failed, refresh again after 300s, cgroup "
"mem limit: "
<< _s_cgroup_mem_limit;
LOG(WARNING)
<< "Refresh cgroup memory limit failed, refresh again after 300s, cgroup "
"mem limit: "
<< _s_cgroup_mem_limit << ", " << status;
} else {
// wait 10s, 100 * 100ms, avoid too frequently.
_s_cgroup_mem_refresh_wait_times = -100;
Expand All @@ -207,11 +208,16 @@ void MemInfo::refresh_proc_meminfo() {
_s_cgroup_mem_refresh_wait_times++;
}

// cgroup mem limit is refreshed every 10 seconds,
// cgroup mem usage is refreshed together with memInfo every time, which is very frequent.
if (_s_cgroup_mem_limit != std::numeric_limits<int64_t>::max()) {
auto status = CGroupMemoryCtl::find_cgroup_mem_usage(&_s_cgroup_mem_usage);
if (!status.ok()) {
_s_cgroup_mem_usage = std::numeric_limits<int64_t>::min();
_s_cgroup_mem_refresh_state = false;
LOG_EVERY_N(WARNING, 500)
<< "Refresh cgroup memory usage failed, cgroup mem limit: "
<< _s_cgroup_mem_limit << ", " << status;
} else {
_s_cgroup_mem_refresh_state = true;
}
Expand Down Expand Up @@ -274,6 +280,12 @@ void MemInfo::refresh_proc_meminfo() {
mem_available = _mem_info_bytes["MemAvailable"];
}
if (_s_cgroup_mem_refresh_state) {
// Note: CgroupV2 MemAvailable is usually a little smaller than Process MemAvailable.
// Process `MemAvailable = MemFree - LowWaterMark + (PageCache - min(PageCache / 2, LowWaterMark))`
// is taken from `MemAvailable` in `/proc/meminfo`, as calculated by the OS.
// CgroupV2 `MemAvailable = cgroup_mem_limit - cgroup_mem_usage`, where
// `cgroup_mem_usage = memory.current - inactive_file - slab_reclaimable`; in practice,
// `cgroup_mem_usage` still seems to include some memory that could be reused.
if (mem_available < 0) {
mem_available = _s_cgroup_mem_limit - _s_cgroup_mem_usage;
} else {
Expand Down

0 comments on commit c4bd0e8

Please sign in to comment.