diff --git a/be/src/common/cgroup_memory_ctl.cpp b/be/src/common/cgroup_memory_ctl.cpp index a29432bdb4ede5..dddcbd50338d82 100644 --- a/be/src/common/cgroup_memory_ctl.cpp +++ b/be/src/common/cgroup_memory_ctl.cpp @@ -27,6 +27,7 @@ #include "common/status.h" #include "util/cgroup_util.h" +#include "util/error_util.h" namespace doris { @@ -84,14 +85,23 @@ struct CgroupsV2Reader : CGroupMemoryCtl::ICgroupsReader { : _mount_file_dir(std::move(mount_file_dir)) {} Status read_memory_limit(int64_t* value) override { - RETURN_IF_ERROR(CGroupUtil::read_int_line_from_cgroup_file((_mount_file_dir / "memory.max"), - value)); + std::filesystem::path file_path = _mount_file_dir / "memory.max"; + std::string line; + std::ifstream file_stream(file_path, std::ios::in); + getline(file_stream, line); + if (file_stream.fail() || file_stream.bad()) { + return Status::CgroupError("Error reading {}: {}", file_path.string(), + get_str_err_msg()); + } + if (line == "max") { + *value = std::numeric_limits::max(); + return Status::OK(); + } + RETURN_IF_ERROR(CGroupUtil::read_int_line_from_cgroup_file(file_path, value)); return Status::OK(); } Status read_memory_usage(int64_t* value) override { - // memory.current contains a single number - // the reason why we subtract it described here: https://github.com/ClickHouse/ClickHouse/issues/64652#issuecomment-2149630667 RETURN_IF_ERROR(CGroupUtil::read_int_line_from_cgroup_file( (_mount_file_dir / "memory.current"), value)); std::unordered_map metrics_map; @@ -100,7 +110,12 @@ struct CgroupsV2Reader : CGroupMemoryCtl::ICgroupsReader { if (*value < metrics_map["inactive_file"]) { return Status::CgroupError("CgroupsV2Reader read_memory_usage negative memory usage"); } + // the reason why we subtract inactive_file described here: + // https://github.com/ClickHouse/ClickHouse/issues/64652#issuecomment-2149630667 *value -= metrics_map["inactive_file"]; + // Part of "slab" that might be reclaimed, such as dentries and inodes. + // https://arthurchiao.art/blog/cgroupv2-zh/ + *value -= metrics_map["slab_reclaimable"]; return Status::OK(); } diff --git a/be/src/util/mem_info.cpp b/be/src/util/mem_info.cpp index b1bcfdcc56b430..fef10e679e675a 100644 --- a/be/src/util/mem_info.cpp +++ b/be/src/util/mem_info.cpp @@ -196,9 +196,10 @@ void MemInfo::refresh_proc_meminfo() { _s_cgroup_mem_limit = std::numeric_limits::max(); // find cgroup limit failed, wait 300s, 1000 * 100ms. _s_cgroup_mem_refresh_wait_times = -3000; - LOG(INFO) << "Refresh cgroup memory limit failed, refresh again after 300s, cgroup " - "mem limit: " - << _s_cgroup_mem_limit; + LOG(WARNING) + << "Refresh cgroup memory limit failed, refresh again after 300s, cgroup " + "mem limit: " + << _s_cgroup_mem_limit << ", " << status; } else { // wait 10s, 100 * 100ms, avoid too frequently. _s_cgroup_mem_refresh_wait_times = -100; @@ -207,11 +208,16 @@ void MemInfo::refresh_proc_meminfo() { _s_cgroup_mem_refresh_wait_times++; } + // cgroup mem limit is refreshed every 10 seconds, + // cgroup mem usage is refreshed together with memInfo every time, which is very frequent. if (_s_cgroup_mem_limit != std::numeric_limits::max()) { auto status = CGroupMemoryCtl::find_cgroup_mem_usage(&_s_cgroup_mem_usage); if (!status.ok()) { _s_cgroup_mem_usage = std::numeric_limits::min(); _s_cgroup_mem_refresh_state = false; + LOG_EVERY_N(WARNING, 500) + << "Refresh cgroup memory usage failed, cgroup mem limit: " + << _s_cgroup_mem_limit << ", " << status; } else { _s_cgroup_mem_refresh_state = true; } @@ -274,6 +280,12 @@ void MemInfo::refresh_proc_meminfo() { mem_available = _mem_info_bytes["MemAvailable"]; } if (_s_cgroup_mem_refresh_state) { + // Note, CgroupV2 MemAvailable is usually a little smaller than Process MemAvailable. + // Process `MemAvailable = MemFree - LowWaterMark + (PageCache - min(PageCache / 2, LowWaterMark))`, + // from `MemAvailable` in `/proc/meminfo`, calculated by OS. + // CgroupV2 `MemAvailable = cgroup_mem_limit - cgroup_mem_usage`, + // `cgroup_mem_usage = memory.current - inactive_file - slab_reclaimable`, in fact, + // there seems to be some memory that can be reused in `cgroup_mem_usage`. if (mem_available < 0) { mem_available = _s_cgroup_mem_limit - _s_cgroup_mem_usage; } else {