From 35263c7013ea11a1c5fbba1a3768a04a10acaec1 Mon Sep 17 00:00:00 2001 From: zhengyu Date: Fri, 29 Nov 2024 10:40:01 +0800 Subject: [PATCH] [fix](cloud) serialize cache init to avoid unstable cache pick (#44429) The original parallel cache init causes an unstable pick of the cache base path, because the choice depends on the order of init, which can differ after each BE reboot. This in turn causes cache misses and duplicate cache blocks across multiple caches (wasted disk space). This commit serializes the init process of multiple caches and uses a fixed order, i.e. the order explicitly declared in the BE conf: file_cache_path. Signed-off-by: zhengyu --- be/src/runtime/exec_env_init.cpp | 29 ++++++++++------------------- 1 file changed, 10 insertions(+), 19 deletions(-) diff --git a/be/src/runtime/exec_env_init.cpp b/be/src/runtime/exec_env_init.cpp index b91adb4de9836e..d70bedbfe8ad6a 100644 --- a/be/src/runtime/exec_env_init.cpp +++ b/be/src/runtime/exec_env_init.cpp @@ -422,33 +422,24 @@ void ExecEnv::init_file_cache_factory(std::vector& cache_paths << ", reason=" << rest.msg(); exit(-1); } - std::vector file_cache_init_threads; - std::list cache_status; + doris::Status cache_status; for (auto& cache_path : cache_paths) { if (cache_path_set.find(cache_path.path) != cache_path_set.end()) { LOG(WARNING) << fmt::format("cache path {} is duplicate", cache_path.path); continue; } - file_cache_init_threads.emplace_back([&, status = &cache_status.emplace_back()]() { - *status = doris::io::FileCacheFactory::instance()->create_file_cache( - cache_path.path, cache_path.init_settings()); - }); - - cache_path_set.emplace(cache_path.path); - } - - for (std::thread& thread : file_cache_init_threads) { - if (thread.joinable()) { - thread.join(); - } - } - for (const auto& status : cache_status) { - if (!status.ok()) { - LOG(FATAL) << "failed to init file cache, err: " << status; - exit(-1); + cache_status = doris::io::FileCacheFactory::instance()->create_file_cache( 
cache_path.path, cache_path.init_settings()); + if (!cache_status.ok()) { + if (!doris::config::ignore_broken_disk) { + LOG(FATAL) << "failed to init file cache, err: " << cache_status; + exit(-1); + } + LOG(WARNING) << "failed to init file cache, err: " << cache_status; } + cache_path_set.emplace(cache_path.path); } }