From 2e3c3a51b971eebb5666371a6540b98e07b4a22b Mon Sep 17 00:00:00 2001 From: Nicolas Belouin Date: Tue, 30 Apr 2024 16:04:05 +0200 Subject: [PATCH] Improve reclaimer logging Signed-off-by: Nicolas Belouin --- .../plugin_manager/device_plugin_instance_controller.rs | 1 + agent/src/plugin_manager/device_plugin_slot_reclaimer.rs | 8 ++++++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/agent/src/plugin_manager/device_plugin_instance_controller.rs b/agent/src/plugin_manager/device_plugin_instance_controller.rs index 9ec4c8fc5..6f2e7a46d 100644 --- a/agent/src/plugin_manager/device_plugin_instance_controller.rs +++ b/agent/src/plugin_manager/device_plugin_instance_controller.rs @@ -858,6 +858,7 @@ pub async fn reconcile( plugin } Some(plugin) => { + // TODO: Add a way to handle a change in the instance's capacity. plugin.update_slots(&instance.spec.device_usage).await?; plugin.clone() } diff --git a/agent/src/plugin_manager/device_plugin_slot_reclaimer.rs b/agent/src/plugin_manager/device_plugin_slot_reclaimer.rs index 4843aa3fd..6d5ed39f6 100644 --- a/agent/src/plugin_manager/device_plugin_slot_reclaimer.rs +++ b/agent/src/plugin_manager/device_plugin_slot_reclaimer.rs @@ -75,9 +75,7 @@ pub async fn start_reclaimer(dp_manager: Arc) { loop { trace!("reclaiming unused slots - start"); if let Ok(used_slots) = get_used_slots().await { - trace!("used slots: {:?}", used_slots); let theoretical_slots = dp_manager.get_used_slots().await; - trace!("theoretical slots: {:?}", theoretical_slots); let mut new_stalled_slots: HashMap = HashMap::new(); let reclaim_iteration_start = Instant::now(); for slot_to_reclaim in theoretical_slots.difference(&used_slots) { @@ -91,6 +89,12 @@ pub async fn start_reclaimer(dp_manager: Arc) { .await .is_err() { + warn!( + "Failed to free slot {}, will try again in {}s", + slot_to_reclaim, + SLOT_RECLAIM_INTERVAL.as_secs() + ); + // To try again we just keep the slot as stalled new_stalled_slots.insert(slot_to_reclaim.to_string(), at.to_owned()); }; } else {