From 8e45a101a1a5355d2ebc1be63505eff3cf9566a5 Mon Sep 17 00:00:00 2001
From: paulfantom
Date: Fri, 22 Nov 2024 13:43:06 +0100
Subject: [PATCH] npd: test a solution to a bug in npd

---
Notes (this section is ignored when the patch is applied):

* repository.yaml: the chart source moves from the HTTPS index to the OCI
  registry. OCI is selected with `type: oci`; this assumes the manifest is a
  Flux HelmRepository (its `kind:` line is outside the hunk) - TODO confirm.
* values.yaml: the bundled /config/health-checker-containerd.json monitor is
  replaced by an inline definition with the same file name. Definitions under
  `custom_monitor_definitions` are expected to be mounted at /custom-config,
  so the monitor list must reference
  /custom-config/health-checker-containerd.json.
* `max_output_length` and `concurrency` are JSON numbers, not strings.
* NOTE(review): the definition declares "plugin": "custom". Confirm whether it
  should be listed under `settings.custom_plugin_monitors` instead of
  `settings.log_monitors`; the placement is kept as it was in the pre-image.
* The post-image blob ids on the `index` lines predate the review fixes above;
  regenerate the patch if a 3-way apply is needed.

 base/node-problem-detector/repository.yaml |  3 +-
 base/node-problem-detector/values.yaml     | 42 +++++++++++++++++++++-
 2 files changed, 43 insertions(+), 2 deletions(-)

diff --git a/base/node-problem-detector/repository.yaml b/base/node-problem-detector/repository.yaml
index 967b38f49..8777a12af 100644
--- a/base/node-problem-detector/repository.yaml
+++ b/base/node-problem-detector/repository.yaml
@@ -5,4 +5,5 @@ metadata:
   namespace: node-problem-detector
 spec:
   interval: 5m
-  url: https://charts.deliveryhero.io/
+  type: oci
+  url: oci://ghcr.io/deliveryhero/helm-charts
diff --git a/base/node-problem-detector/values.yaml b/base/node-problem-detector/values.yaml
index b1bc5dede..250580956 100644
--- a/base/node-problem-detector/values.yaml
+++ b/base/node-problem-detector/values.yaml
@@ -20,7 +20,47 @@ resources:
     memory: 18Mi
 
 settings:
+  #log_monitors:
+  #  - /config/kernel-monitor.json
+  #  - /config/readonly-monitor.json
+  #  - /config/health-checker-containerd.json
   log_monitors:
     - /config/kernel-monitor.json
     - /config/readonly-monitor.json
-    - /config/health-checker-containerd.json
+    - /custom-config/health-checker-containerd.json
+
+  custom_monitor_definitions:
+    health-checker-containerd.json: |
+      {
+        "plugin": "custom",
+        "pluginConfig": {
+          "invoke_interval": "10s",
+          "timeout": "3m",
+          "max_output_length": 80,
+          "concurrency": 1
+        },
+        "source": "health-checker",
+        "metricsReporting": true,
+        "conditions": [
+          {
+            "type": "ContainerRuntimeUnhealthy",
+            "reason": "ContainerRuntimeIsHealthy",
+            "message": "Container runtime on the node is functioning properly"
+          }
+        ],
+        "rules": [
+          {
+            "type": "permanent",
+            "condition": "ContainerRuntimeUnhealthy",
+            "reason": "ContainerdUnhealthy",
+            "path": "/home/kubernetes/bin/health-checker",
+            "args": [
+              "--component=cri",
+              "--enable-repair=true",
+              "--cooldown-time=2m",
+              "--health-check-timeout=60s"
+            ],
+            "timeout": "3m"
+          }
+        ]
+      }