Skip to content

Commit

Permalink
npd: test a solution to a bug in npd
Browse files Browse the repository at this point in the history
  • Loading branch information
paulfantom committed Nov 22, 2024
1 parent 15937c7 commit 8e45a10
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 2 deletions.
3 changes: 2 additions & 1 deletion base/node-problem-detector/repository.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,5 @@ metadata:
namespace: node-problem-detector
spec:
  # How often the chart source is re-checked.
  interval: 5m
  # NOTE(review): assumes this resource is a Flux HelmRepository (the `kind`
  # line is outside this hunk) - confirm. HelmRepository has no `oci` boolean;
  # an OCI registry source is selected with `type: oci` together with an
  # `oci://` URL.
  type: oci
  # Replaces the previous HTTPS chart index (https://charts.deliveryhero.io/).
  # Only one `url` key may be present in the mapping.
  url: oci://ghcr.io/deliveryhero/helm-charts
42 changes: 41 additions & 1 deletion base/node-problem-detector/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,47 @@ resources:
memory: 18Mi

settings:
  # Log monitor configs. The /config/... files are expected to ship inside the
  # node-problem-detector image.
  log_monitors:
    - /config/kernel-monitor.json
    - /config/readonly-monitor.json
    # NOTE(review): docker-monitor-filelog.json is not defined under
    # custom_monitor_definitions below, so this path would not exist in the
    # container. Re-enable once the definition is added.
    # - /custom-config/docker-monitor-filelog.json

  # Custom plugin monitor configs. The health checker below declares
  # "plugin": "custom", so it is loaded as a custom plugin monitor rather than
  # as a log monitor.
  # NOTE(review): assumes the chart mounts custom_monitor_definitions at
  # /custom-config and that the locally defined file is the one intended
  # (rather than the image's /config/health-checker-containerd.json) - confirm.
  custom_plugin_monitors:
    - /custom-config/health-checker-containerd.json

  # Monitor definitions supplied by this values file; each key is a file name.
  # max_output_length and concurrency are written as JSON numbers, matching
  # the upstream node-problem-detector sample configs, instead of quoted
  # strings.
  custom_monitor_definitions:
    health-checker-containerd.json: |
      {
        "plugin": "custom",
        "pluginConfig": {
          "invoke_interval": "10s",
          "timeout": "3m",
          "max_output_length": 80,
          "concurrency": 1
        },
        "source": "health-checker",
        "metricsReporting": true,
        "conditions": [
          {
            "type": "ContainerRuntimeUnhealthy",
            "reason": "ContainerRuntimeIsHealthy",
            "message": "Container runtime on the node is functioning properly"
          }
        ],
        "rules": [
          {
            "type": "permanent",
            "condition": "ContainerRuntimeUnhealthy",
            "reason": "ContainerdUnhealthy",
            "path": "/home/kubernetes/bin/health-checker",
            "args": [
              "--component=cri",
              "--enable-repair=true",
              "--cooldown-time=2m",
              "--health-check-timeout=60s"
            ],
            "timeout": "3m"
          }
        ]
      }

0 comments on commit 8e45a10

Please sign in to comment.