diff --git a/argocd-helm-charts/prometheus-linuxaid/templates/prometheus.yaml b/argocd-helm-charts/prometheus-linuxaid/templates/prometheus.yaml
index 7c358ff98..200ca16ce 100644
--- a/argocd-helm-charts/prometheus-linuxaid/templates/prometheus.yaml
+++ b/argocd-helm-charts/prometheus-linuxaid/templates/prometheus.yaml
@@ -47,5 +47,13 @@ spec:
           requests:
             storage: {{ .Values.prometheus.storage.size }}
         storageClassName: {{ .Values.prometheus.storage.className }}
+  containers:
+    - name: prometheus
+      startupProbe:
+        failureThreshold: {{ .Values.prometheus.startupProbe.failureThreshold }}
+      livenessProbe:
+        failureThreshold: {{ .Values.prometheus.livenessProbe.failureThreshold }}
+      readinessProbe:
+        failureThreshold: {{ .Values.prometheus.readinessProbe.failureThreshold }}
   version: {{ .Values.prometheus.version }}
   enableRemoteWriteReceiver: true
diff --git a/argocd-helm-charts/prometheus-linuxaid/values.yaml b/argocd-helm-charts/prometheus-linuxaid/values.yaml
index ee44646b0..15490fbb0 100644
--- a/argocd-helm-charts/prometheus-linuxaid/values.yaml
+++ b/argocd-helm-charts/prometheus-linuxaid/values.yaml
@@ -12,6 +12,14 @@ prometheus:
     requests:
       cpu: 600m
       memory: 2Gi
+  # Prometheus pods with a large number of WALs need extra recovery time, so the probe failure thresholds are raised.
+  # ref: https://github.com/prometheus-operator/prometheus-operator/blob/378d36df448366414de53a66a64020cd053002b7/pkg/prometheus/statefulset.go#L641-L643
+  startupProbe:
+    failureThreshold: 240
+  livenessProbe:
+    failureThreshold: 240
+  readinessProbe:
+    failureThreshold: 240
 grafana:
   server: graphs.obmondo.com
   version: 11.1.4