diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5bc028cc..affffd8c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,10 @@
 
 ## master / unreleased
 
+* [ENHANCEMENT] Alertmanager: Add `grpc` port #494
+* [ENHANCEMENT] Alertmanager: Expose 9094 TCP and UDP for gossip cluster #494
+  * If the Alertmanager headless service existed before this change was applied, it will have only one port set, which is a known Kubernetes issue. See [kubernetes/kubernetes#39188](https://github.com/kubernetes/kubernetes/issues/39188). Re-creating the headless service can resolve this issue.
+
 ## 2.2.0 / 2024-01-16
 
 * [CHANGE] Removed `config.storage.engine` and any reference of it #488
diff --git a/README.md b/README.md
index a3c1d335..e9374ce8 100644
--- a/README.md
+++ b/README.md
@@ -216,6 +216,7 @@ Kubernetes: `^1.19.0-0`
 | compactor.​terminationGracePeriodSeconds | int | `240` |  |
 | compactor.​tolerations | list | `[]` |  |
 | compactor.​topologySpreadConstraints | list | `[]` |  |
+| config.​alertmanager.​cluster | object | `{"listen_address":"0.0.0.0:9094"}` | Set `listen_address` to an empty string to disable the alertmanager gossip cluster |
 | config.​alertmanager.​enable_api | bool | `false` | Enable the experimental alertmanager config api. |
 | config.​alertmanager.​external_url | string | `"/api/prom/alertmanager"` |  |
 | config.​api.​prometheus_http_prefix | string | `"/prometheus"` |  |
diff --git a/ci/test-deployment-values.yaml b/ci/test-deployment-values.yaml
index 0cae94b9..af3b5bdc 100644
--- a/ci/test-deployment-values.yaml
+++ b/ci/test-deployment-values.yaml
@@ -125,7 +125,7 @@ runtimeconfigmap:
   annotations:
     foo: bar
 alertmanager:
-  replicas: 1
+  replicas: 3
   statefulSet:
     enabled: false
   extraVolumes:
diff --git a/ci/test-sts-values.yaml b/ci/test-sts-values.yaml
index 6ee029c0..5825575a 100644
--- a/ci/test-sts-values.yaml
+++ b/ci/test-sts-values.yaml
@@ -116,7 +116,7 @@ runtimeconfigmap:
   annotations:
     foo: bar
 alertmanager:
-  replicas: 1
+  replicas: 3
   statefulSet:
     enabled: true
   extraVolumes:
diff --git a/templates/alertmanager/alertmanager-statefulset.yaml b/templates/alertmanager/alertmanager-statefulset.yaml
index 60d87d25..7d6c0d4e 100644
--- a/templates/alertmanager/alertmanager-statefulset.yaml
+++ b/templates/alertmanager/alertmanager-statefulset.yaml
@@ -1,3 +1,7 @@
+{{- /* Gossip listen address; an unset or empty value falls back to the default so a port can always be derived. */ -}}
+{{- $svcClusterAddress := ((.Values.config.alertmanager.cluster).listen_address) | default "0.0.0.0:9094" }}
+{{- /* The port is the last ":"-separated field, which also holds for bracketed IPv6 addresses such as [::]:9094. */ -}}
+{{- $svcClusterPort := splitList ":" $svcClusterAddress | last }}
 {{- if .Values.alertmanager.enabled -}}
 {{- if .Values.alertmanager.statefulSet.enabled -}}
 apiVersion: apps/v1
@@ -152,6 +156,16 @@ spec:
           args:
             - "-target=alertmanager"
             - "-config.file=/etc/cortex/cortex.yaml"
+            {{- /* Pass every replica's per-pod DNS name as a gossip peer; skipped for a single replica or when listen_address is explicitly empty. */}}
+            {{- if and (gt (int .Values.alertmanager.replicas) 1) (ne (toString ((.Values.config.alertmanager.cluster).listen_address)) "") }}
+            {{- $fullName := include "cortex.alertmanagerFullname" . }}
+            {{- $peers := list }}
+            {{- range $i := until (int .Values.alertmanager.replicas) }}
+            {{- $peer := printf "%s-%d.%s-headless.%s.svc.cluster.local:%s" $fullName $i $fullName $.Release.Namespace $svcClusterPort }}
+            {{- $peers = append $peers $peer }}
+            {{- end }}
+            - "-alertmanager.cluster.peers={{ join "," $peers }}"
+            {{- end }}
             {{- range $key, $value := .Values.alertmanager.extraArgs }}
             - "-{{ $key }}={{ $value }}"
             {{- end }}
@@ -175,6 +189,15 @@ spec:
             - name: gossip
               containerPort: {{ .Values.config.memberlist.bind_port }}
               protocol: TCP
+            - name: grpc
+              containerPort: {{ .Values.config.server.grpc_listen_port }}
+              protocol: TCP
+            - name: alert-clu-tcp
+              containerPort: {{ $svcClusterPort }}
+              protocol: TCP
+            - name: alert-clu-udp
+              containerPort: {{ $svcClusterPort }}
+              protocol: UDP
           startupProbe:
             {{- toYaml .Values.alertmanager.startupProbe | nindent 12 }}
           livenessProbe:
diff --git a/templates/alertmanager/alertmanager-svc-headless.yaml b/templates/alertmanager/alertmanager-svc-headless.yaml
index 420c90b6..53db5857 100644
--- a/templates/alertmanager/alertmanager-svc-headless.yaml
+++ b/templates/alertmanager/alertmanager-svc-headless.yaml
@@ -1,5 +1,7 @@
+{{- /* Derive the gossip port from config.alertmanager.cluster.listen_address instead of hard-coding 9094. */ -}}
+{{- $svcClusterAddress := ((.Values.config.alertmanager.cluster).listen_address) | default "0.0.0.0:9094" }}
+{{- $svcClusterPort := splitList ":" $svcClusterAddress | last }}
 {{- if .Values.alertmanager.enabled -}}
-{{- if .Values.alertmanager.statefulSet.enabled -}}
 apiVersion: v1
 kind: Service
 metadata:
@@ -21,7 +23,18 @@ spec:
       protocol: TCP
       name: http-metrics
       targetPort: http-metrics
+    - port: {{ .Values.config.server.grpc_listen_port }}
+      protocol: TCP
+      name: grpc
+      targetPort: grpc
+    - port: {{ $svcClusterPort }}
+      protocol: UDP
+      name: alert-clu-udp
+      targetPort: alert-clu-udp
+    - port: {{ $svcClusterPort }}
+      protocol: TCP
+      name: alert-clu-tcp
+      targetPort: alert-clu-tcp
   selector:
     {{- include "cortex.alertmanagerSelectorLabels" . | nindent 4 }}
 {{- end -}}
-{{- end -}}
diff --git a/values.yaml b/values.yaml
index 908b4cf4..1d9012c0 100644
--- a/values.yaml
+++ b/values.yaml
@@ -126,6 +126,10 @@ config:
   runtime_config:
     file: /etc/cortex-runtime-config/runtime_config.yaml
   alertmanager:
+    # Alertmanager gossip cluster settings.
+    # -- Set `listen_address` to an empty string to disable the alertmanager gossip cluster
+    cluster:
+      listen_address: '0.0.0.0:9094'
     # -- Enable the experimental alertmanager config api.
     enable_api: false
     external_url: '/api/prom/alertmanager'