Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use gRPC-based liveness probe instead of tetra status #2478

Merged
merged 2 commits into from
May 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions cmd/tetragon/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ import (
"github.com/cilium/tetragon/pkg/fileutils"
"github.com/cilium/tetragon/pkg/filters"
tetragonGrpc "github.com/cilium/tetragon/pkg/grpc"
"github.com/cilium/tetragon/pkg/health"
"github.com/cilium/tetragon/pkg/logger"
"github.com/cilium/tetragon/pkg/metrics"
"github.com/cilium/tetragon/pkg/metrics/metricsconfig"
Expand Down Expand Up @@ -440,6 +441,10 @@ func tetragonExecute() error {
}
}

if option.Config.HealthServerAddress != "" {
health.StartHealthServer(ctx, option.Config.HealthServerAddress, option.Config.HealthServerInterval)
}

log.WithField("enabled", option.Config.ExportFilename != "").WithField("fileName", option.Config.ExportFilename).Info("Exporter configuration")
obs.AddListener(pm)
saveInitInfo()
Expand Down
3 changes: 3 additions & 0 deletions docs/content/en/docs/reference/helm-chart.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions docs/data/tetragon_flags.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions install/kubernetes/tetragon/README.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 4 additions & 9 deletions install/kubernetes/tetragon/templates/_container_tetragon.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -67,17 +67,12 @@
{{- if .Values.tetragon.livenessProbe }}
livenessProbe:
{{- toYaml .Values.tetragon.livenessProbe | nindent 4 }}
{{- else if .Values.tetragon.grpc.enabled }}
{{- else if .Values.tetragon.healthGrpc.enabled }}
livenessProbe:
timeoutSeconds: 60
exec:
command:
- tetra
- status
- --server-address
- {{ .Values.tetragon.grpc.address }}
- --retries
- "5"
grpc:
port: {{ .Values.tetragon.healthGrpc.port }}
service: "liveness"
{{- end -}}
{{- end -}}

Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,12 @@ data:
server-address: {{ .Values.tetragon.grpc.address }}
{{- else }}
{{- end }}
{{- if .Values.tetragon.healthGrpc.enabled }}
health-server-address: :{{ .Values.tetragon.healthGrpc.port }}
health-server-interval: {{ .Values.tetragon.healthGrpc.interval | quote }}
{{- else }}
health-server-address: ""
{{- end }}
{{- if .Values.tetragon.tcpStatsSampleSegs }}
tcp-stats-sample-segs: {{ .Values.tetragon.tcpStatsSampleSegs | quote }}
{{- end }}
Expand Down
7 changes: 7 additions & 0 deletions install/kubernetes/tetragon/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,13 @@ tetragon:
enablePolicyFilterDebug: false
# -- Enable latency monitoring in message handling
enableMsgHandlingLatency: false
healthGrpc:
# -- Whether to enable health gRPC server.
enabled: true
# -- The port at which to expose health gRPC.
port: 6789
# -- The interval, in seconds, at which to check the health of the agent.
interval: 10
# -- Location of the host proc filesystem in the runtime environment. If the runtime runs in the
# host, the path is /proc. Exceptions to this are environments like kind, where the runtime itself
# does not run on the host.
Expand Down
67 changes: 67 additions & 0 deletions pkg/health/server.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
// SPDX-License-Identifier: Apache-2.0
// Copyright Authors of Tetragon

package health

import (
"context"
"net"
"time"

"github.com/cilium/tetragon/api/v1/tetragon"
"github.com/cilium/tetragon/pkg/logger"
"google.golang.org/grpc"
gh "google.golang.org/grpc/health"
"google.golang.org/grpc/health/grpc_health_v1"
)

// log is the package-level logger shared by the health server code.
var log = logger.GetLogger()

func StartHealthServer(ctx context.Context, address string, interval int) {
// Create a new health server and mark it as serving.
healthServer := gh.NewServer()
healthServer.SetServingStatus("liveness", grpc_health_v1.HealthCheckResponse_SERVING)

// Create a new gRPC server for health checks and register the healthServer.
grpcHealthServer := grpc.NewServer()
grpc_health_v1.RegisterHealthServer(grpcHealthServer, healthServer)

// Start the gRPC server for the health checks.
go func() {
// the gRPC server for the health checks listens on port 6789
listener, err := net.Listen("tcp", address)
if err != nil {
log.WithError(err).Fatal("Failed to listen for gRPC healthserver")
}

log.WithField("address", address).WithField("interval", interval).Info("Starting gRPC health server")
if err = grpcHealthServer.Serve(listener); err != nil {
log.WithError(err).Fatal("Failed to start gRPC healthserver")
}
}()

// Check the agent health periodically. To check if our agent is health we call
// health.GetHealth() and we report the status to the healthServer.
go func() {
ticker := time.NewTicker(time.Duration(interval) * time.Second)
for {
select {
case <-ticker.C:
servingStatus := grpc_health_v1.HealthCheckResponse_NOT_SERVING
if response, err := GetHealth(); err == nil {
if st := response.GetHealthStatus(); len(st) > 0 && st[0].Status == tetragon.HealthStatusResult_HEALTH_STATUS_RUNNING {
servingStatus = grpc_health_v1.HealthCheckResponse_SERVING
}
}
healthServer.SetServingStatus("liveness", servingStatus)
case <-ctx.Done():
ticker.Stop()
healthServer.Shutdown() // set all services to NOT_SERVING
grpcHealthServer.Stop()
return
}
}
}()
}
3 changes: 3 additions & 0 deletions pkg/option/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,9 @@ type config struct {
CgroupRate CgroupRate

UsernameMetadata int

HealthServerAddress string
HealthServerInterval int
}

var (
Expand Down
8 changes: 8 additions & 0 deletions pkg/option/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,9 @@ const (
KeyCgroupRate = "cgroup-rate"

KeyUsernameMetadata = "username-metadata"

KeyHealthServerAddress = "health-server-address"
KeyHealthTimeInterval = "health-server-interval"
)

type UsernameMetadaCode int
Expand Down Expand Up @@ -211,6 +214,8 @@ func ReadAndSetFlags() error {
}

Config.CgroupRate = ParseCgroupRate(viper.GetString(KeyCgroupRate))
Config.HealthServerAddress = viper.GetString(KeyHealthServerAddress)
Config.HealthServerInterval = viper.GetInt(KeyHealthTimeInterval)
return nil
}

Expand Down Expand Up @@ -362,4 +367,7 @@ func AddFlags(flags *pflag.FlagSet) {
flags.String(KeyUsernameMetadata, "disabled", "Resolve UIDs to user names for processes running in host namespace")

flags.String(KeyCgroupRate, "", "Base sensor events cgroup rate <events,interval> disabled by default ('1000/1s' means rate 1000 events per second")

flags.String(KeyHealthServerAddress, ":6789", "Health server address (e.g. ':6789')(use '' to disabled it)")
flags.Int(KeyHealthTimeInterval, 10, "Health server interval in seconds")
}
117 changes: 117 additions & 0 deletions vendor/google.golang.org/grpc/health/client.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Loading