From 7874d9d0e16adf3265be154490e216ae9eba806b Mon Sep 17 00:00:00 2001
From: William Findlay
Date: Tue, 29 Aug 2023 15:39:14 -0400
Subject: [PATCH] cmd/tetra: add retries with exponential backoff

[upstream commit 4f4feb8c4db064e389a99186d5934c3022583f45]

Allow the user to retry failed gRPC connections with exponential backoff.

The error callback is invoked before the fatal log message (as it was
before this change), the dialed address is reported in the log fields,
and the backoff wait is interrupted by SIGINT/SIGTERM.

Signed-off-by: William Findlay
---
 cmd/tetra/common/client.go | 52 +++++++++++++++++++++++++++++++++-----
 cmd/tetra/common/flags.go  |  1 +
 cmd/tetra/main.go          |  1 +
 3 files changed, 48 insertions(+), 6 deletions(-)

diff --git a/cmd/tetra/common/client.go b/cmd/tetra/common/client.go
index 7ae5d28629d..b801e878807 100644
--- a/cmd/tetra/common/client.go
+++ b/cmd/tetra/common/client.go
@@ -7,6 +7,7 @@ import (
 	"context"
 	"os/signal"
 	"syscall"
+	"time"
 
 	"github.com/cilium/tetragon/api/v1/tetragon"
 	"github.com/cilium/tetragon/pkg/logger"
@@ -15,18 +16,57 @@ import (
 	"google.golang.org/grpc/credentials/insecure"
 )
 
+// connect dials the gRPC server at the configured address, waiting at most
+// the KeyTimeout duration for the connection. It returns the address it
+// dialed alongside the connection so that callers can log it.
+func connect(ctx context.Context) (*grpc.ClientConn, string, error) {
+	connCtx, connCancel := context.WithTimeout(ctx, viper.GetDuration(KeyTimeout))
+	defer connCancel()
+
+	serverAddr := viper.GetString(KeyServerAddress)
+	conn, err := grpc.DialContext(connCtx, serverAddr, grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithBlock())
+
+	return conn, serverAddr, err
+}
+
+// CliRunErr connects to the gRPC server, retrying up to KeyRetries times with
+// exponential backoff, and runs fn with a client for that connection. If no
+// connection can be established, fnErr is called with the last error before
+// the failure is logged at Fatal level.
 func CliRunErr(fn func(ctx context.Context, cli tetragon.FineGuidanceSensorsClient), fnErr func(err error)) {
 	ctx, cancel := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
 	defer cancel()
 
-	connCtx, connCancel := context.WithTimeout(ctx, viper.GetDuration(KeyTimeout))
-	defer connCancel()
-	conn, err := grpc.DialContext(connCtx, viper.GetString(KeyServerAddress), grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithBlock())
-	if err != nil {
-		fnErr(err)
-		logger.GetLogger().WithError(err).Fatal("Failed to connect")
+	var conn *grpc.ClientConn
+	var serverAddr string
+	var err error
+
+	backoff := time.Second
+	attempts := 0
+	for {
+		conn, serverAddr, err = connect(ctx)
+		if err == nil {
+			break
+		}
+		// Give up once the retry budget is spent, or as soon as the user
+		// interrupts us: retrying with a cancelled context cannot succeed.
+		if attempts >= viper.GetInt(KeyRetries) || ctx.Err() != nil {
+			// Run the error callback first: Fatal is not expected to return.
+			fnErr(err)
+			logger.GetLogger().WithField("server-address", serverAddr).WithField("attempts", attempts).WithError(err).Fatal("Failed to connect to server")
+			return
+		}
+		// Exponential backoff
+		attempts++
+		logger.GetLogger().WithField("server-address", serverAddr).WithField("attempts", attempts).WithError(err).Error("Connection attempt failed, retrying...")
+		select {
+		case <-ctx.Done():
+		case <-time.After(backoff):
+		}
+		backoff *= 2
 	}
 	defer conn.Close()
+
 	client := tetragon.NewFineGuidanceSensorsClient(conn)
 	fn(ctx, client)
 }
diff --git a/cmd/tetra/common/flags.go b/cmd/tetra/common/flags.go
index 4aedd327081..7bde83f2aa6 100644
--- a/cmd/tetra/common/flags.go
+++ b/cmd/tetra/common/flags.go
@@ -9,4 +9,5 @@ const (
 	KeyOutput        = "output"         // string
 	KeyServerAddress = "server-address" // string
 	KeyTimeout       = "timeout"        // duration
+	KeyRetries       = "retries"        // int
 )
diff --git a/cmd/tetra/main.go b/cmd/tetra/main.go
index 7decf19c838..97bd73d84e2 100644
--- a/cmd/tetra/main.go
+++ b/cmd/tetra/main.go
@@ -44,6 +44,7 @@ func new() *cobra.Command {
 	flags.BoolP(common.KeyDebug, "d", false, "Enable debug messages")
 	flags.String(common.KeyServerAddress, "localhost:54321", "gRPC server address")
 	flags.Duration(common.KeyTimeout, 10*time.Second, "Connection timeout")
+	flags.Int(common.KeyRetries, 0, "Connection retries with exponential backoff")
 	viper.BindPFlags(flags)
 	return rootCmd
 }