From d6f55c341fca66963894e8cfcdbbdf0e42a00240 Mon Sep 17 00:00:00 2001 From: Mahe Tardy Date: Fri, 2 Aug 2024 20:00:41 +0200 Subject: [PATCH] tetra: fix getevents cmd unexpected timeout After commit 663a1e2ed51afa8590c07bac08822a98c0d12a43, we stop connecting to the server prior making the RPC. It creates a buggy situation when we are using streams: the connection get cancelled at timeout even if it's successfully streaming. Let's remove the timeout for the getevents command and only use the context based on the interrupt signals. Signed-off-by: Mahe Tardy --- cmd/tetra/common/client.go | 23 ++++++++++++++--------- cmd/tetra/getevents/getevents.go | 23 +++++++++++++---------- 2 files changed, 27 insertions(+), 19 deletions(-) diff --git a/cmd/tetra/common/client.go b/cmd/tetra/common/client.go index 4556a812ea9..4e0683a78c7 100644 --- a/cmd/tetra/common/client.go +++ b/cmd/tetra/common/client.go @@ -72,15 +72,23 @@ func CliRun(fn func(ctx context.Context, cli tetragon.FineGuidanceSensorsClient) type ClientWithContext struct { Client tetragon.FineGuidanceSensorsClient - Ctx context.Context - conn *grpc.ClientConn - cancel context.CancelFunc + // Ctx is a combination of the signal context and the timeout context + Ctx context.Context + // SignalCtx is only the signal context, you might want to use that context + // when the command should never timeout (like a stream command) + SignalCtx context.Context + conn *grpc.ClientConn + // The signal context is the parent of the timeout context, so cancelling + // signal will cancel its child, timeout + signalCancel context.CancelFunc + timeoutCancel context.CancelFunc } // Close cleanup resources, it closes the connection and cancel the context func (c ClientWithContext) Close() { c.conn.Close() - c.cancel() + c.signalCancel() + c.timeoutCancel() // this should be a nop } // NewClientWithDefaultContextAndAddress returns a client to a tetragon @@ -94,11 +102,8 @@ func NewClientWithDefaultContextAndAddress() (*ClientWithContext, error) { func NewClient(ctx context.Context, address string, timeout time.Duration) (*ClientWithContext, error) { c := &ClientWithContext{} - var timeoutContext context.Context - timeoutContext, c.cancel = context.WithTimeout(ctx, timeout) - // we don't need the cancelFunc here as calling cancel on timeout, the - // parent, will cancel its children. - c.Ctx, _ = signal.NotifyContext(timeoutContext, syscall.SIGINT, syscall.SIGTERM) + c.SignalCtx, c.signalCancel = signal.NotifyContext(ctx, syscall.SIGINT, syscall.SIGTERM) + c.Ctx, c.timeoutCancel = context.WithTimeout(c.SignalCtx, timeout) var err error c.conn, err = grpc.NewClient(address, diff --git a/cmd/tetra/getevents/getevents.go b/cmd/tetra/getevents/getevents.go index c683eb1029c..589ad62f859 100644 --- a/cmd/tetra/getevents/getevents.go +++ b/cmd/tetra/getevents/getevents.go @@ -14,7 +14,6 @@ import ( "github.com/cilium/tetragon/api/v1/tetragon" "github.com/cilium/tetragon/cmd/tetra/common" "github.com/cilium/tetragon/pkg/encoder" - "github.com/cilium/tetragon/pkg/logger" "github.com/spf13/cobra" "google.golang.org/grpc/codes" "google.golang.org/grpc/status" @@ -116,23 +115,23 @@ func getRequest(includeFields, excludeFields []string, filter *tetragon.Filter) } } -func getEvents(ctx context.Context, client tetragon.FineGuidanceSensorsClient) { +func getEvents(ctx context.Context, client tetragon.FineGuidanceSensorsClient) error { request := getRequest(Options.IncludeFields, Options.ExcludeFields, GetFilter()) stream, err := client.GetEvents(ctx, request) if err != nil { - logger.GetLogger().WithError(err).Fatal("Failed to call GetEvents") + return fmt.Errorf("failed to call GetEvents: %w", err) } eventEncoder := GetEncoder(os.Stdout, encoder.ColorMode(Options.Color), Options.Timestamps, Options.Output == "compact", Options.TTYEncode, Options.StackTraces) for { res, err := stream.Recv() if err != nil { if !errors.Is(err, context.Canceled) && status.Code(err) != codes.Canceled && !errors.Is(err, io.EOF) { - logger.GetLogger().WithError(err).Fatal("Failed to receive events") + return fmt.Errorf("failed to receive events: %w", err) } - return + return nil } if err = eventEncoder.Encode(res); err != nil { - logger.GetLogger().WithError(err).WithField("event", res).Debug("Failed to encode event") + return fmt.Errorf("failed to encode event %#v: %w", res, err) } } } @@ -181,15 +180,19 @@ redirection of events to the stdin. Examples: return nil }, - Run: func(_ *cobra.Command, _ []string) { + RunE: func(_ *cobra.Command, _ []string) error { fi, _ := os.Stdin.Stat() if fi.Mode()&os.ModeNamedPipe != 0 { // read events from stdin - getEvents(context.Background(), newIOReaderClient(os.Stdin, common.Debug)) - return + return getEvents(context.Background(), newIOReaderClient(os.Stdin, common.Debug)) } // connect to server - common.CliRun(getEvents) + c, err := common.NewClientWithDefaultContextAndAddress() + if err != nil { + return fmt.Errorf("failed create gRPC client: %w", err) + } + defer c.Close() + return getEvents(c.SignalCtx, c.Client) }, }