From 40a77240d161a327d29fdd0318d9a92c2de6e1a8 Mon Sep 17 00:00:00 2001 From: ddl-ebrown Date: Wed, 29 May 2024 10:40:31 -0700 Subject: [PATCH] Return non-0 exit code from copilot upload failure - The current copilot behavior is to: * try to upload the specified file * if an error occurs, try to write an error file instead * if the error file write succeeds, return success -- otherwise error This behavior is not ideal, because it suppresses real issues like the inability to find the expected output file. While a case can be made that the sidecar succeeded in handling its responsibility, important information is lost in the process that is useful to other containers in the job. Generallyk, it's better to consider the inability to find the original file a failure of the job to produce the expected file in the expected location (useful especially for container tasks). That means that a failure to upload the file should also return a non-0 exit code. - The new behavior is therefore: * try to upload the specified file * if an error occurs, try to write an error file instead * if the error file write succeeds, return the original error * if the error file write fails, return a joined version of the errors Signed-off-by: ddl-ebrown --- flytecopilot/cmd/sidecar.go | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/flytecopilot/cmd/sidecar.go b/flytecopilot/cmd/sidecar.go index 09abdb31e5c..2f597565bac 100644 --- a/flytecopilot/cmd/sidecar.go +++ b/flytecopilot/cmd/sidecar.go @@ -2,6 +2,7 @@ package cmd import ( "context" + stderrors "errors" "fmt" "time" @@ -144,10 +145,14 @@ func (u *UploadOptions) Sidecar(ctx context.Context) error { if err := u.uploader(ctx); err != nil { logger.Errorf(ctx, "Uploading failed, err %s", err) - if err := u.UploadError(ctx, "OutputUploadFailed", err, storage.DataReference(u.remoteOutputsPrefix)); err != nil { - logger.Errorf(ctx, "Failed to write error document, err :%s", err) - return err + if uerr := u.UploadError(ctx, "OutputUploadFailed", err, storage.DataReference(u.remoteOutputsPrefix)); uerr != nil { + logger.Errorf(ctx, "Failed to write error document, err :%s", uerr) + return stderrors.Join(uerr, err) } + + // failure to upload should still return err exit code from process + logger.Error(ctx, "Setting sidecar command error return value") + return err } return nil } @@ -164,7 +169,10 @@ func NewUploadCommand(opts *RootOptions) *cobra.Command { Short: "uploads flyteData from the localpath to a remote dir.", Long: `Currently it looks at the outputs.pb and creates one file per variable.`, RunE: func(cmd *cobra.Command, args []string) error { - return uploadOptions.Sidecar(context.Background()) + err := uploadOptions.Sidecar(context.Background()) + // failure to upload should still return err exit code from process + logger.Error(cmd.Context(), "NewUploadCommand returning failure through RunE, err %s", err) + return err }, }