diff --git a/example_graceful_shutdown_test.go b/example_graceful_shutdown_test.go index 296152a3..95ce52e6 100644 --- a/example_graceful_shutdown_test.go +++ b/example_graceful_shutdown_test.go @@ -32,16 +32,22 @@ type WaitsForCancelOnlyWorker struct { func (w *WaitsForCancelOnlyWorker) Work(ctx context.Context, job *river.Job[WaitsForCancelOnlyArgs]) error { fmt.Printf("Working job that doesn't finish until cancelled\n") close(w.jobStarted) + <-ctx.Done() fmt.Printf("Job cancelled\n") - return nil + + // In the event of cancellation, an error should be returned so that the job + // goes back in the retry queue. + return ctx.Err() } // Example_gracefulShutdown demonstrates a realistic-looking stop loop for -// River. It listens for a SIGTERM (like might be received on a platform like -// Heroku to stop a process) and when receives, tries a soft stop that waits for -// work to finish. If it doesn't finish in time, a second SIGTERM will initiate -// a hard stop that cancels all jobs using context cancellation. +// River. It listens for SIGINT/SIGTERM (like might be received by a Ctrl+C +// locally or on a platform like Heroku to stop a process) and when received, +// tries a soft stop that waits for work to finish. If it doesn't finish in +// time, a second SIGINT/SIGTERM will initiate a hard stop that cancels all jobs +// using context cancellation. A third will give up on the stop procedure and +// exit uncleanly. func Example_gracefulShutdown() { ctx := context.Background() @@ -83,19 +89,21 @@ func Example_gracefulShutdown() { riverClientStopped := make(chan struct{}) - sigterm := make(chan os.Signal, 1) - signal.Notify(sigterm, syscall.SIGTERM) + sigintOrTerm := make(chan os.Signal, 1) + signal.Notify(sigintOrTerm, syscall.SIGINT, syscall.SIGTERM) // This is meant to be a realistic-looking stop goroutine that might go in a - // real program. It waits for SIGTERM and when received, tries to stop + // real program. It waits for SIGINT/SIGTERM and when received, tries to stop // gracefully by allowing a chance for jobs to finish. But if that isn't - // working, a second SIGTERM will tell it to terminate with prejudice and - // it'll issue a hard stop that cancels the context of all active jobs. + // working, a second SIGINT/SIGTERM will tell it to terminate with prejudice and + // it'll issue a hard stop that cancels the context of all active jobs. In + // case that doesn't work, a third SIGINT/SIGTERM ignores River's stop procedure + // completely and exits uncleanly. go func() { defer close(riverClientStopped) - <-sigterm - fmt.Printf("Received SIGTERM; initiating soft stop (try to wait for jobs to finish)\n") + <-sigintOrTerm + fmt.Printf("Received SIGINT/SIGTERM; initiating soft stop (try to wait for jobs to finish)\n") softStopSucceeded := make(chan struct{}) go func() { @@ -107,14 +115,41 @@ func Example_gracefulShutdown() { close(softStopSucceeded) }() + // Wait for soft stop to succeed, or another SIGINT/SIGTERM. select { - case <-sigterm: - fmt.Printf("Received SIGTERM again; initiating hard stop (cancel everything)\n") - if err := riverClient.StopAndCancel(ctx); err != nil { - panic(err) - } + case <-sigintOrTerm: + fmt.Printf("Received SIGINT/SIGTERM again; initiating hard stop (cancel everything)\n") + + case <-time.After(10 * time.Second): + fmt.Printf("Soft stop timeout; initiating hard stop (cancel everything)\n") + case <-softStopSucceeded: // Will never be reached in this example. + return + } + + hardStopSucceeded := make(chan struct{}) + go func() { + if err := riverClient.StopAndCancel(ctx); err != nil { + if !errors.Is(err, context.Canceled) { + panic(err) + } + } + close(hardStopSucceeded) + }() + + // As long as all jobs respect context cancellation, StopAndCancel will + // always work. However, in the case of a bug where a job blocks despite + // being cancelled, it may be necessary to either ignore River's stop + // result (what's shown here) or have a supervisor kill the process. + select { + case <-sigintOrTerm: + fmt.Printf("Received SIGINT/SIGTERM again; ignoring stop procedure and exiting unsafely\n") + + case <-time.After(10 * time.Second): + fmt.Printf("Hard stop timeout; ignoring stop procedure and exiting unsafely\n") + + case <-hardStopSucceeded: } }() @@ -123,9 +158,9 @@ func Example_gracefulShutdown() { // Cheat a little by sending a SIGTERM manually for the purpose of this // example (normally this will be sent by user or supervisory process). The - // first SIGTERM tries a soft stop in which jobs are givcen a chance to + // first SIGTERM tries a soft stop in which jobs are given a chance to // finish up. - sigterm <- syscall.SIGTERM + sigintOrTerm <- syscall.SIGTERM // The soft stop will never work in this example because our job only // respects context cancellation, but wait a short amount of time to give it @@ -137,13 +172,14 @@ func Example_gracefulShutdown() { fmt.Printf("Soft stop succeeded\n") case <-time.After(100 * time.Millisecond): - sigterm <- syscall.SIGTERM + sigintOrTerm <- syscall.SIGTERM <-riverClientStopped } // Output: // Working job that doesn't finish until cancelled - // Received SIGTERM; initiating soft stop (try to wait for jobs to finish) - // Received SIGTERM again; initiating hard stop (cancel everything) + // Received SIGINT/SIGTERM; initiating soft stop (try to wait for jobs to finish) + // Received SIGINT/SIGTERM again; initiating hard stop (cancel everything) // Job cancelled + // jobExecutor: Job failed }