Skip to content

Commit

Permalink
fix: recover from panic that happens during the panic recovery
Browse files Browse the repository at this point in the history
  • Loading branch information
vgarvardt committed Dec 28, 2023
1 parent c723446 commit 35ca6b9
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 4 deletions.
15 changes: 11 additions & 4 deletions error.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,17 @@ import (
"time"
)

// ErrJobPanicked is returned when the job failed to be handled because it panicked.
// The error is normally returned wrapped, so use `errors.Is(err, gue.ErrJobPanicked)` to ensure this is the error
// you're looking for.
var ErrJobPanicked = errors.New("job panicked")
var (
	// ErrJobPanicked reports that a job could not be handled because it panicked.
	// Callers usually receive it wrapped inside another error, so match it with
	// `errors.Is(err, gue.ErrJobPanicked)` instead of a direct comparison.
	ErrJobPanicked = errors.New("job panicked")

	// ErrHookJobDonePanicked reports that the hook job done panicked while a panicked job was being recovered.
	// Callers usually receive it wrapped inside another error, so match it with
	// `errors.Is(err, gue.ErrHookJobDonePanicked)` instead of a direct comparison.
	ErrHookJobDonePanicked = errors.New("hook job done panicked in job panic recovery")
)

// ErrJobReschedule interface implementation allows errors to reschedule jobs on an individual basis.
type ErrJobReschedule interface {
Expand Down
38 changes: 38 additions & 0 deletions worker.go
Original file line number Diff line number Diff line change
Expand Up @@ -339,6 +339,8 @@ func (w *Worker) recoverPanic(ctx context.Context, j *Job, logger adapter.Logger
return
}

defer w.recoverPanicRecovery(ctx, j, logger)

ctx, span := w.tracer.Start(ctx, "Worker.recoverPanic")
defer span.End()

Expand Down Expand Up @@ -371,6 +373,42 @@ func (w *Worker) recoverPanic(ctx context.Context, j *Job, logger adapter.Logger
}

Check warning on line 373 in worker.go

View check run for this annotation

Codecov / codecov/patch

worker.go#L371-L373

Added lines #L371 - L373 were not covered by tests
}

// recoverPanicRecovery tries to handle panics in hook job done thrown in the process of panicked job recovery.
// A stacktrace is stored into Job last_error.
func (w *Worker) recoverPanicRecovery(ctx context.Context, j *Job, logger adapter.Logger) {
r := recover()
if r == nil {
return
}

ctx, span := w.tracer.Start(ctx, "Worker.recoverPanicRecovery")
defer span.End()

// record an error on the job with panic message and stacktrace
stackBuf := make([]byte, w.panicStackBufSize)
n := runtime.Stack(stackBuf, false)

buf := new(bytes.Buffer)
_, printRErr := fmt.Fprintf(buf, "%v\n", r)
_, printStackErr := fmt.Fprintln(buf, string(stackBuf[:n]))
_, printEllipsisErr := fmt.Fprintln(buf, "[...]")
stacktrace := buf.String()

if err := errors.Join(printRErr, printStackErr, printEllipsisErr); err != nil {
logger.Error("Could not build panicked hook job done stacktrace", adapter.Err(err), adapter.F("runtime-stack", string(stackBuf[:n])))
}

Check warning on line 399 in worker.go

View check run for this annotation

Codecov / codecov/patch

worker.go#L398-L399

Added lines #L398 - L399 were not covered by tests

w.mWorked.Add(ctx, 1, metric.WithAttributes(attrJobType.String(j.Type), attrSuccess.Bool(false)))
span.RecordError(ErrJobPanicked, trace.WithAttributes(attribute.String("stacktrace", stacktrace)))
logger.Error("Job panicked", adapter.F("stacktrace", stacktrace))

errPanic := fmt.Errorf("%w (%w):\n%s", ErrHookJobDonePanicked, ErrJobPanicked, stacktrace)
if err := j.Error(ctx, errPanic); err != nil {
span.RecordError(fmt.Errorf("failed to mark panicked job (hook job done) as error: %w", err))
logger.Error("Got an error on setting an error to a panicked job (hook job done)", adapter.Err(err))

Check warning on line 408 in worker.go

View check run for this annotation

Codecov / codecov/patch

worker.go#L407-L408

Added lines #L407 - L408 were not covered by tests
}
}

// WorkerPool is a pool of Workers, each working jobs from the queue
// at the specified interval using the WorkMap.
type WorkerPool struct {
Expand Down

0 comments on commit 35ca6b9

Please sign in to comment.