From e3802b7164969e710c2ae455b319013e1c89962f Mon Sep 17 00:00:00 2001 From: Steven L Date: Mon, 25 Nov 2024 18:33:45 -0600 Subject: [PATCH] Document a significant caveat to SideEffect funcs (#1399) An unfortunate edge-case-use was discovered recently, and SideEffect does not have adequate protections to prevent breaking workflows at the moment. For now, just document it. A true fix will require failing calls to blocking or history-recording funcs while the callback runs, as they cannot be used safely. Future versions of this API should remove the context arg, to stop implying it can be used. --- internal/workflow.go | 57 ++--------------------------- workflow/workflow.go | 86 +++++++++++++++++++++++++++----------------- 2 files changed, 56 insertions(+), 87 deletions(-) diff --git a/internal/workflow.go b/internal/workflow.go index 2d0fb2e5d..6d84e8d8e 100644 --- a/internal/workflow.go +++ b/internal/workflow.go @@ -1522,45 +1522,7 @@ func (b EncodedValue) HasValue() bool { return b.value != nil } -// SideEffect executes the provided function once, records its result into the workflow history. The recorded result on -// history will be returned without executing the provided function during replay. This guarantees the deterministic -// requirement for workflow as the exact same result will be returned in replay. -// Common use case is to run some short non-deterministic code in workflow, like getting random number or new UUID. -// The only way to fail SideEffect is to panic which causes decision task failure. The decision task after timeout is -// rescheduled and re-executed giving SideEffect another chance to succeed. -// -// Caution: do not use SideEffect to modify closures. Always retrieve result from SideEffect's encoded return value. -// For example this code is BROKEN: -// -// // Bad example: -// var random int -// workflow.SideEffect(func(ctx workflow.Context) interface{} { -// random = rand.Intn(100) -// return nil -// }) -// // random will always be 0 in replay, thus this code is non-deterministic -// if random < 50 { -// .... -// } else { -// .... -// } -// -// On replay the provided function is not executed, the random will always be 0, and the workflow could takes a -// different path breaking the determinism. -// -// Here is the correct way to use SideEffect: -// -// // Good example: -// encodedRandom := SideEffect(func(ctx workflow.Context) interface{} { -// return rand.Intn(100) -// }) -// var random int -// encodedRandom.Get(&random) -// if random < 50 { -// .... -// } else { -// .... -// } +// SideEffect docs are in the public API to prevent duplication: [go.uber.org/cadence/workflow.SideEffect] func SideEffect(ctx Context, f func(ctx Context) interface{}) Value { i := getWorkflowInterceptor(ctx) return i.SideEffect(ctx, f) @@ -1584,22 +1546,7 @@ func (wc *workflowEnvironmentInterceptor) SideEffect(ctx Context, f func(ctx Con return encoded } -// MutableSideEffect executes the provided function once, then it looks up the history for the value with the given id. -// If there is no existing value, then it records the function result as a value with the given id on history; -// otherwise, it compares whether the existing value from history has changed from the new function result by calling the -// provided equals function. If they are equal, it returns the value without recording a new one in history; -// -// otherwise, it records the new value with the same id on history. -// -// Caution: do not use MutableSideEffect to modify closures. Always retrieve result from MutableSideEffect's encoded -// return value. -// -// The difference between MutableSideEffect() and SideEffect() is that every new SideEffect() call in non-replay will -// result in a new marker being recorded on history. However, MutableSideEffect() only records a new marker if the value -// changed. During replay, MutableSideEffect() will not execute the function again, but it will return the exact same -// value as it was returning during the non-replay run. -// -// One good use case of MutableSideEffect() is to access dynamically changing config without breaking determinism. +// MutableSideEffect docs are in the public API to prevent duplication: [go.uber.org/cadence/workflow.MutableSideEffect] func MutableSideEffect(ctx Context, id string, f func(ctx Context) interface{}, equals func(a, b interface{}) bool) Value { i := getWorkflowInterceptor(ctx) return i.MutableSideEffect(ctx, id, f, equals) diff --git a/workflow/workflow.go b/workflow/workflow.go index 23c0b58ae..9d4118133 100644 --- a/workflow/workflow.go +++ b/workflow/workflow.go @@ -265,65 +265,87 @@ func GetSignalChannel(ctx Context, signalName string) Channel { return internal.GetSignalChannel(ctx, signalName) } -// SideEffect executes the provided function once, records its result into the workflow history. The recorded result on -// history will be returned without executing the provided function during replay. This guarantees the deterministic -// requirement for workflow as the exact same result will be returned in replay. -// Common use case is to run some short non-deterministic code in workflow, like getting random number or new UUID. -// The only way to fail SideEffect is to panic which causes decision task failure. The decision task after timeout is -// rescheduled and re-executed giving SideEffect another chance to succeed. -// -// Caution: do not use SideEffect to modify closures. Always retrieve result from SideEffect's encoded return value. -// For example this code is BROKEN: +// SideEffect executes the provided callback, and records its result into the workflow history. +// During replay the recorded result will be returned instead, so the callback can do non-deterministic +// things (such as reading files or getting random numbers) without breaking determinism during replay. +// +// As there is no error return, the callback's code is assumed to be reliable. +// If you cannot retrieve the value for some reason, panicking and failing the decision task +// will cause it to be retried, possibly succeeding on another machine. +// For better error handling, use ExecuteLocalActivity instead. +// +// Caution: the callback MUST NOT call any blocking or history-creating APIs, +// e.g. workflow.Sleep or ExecuteActivity or calling .Get on any future. +// This will (potentially) work during the initial recording of history, but will +// fail when replaying because the data is not available when the call occurs +// (it becomes available later). +// +// In other words, in general: use this *only* for code that does not need the workflow.Context. +// Incorrect context-using calls are not currently prevented in tests or during execution, +// but they should be eventually. +// +// // Bad example: this will work until a replay occurs, +// // but then the workflow will fail to replay with a non-deterministic error. +// var out string +// err := workflow.SideEffect(func(ctx workflow.Context) interface{} { +// var signal data +// err := workflow.GetSignalChannel(ctx, "signal").Receive(ctx, &signal) +// _ = err // ignore err for the example +// return signal +// }).Get(&out) +// workflow.GetLogger(ctx).Info(out) +// +// Caution: do not use SideEffect to modify values outside the callback. +// Always retrieve result from SideEffect's encoded return value. +// For example this code will break during replay: // // // Bad example: // var random int // workflow.SideEffect(func(ctx workflow.Context) interface{} { -// random = rand.Intn(100) -// return nil +// random = rand.Intn(100) // this only occurs when recording history, not during replay +// return nil // }) // // random will always be 0 in replay, thus this code is non-deterministic // if random < 50 { -// .... +// .... // } else { -// .... +// .... // } // -// On replay the provided function is not executed, the random will always be 0, and the workflow could takes a -// different path breaking the determinism. +// On replay the provided function is not executed, the `random` var will always be 0, +// and the workflow could take a different path and break determinism. // -// Here is the correct way to use SideEffect: +// The only safe way to use SideEffect is to read from the returned encoded.Value: // // // Good example: // encodedRandom := SideEffect(func(ctx workflow.Context) interface{} { -// return rand.Intn(100) +// return rand.Intn(100) // }) // var random int // encodedRandom.Get(&random) // if random < 50 { -// .... +// .... // } else { -// .... +// .... // } func SideEffect(ctx Context, f func(ctx Context) interface{}) encoded.Value { return internal.SideEffect(ctx, f) } -// MutableSideEffect executes the provided function once, then it looks up the history for the value with the given id. -// If there is no existing value, then it records the function result as a value with the given id on history; -// otherwise, it compares whether the existing value from history has changed from the new function result by calling the -// provided equals function. If they are equal, it returns the value without recording a new one in history; -// -// otherwise, it records the new value with the same id on history. +// MutableSideEffect is similar to SideEffect, but it only records changed values per ID instead of every call. +// Changes are detected by calling the provided "equals" func - return true to mark a result +// as the same as before, and not record the new value. The first call per ID is always "changed" and will always be recorded. // -// Caution: do not use MutableSideEffect to modify closures. Always retrieve result from MutableSideEffect's encoded -// return value. +// This is intended for things you want to *check* frequently, but which *change* only occasionally, as non-changing +// results will not add anything to your history. This helps keep those workflows under its size/count limits, and +// can help keep replays more efficient than when using SideEffect or ExecuteLocalActivity (due to not storing duplicated data). // -// The difference between MutableSideEffect() and SideEffect() is that every new SideEffect() call in non-replay will -// result in a new marker being recorded on history. However, MutableSideEffect() only records a new marker if the value -// changed. During replay, MutableSideEffect() will not execute the function again, but it will return the exact same -// value as it was returning during the non-replay run. +// Caution: the callback MUST NOT block or call any history-modifying events, or replays will be broken. +// See SideEffect docs for more details. // -// One good use case of MutableSideEffect() is to access dynamically changing config without breaking determinism. +// Caution: do not use MutableSideEffect to modify closures. +// Always retrieve result from MutableSideEffect's encoded return value. +// See SideEffect docs for more details. func MutableSideEffect(ctx Context, id string, f func(ctx Context) interface{}, equals func(a, b interface{}) bool) encoded.Value { return internal.MutableSideEffect(ctx, id, f, equals) }