Skip to content

Commit

Permalink
autoupdate: implement time-based strategy
Browse files Browse the repository at this point in the history
  • Loading branch information
hugoShaka committed Dec 3, 2024
1 parent 8c41768 commit 2d7c21b
Show file tree
Hide file tree
Showing 2 changed files with 383 additions and 0 deletions.
87 changes: 87 additions & 0 deletions lib/autoupdate/rollout/strategy_timebased.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
package rollout

import (
"context"
"log/slog"

"github.com/gravitational/trace"
"github.com/jonboulle/clockwork"

"github.com/gravitational/teleport/api/gen/proto/go/teleport/autoupdate/v1"
update "github.com/gravitational/teleport/api/types/autoupdate"
)

// timeBasedStrategy is the rollout strategy registered under
// update.AgentsStrategyTimeBased. It carries no rollout state of its own:
// progressRollout works on the groups it is handed and on the current time
// read from clock.
type timeBasedStrategy struct {
// log is the strategy logger; newTimeBasedStrategy derives it from the
// caller's logger by attaching a "strategy" attribute.
log *slog.Logger
// clock supplies the current time used when evaluating groups, which
// lets tests substitute a fake clock.
clock clockwork.Clock
}

// name returns the identifier of this strategy, update.AgentsStrategyTimeBased.
// It is the same value newTimeBasedStrategy attaches to the logger under the
// "strategy" key.
func (h *timeBasedStrategy) name() string {
return update.AgentsStrategyTimeBased
}

// newTimeBasedStrategy builds the time-based rollout strategy.
//
// Both dependencies are mandatory: a nil log or a nil clock is rejected with a
// trace.BadParameter error and no strategy is returned. On success the returned
// strategy logs through a child of log tagged with
// "strategy"=update.AgentsStrategyTimeBased and reads time from clock.
func newTimeBasedStrategy(log *slog.Logger, clock clockwork.Clock) (rolloutStrategy, error) {
	// Validate dependencies up front; log is checked first so the reported
	// error is stable when both are missing.
	switch {
	case log == nil:
		return nil, trace.BadParameter("missing log")
	case clock == nil:
		return nil, trace.BadParameter("missing clock")
	}

	strategy := &timeBasedStrategy{
		log:   log.With("strategy", update.AgentsStrategyTimeBased),
		clock: clock,
	}
	return strategy, nil
}

// progressRollout evaluates every group against its maintenance window, using
// the strategy clock's current time, and updates the groups in place:
//
//   - unstarted and done groups become active when inWindow reports true and
//     otherwise keep their current state. Done groups may therefore become
//     active again, giving agents that missed an earlier window another try;
//   - active groups stay active while inWindow reports true and become done
//     once it reports false;
//   - rolled-back groups are left untouched.
//
// Groups do not depend on each other in a time-based rollout. When inWindow
// fails for a group, that group keeps its state with the reconciler-error
// reason, the error is recorded, and processing continues with the next
// group. All recorded errors are returned as a single aggregate (nil when
// there are none).
//
// A group in an unrecognised state stops processing immediately with a
// trace.BadParameter error; errors recorded for earlier groups are not part
// of that return value.
func (h *timeBasedStrategy) progressRollout(ctx context.Context, groups []*autoupdate.AutoUpdateAgentRolloutStatusGroup) error {
	now := h.clock.Now()

	// Failures are collected rather than returned so one broken group cannot
	// prevent the remaining groups from being evaluated.
	var errs []error
	for _, group := range groups {
		// doneWhenOutside is true for groups that must move to the done state
		// once they are no longer inside their window.
		doneWhenOutside := false
		switch group.State {
		case autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ROLLEDBACK:
			// A rolled-back group is left exactly as it is: something went
			// wrong and the update must not be attempted again.
			continue
		case autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED,
			autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_DONE:
			// Waiting groups keep their state until their window opens.
		case autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ACTIVE:
			// The group is being updated; leaving the window ends the update.
			doneWhenOutside = true
		default:
			return trace.BadParameter("unknown autoupdate group state: %v", group.State)
		}

		insideWindow, err := inWindow(group, now)
		if err != nil {
			// Surface the failure in the group status, remember the error,
			// and carry on with the other groups.
			setGroupState(group, group.State, updateReasonReconcilerError, now)
			errs = append(errs, err)
			continue
		}

		switch {
		case insideWindow:
			setGroupState(group, autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ACTIVE, updateReasonInWindow, now)
		case doneWhenOutside:
			setGroupState(group, autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_DONE, updateReasonOutsideWindow, now)
		default:
			setGroupState(group, group.State, updateReasonOutsideWindow, now)
		}
	}
	return trace.NewAggregate(errs...)
}
296 changes: 296 additions & 0 deletions lib/autoupdate/rollout/strategy_timebased_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,296 @@
package rollout

import (
"context"
"testing"
"time"

"github.com/jonboulle/clockwork"
"github.com/stretchr/testify/require"
"google.golang.org/protobuf/types/known/timestamppb"

"github.com/gravitational/teleport/api/gen/proto/go/teleport/autoupdate/v1"
"github.com/gravitational/teleport/lib/utils"
)

// Test_progressGroupsTimeBased is a table-driven test of the time-based
// strategy's progressRollout. Each case hands progressRollout a slice of
// groups and then compares that same slice, which is expected to have been
// updated in place, against the expected group statuses.
//
// Every case shares one fake clock and one strategy instance. The clock is
// never advanced, so clock.Now() is the same instant throughout the test.
func Test_progressGroupsTimeBased(t *testing.T) {
// The fake clock is pinned to testSunday (declared elsewhere in the package).
clock := clockwork.NewFakeClockAt(testSunday)
log := utils.NewSlogLoggerForTests()
strategy, err := newTimeBasedStrategy(log, clock)
require.NoError(t, err)

groupName := "test-group"
// canStartToday / cannotStartToday are the two day schedules used below.
// NOTE(review): everyWeekday, everyWeekdayButSunday and matchingStartHour are
// declared elsewhere; presumably they put the fake clock inside, respectively
// outside, a group's maintenance window — confirm against their definitions.
canStartToday := everyWeekday
cannotStartToday := everyWeekdayButSunday
// lastUpdate is five minutes before the fake clock's current time, so a
// refreshed timestamp can be told apart from an untouched one.
lastUpdate := timestamppb.New(clock.Now().Add(-5 * time.Minute))
ctx := context.Background()

tests := []struct {
name string
// initialState is passed to progressRollout and inspected afterwards.
initialState []*autoupdate.AutoUpdateAgentRolloutStatusGroup
// expectedState is what initialState must equal after the call, in the same order.
expectedState []*autoupdate.AutoUpdateAgentRolloutStatusGroup
}{
{
// The group keeps its UNSTARTED state; the update reason and time are
// expected to change to "outside window" at the current clock time.
name: "unstarted -> unstarted",
initialState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{
{
Name: groupName,
State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED,
LastUpdateTime: lastUpdate,
LastUpdateReason: updateReasonCreated,
ConfigDays: cannotStartToday,
ConfigStartHour: matchingStartHour,
},
},
expectedState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{
{
Name: groupName,
State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED,
LastUpdateTime: timestamppb.New(clock.Now()),
LastUpdateReason: updateReasonOutsideWindow,
ConfigDays: cannotStartToday,
ConfigStartHour: matchingStartHour,
},
},
},
{
// The group becomes ACTIVE and its StartTime is expected to be set to
// the current clock time.
name: "unstarted -> active",
initialState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{
{
Name: groupName,
State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED,
LastUpdateTime: lastUpdate,
LastUpdateReason: updateReasonCreated,
ConfigDays: canStartToday,
ConfigStartHour: matchingStartHour,
},
},
expectedState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{
{
Name: groupName,
State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ACTIVE,
StartTime: timestamppb.New(clock.Now()),
LastUpdateTime: timestamppb.New(clock.Now()),
LastUpdateReason: updateReasonInWindow,
ConfigDays: canStartToday,
ConfigStartHour: matchingStartHour,
},
},
},
{
// State and reason are already DONE / "outside window": the expected
// output equals the input, including the old LastUpdateTime.
name: "done -> done",
initialState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{
{
Name: groupName,
State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_DONE,
LastUpdateTime: lastUpdate,
LastUpdateReason: updateReasonOutsideWindow,
ConfigDays: cannotStartToday,
ConfigStartHour: matchingStartHour,
},
},
expectedState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{
{
Name: groupName,
State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_DONE,
LastUpdateTime: lastUpdate,
LastUpdateReason: updateReasonOutsideWindow,
ConfigDays: cannotStartToday,
ConfigStartHour: matchingStartHour,
},
},
},
{
// A DONE group goes back to ACTIVE; its previous StartTime is expected
// to be replaced by the current clock time.
name: "done -> active",
initialState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{
{
Name: groupName,
State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_DONE,
LastUpdateTime: lastUpdate,
StartTime: lastUpdate,
LastUpdateReason: updateReasonOutsideWindow,
ConfigDays: canStartToday,
ConfigStartHour: matchingStartHour,
},
},
expectedState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{
{
Name: groupName,
State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ACTIVE,
StartTime: timestamppb.New(clock.Now()),
LastUpdateTime: timestamppb.New(clock.Now()),
LastUpdateReason: updateReasonInWindow,
ConfigDays: canStartToday,
ConfigStartHour: matchingStartHour,
},
},
},
{
// An ACTIVE group stays ACTIVE and its original StartTime is expected
// to be preserved.
name: "active -> active",
initialState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{
{
Name: groupName,
State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ACTIVE,
StartTime: lastUpdate,
LastUpdateTime: timestamppb.New(clock.Now()),
LastUpdateReason: updateReasonInWindow,
ConfigDays: canStartToday,
ConfigStartHour: matchingStartHour,
},
},
expectedState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{
{
Name: groupName,
State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ACTIVE,
StartTime: lastUpdate,
LastUpdateTime: timestamppb.New(clock.Now()),
LastUpdateReason: updateReasonInWindow,
ConfigDays: canStartToday,
ConfigStartHour: matchingStartHour,
},
},
},
{
// An ACTIVE group becomes DONE with the "outside window" reason; its
// StartTime is expected to be preserved.
name: "active -> done",
initialState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{
{
Name: groupName,
State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ACTIVE,
StartTime: lastUpdate,
LastUpdateTime: timestamppb.New(clock.Now()),
LastUpdateReason: updateReasonInWindow,
ConfigDays: cannotStartToday,
ConfigStartHour: matchingStartHour,
},
},
expectedState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{
{
Name: groupName,
State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_DONE,
StartTime: lastUpdate,
LastUpdateTime: timestamppb.New(clock.Now()),
LastUpdateReason: updateReasonOutsideWindow,
ConfigDays: cannotStartToday,
ConfigStartHour: matchingStartHour,
},
},
},
{
// ROLLEDBACK groups are expected to come out unchanged under both
// schedules.
name: "rolledback is a dead end",
initialState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{
{
Name: groupName + "-in-maintenance",
State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ROLLEDBACK,
LastUpdateTime: lastUpdate,
ConfigDays: canStartToday,
ConfigStartHour: matchingStartHour,
},
{
Name: groupName + "-out-of-maintenance",
State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ROLLEDBACK,
LastUpdateTime: lastUpdate,
ConfigDays: cannotStartToday,
ConfigStartHour: matchingStartHour,
},
},
expectedState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{
{
Name: groupName + "-in-maintenance",
State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ROLLEDBACK,
LastUpdateTime: lastUpdate,
ConfigDays: canStartToday,
ConfigStartHour: matchingStartHour,
},
{
Name: groupName + "-out-of-maintenance",
State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ROLLEDBACK,
LastUpdateTime: lastUpdate,
ConfigDays: cannotStartToday,
ConfigStartHour: matchingStartHour,
},
},
},
{
// One group per transition in a single call, checking that each group
// is handled on its own and that the slice order is kept.
name: "mix of everything",
initialState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{
{
Name: "new group should start",
State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED,
LastUpdateTime: lastUpdate,
ConfigDays: canStartToday,
ConfigStartHour: matchingStartHour,
},
{
Name: "done group should start",
State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_DONE,
LastUpdateTime: lastUpdate,
StartTime: lastUpdate,
ConfigDays: canStartToday,
ConfigStartHour: matchingStartHour,
},
{
Name: "rolledback group should do nothing",
State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ROLLEDBACK,
LastUpdateTime: lastUpdate,
ConfigDays: canStartToday,
ConfigStartHour: matchingStartHour,
},
{
Name: "old group should stop",
State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ACTIVE,
LastUpdateTime: lastUpdate,
StartTime: lastUpdate,
ConfigDays: cannotStartToday,
ConfigStartHour: matchingStartHour,
},
},
expectedState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{
{
Name: "new group should start",
State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ACTIVE,
StartTime: timestamppb.New(clock.Now()),
LastUpdateTime: timestamppb.New(clock.Now()),
LastUpdateReason: updateReasonInWindow,
ConfigDays: canStartToday,
ConfigStartHour: matchingStartHour,
},
{
Name: "done group should start",
State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ACTIVE,
StartTime: timestamppb.New(clock.Now()),
LastUpdateTime: timestamppb.New(clock.Now()),
LastUpdateReason: updateReasonInWindow,
ConfigDays: canStartToday,
ConfigStartHour: matchingStartHour,
},
{
Name: "rolledback group should do nothing",
State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ROLLEDBACK,
LastUpdateTime: lastUpdate,
ConfigDays: canStartToday,
ConfigStartHour: matchingStartHour,
},
{
Name: "old group should stop",
State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_DONE,
StartTime: lastUpdate,
LastUpdateTime: timestamppb.New(clock.Now()),
LastUpdateReason: updateReasonOutsideWindow,
ConfigDays: cannotStartToday,
ConfigStartHour: matchingStartHour,
},
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
// progressRollout is expected to update tt.initialState in place; no
// case in this table exercises the error paths.
err := strategy.progressRollout(ctx, tt.initialState)
require.NoError(t, err)
// We use require.Equal instead of ElementsMatch because group order matters.
// It's not super important for time-based, but is crucial for halt-on-error.
// So it's better to be more conservative and validate that order never changes
// for both strategies.
require.Equal(t, tt.expectedState, tt.initialState)
})
}
}

0 comments on commit 2d7c21b

Please sign in to comment.