Skip to content

Commit

Permalink
Skip patching the PV in finalization for failed operation
Browse files Browse the repository at this point in the history
This commit makes change in restore finalizer controller, to make it
check the status in item operation of a PVC before patch the PV that is
bound to it.  If the operation is not successful it will skip patching
the PV.

Signed-off-by: Daniel Jiang <[email protected]>
  • Loading branch information
reasonerjt committed Jan 8, 2025
1 parent 3eaa739 commit dc02caf
Show file tree
Hide file tree
Showing 6 changed files with 413 additions and 81 deletions.
1 change: 1 addition & 0 deletions changelogs/unreleased/8591-reasonerjt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Skip patching the PV in finalization for failed operation
3 changes: 2 additions & 1 deletion pkg/cmd/server/plugin/plugin.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"github.com/spf13/cobra"
apiextensions "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset"

"github.com/vmware-tanzu/velero/pkg/constant"
"github.com/vmware-tanzu/velero/pkg/datamover"

dia "github.com/vmware-tanzu/velero/internal/delete/actions/csi"
Expand Down Expand Up @@ -162,7 +163,7 @@ func NewCommand(f client.Factory) *cobra.Command {
newVolumeSnapshotClassBackupItemAction,
).
RegisterRestoreItemActionV2(
"velero.io/csi-pvc-restorer",
constant.PluginCSIPVCRestoreRIA,

Check warning on line 166 in pkg/cmd/server/plugin/plugin.go

View check run for this annotation

Codecov / codecov/patch

pkg/cmd/server/plugin/plugin.go#L166

Added line #L166 was not covered by tests
newPvcRestoreItemAction(f),
).
RegisterRestoreItemActionV2(
Expand Down
2 changes: 2 additions & 0 deletions pkg/constant/constant.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,6 @@ const (
ControllerSchedule = "schedule"
ControllerServerStatusRequest = "server-status-request"
ControllerRestoreFinalizer = "restore-finalizer"

PluginCSIPVCRestoreRIA = "velero.io/csi-pvc-restorer"
)
68 changes: 61 additions & 7 deletions pkg/controller/restore_finalizer_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,12 @@ import (
"sync"
"time"

"k8s.io/apimachinery/pkg/runtime/schema"

"github.com/vmware-tanzu/velero/pkg/constant"
"github.com/vmware-tanzu/velero/pkg/itemoperation"
"github.com/vmware-tanzu/velero/pkg/plugin/velero"

storagev1api "k8s.io/api/storage/v1"

"github.com/pkg/errors"
Expand Down Expand Up @@ -155,6 +161,12 @@ func (r *restoreFinalizerReconciler) Reconcile(ctx context.Context, req ctrl.Req

restoredPVCList := volume.RestoredPVCFromRestoredResourceList(restoredResourceList)

restoreItemOperations, err := backupStore.GetRestoreItemOperations(restore.Name)
if err != nil {
log.WithError(err).Error("error getting itemOperationList")
return ctrl.Result{}, errors.Wrap(err, "error getting itemOperationList")
}

Check warning on line 168 in pkg/controller/restore_finalizer_controller.go

View check run for this annotation

Codecov / codecov/patch

pkg/controller/restore_finalizer_controller.go#L166-L168

Added lines #L166 - L168 were not covered by tests

finalizerCtx := &finalizerContext{
logger: log,
restore: restore,
Expand All @@ -163,6 +175,9 @@ func (r *restoreFinalizerReconciler) Reconcile(ctx context.Context, req ctrl.Req
restoredPVCList: restoredPVCList,
multiHookTracker: r.multiHookTracker,
resourceTimeout: r.resourceTimeout,
restoreItemOperationList: restoreItemOperationList{
items: restoreItemOperations,
},
}
warnings, errs := finalizerCtx.execute()

Expand Down Expand Up @@ -239,16 +254,44 @@ func (r *restoreFinalizerReconciler) finishProcessing(restorePhase velerov1api.R
return kubeutil.PatchResourceWithRetriesOnErrors(r.resourceTimeout, original, restore, r.Client)
}

type restoreItemOperationList struct {
items []*itemoperation.RestoreOperation
}

func (r *restoreItemOperationList) selectByResource(group, resource, ns, name string) []*itemoperation.RestoreOperation {
var res []*itemoperation.RestoreOperation
rid := velero.ResourceIdentifier{
GroupResource: schema.GroupResource{
Group: group,
Resource: resource,
},
Namespace: ns,
Name: name,
}
for _, item := range r.items {
if item != nil && item.Spec.ResourceIdentifier == rid {
res = append(res, item)
}
}
return res
}

// SelectByPVC filters the restore item operation list by PVC namespace and name.
func (r *restoreItemOperationList) SelectByPVC(ns, name string) []*itemoperation.RestoreOperation {
return r.selectByResource("", "persistentvolumeclaims", ns, name)
}

// finalizerContext includes all the dependencies required by finalization tasks and
// a function execute() to orderly implement task logic.
type finalizerContext struct {
logger logrus.FieldLogger
restore *velerov1api.Restore
crClient client.Client
volumeInfo []*volume.BackupVolumeInfo
restoredPVCList map[string]struct{}
multiHookTracker *hook.MultiHookTracker
resourceTimeout time.Duration
logger logrus.FieldLogger
restore *velerov1api.Restore
crClient client.Client
volumeInfo []*volume.BackupVolumeInfo
restoredPVCList map[string]struct{}
restoreItemOperationList restoreItemOperationList
multiHookTracker *hook.MultiHookTracker
resourceTimeout time.Duration
}

func (ctx *finalizerContext) execute() (results.Result, results.Result) { //nolint:unparam //temporarily ignore the lint report: result 0 is always nil (unparam)
Expand Down Expand Up @@ -310,6 +353,17 @@ func (ctx *finalizerContext) patchDynamicPVWithVolumeInfo() (errs results.Result
return false, err
}

// Check whether the async operation to populate the PVC is successful. If it's not, will skip patching the PV, instead of waiting.
operations := ctx.restoreItemOperationList.SelectByPVC(pvc.Namespace, pvc.Name)
for _, op := range operations {
if op.Spec.RestoreItemAction == constant.PluginCSIPVCRestoreRIA &&
op.Status.Phase != itemoperation.OperationPhaseCompleted {
log.Warnf("skipping PV patch, because the operation to restore the PVC is not completed, "+
"operation: %s, phase: %s", op.Spec.OperationID, op.Status.Phase)
return true, nil
}

Check warning on line 364 in pkg/controller/restore_finalizer_controller.go

View check run for this annotation

Codecov / codecov/patch

pkg/controller/restore_finalizer_controller.go#L359-L364

Added lines #L359 - L364 were not covered by tests
}

// We are handling a common but specific scenario where a PVC is in a pending state and uses a storage class with
// VolumeBindingMode set to WaitForFirstConsumer. In this case, the PV patch step is skipped to avoid
// failures due to the PVC not being bound, which could cause a timeout and result in a failed restore.
Expand Down
115 changes: 115 additions & 0 deletions pkg/controller/restore_finalizer_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,11 @@ import (
"testing"
"time"

"k8s.io/apimachinery/pkg/runtime/schema"

"github.com/vmware-tanzu/velero/pkg/itemoperation"
"github.com/vmware-tanzu/velero/pkg/plugin/velero"

"github.com/sirupsen/logrus"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/mock"
Expand Down Expand Up @@ -145,6 +150,7 @@ func TestRestoreFinalizerReconcile(t *testing.T) {
if test.restore != nil && test.restore.Namespace == velerov1api.DefaultNamespace {
require.NoError(t, r.Client.Create(context.Background(), test.restore))
backupStore.On("GetRestoredResourceList", test.restore.Name).Return(map[string][]string{}, nil)
backupStore.On("GetRestoreItemOperations", test.restore.Name).Return([]*itemoperation.RestoreOperation{}, nil)
}
if test.backup != nil {
assert.NoError(t, r.Client.Create(context.Background(), test.backup))
Expand Down Expand Up @@ -627,3 +633,112 @@ func Test_restoreFinalizerReconciler_finishProcessing(t *testing.T) {
})
}
}

func TestRestoreOperationList(t *testing.T) {
var empty []*itemoperation.RestoreOperation
tests := []struct {
name string
items []*itemoperation.RestoreOperation
inputPVCNS string
inputPVCName string
expected []*itemoperation.RestoreOperation
}{
{
name: "no restore operations",
items: []*itemoperation.RestoreOperation{},
inputPVCNS: "ns-1",
inputPVCName: "pvc-1",
expected: empty,
},
{
name: "one operation with matched info and a nil element",
items: []*itemoperation.RestoreOperation{
nil,
{
Spec: itemoperation.RestoreOperationSpec{
RestoreName: "restore-1",
RestoreUID: "uid-1",
RestoreItemAction: "velero.io/csi-pvc-restorer",
OperationID: "dd-abbb048d-7036-4855-bf50-ebba978b59a6.2426dd0e-b863-4222b5b2b",
ResourceIdentifier: velero.ResourceIdentifier{
GroupResource: schema.GroupResource{
Group: "",
Resource: "persistentvolumeclaims",
},
Namespace: "ns-1",
Name: "pvc-1",
},
},
Status: itemoperation.OperationStatus{
Phase: itemoperation.OperationPhaseCompleted,
OperationUnits: "Byte",
Description: "Completed",
},
},
},
inputPVCNS: "ns-1",
inputPVCName: "pvc-1",
expected: []*itemoperation.RestoreOperation{
{
Spec: itemoperation.RestoreOperationSpec{
RestoreName: "restore-1",
RestoreUID: "uid-1",
RestoreItemAction: "velero.io/csi-pvc-restorer",
OperationID: "dd-abbb048d-7036-4855-bf50-ebba978b59a6.2426dd0e-b863-4222b5b2b",
ResourceIdentifier: velero.ResourceIdentifier{
GroupResource: schema.GroupResource{
Group: "",
Resource: "persistentvolumeclaims",
},
Namespace: "ns-1",
Name: "pvc-1",
},
},
Status: itemoperation.OperationStatus{
Phase: itemoperation.OperationPhaseCompleted,
OperationUnits: "Byte",
Description: "Completed",
},
},
},
},
{
name: "one operation with incorrect resource type",
items: []*itemoperation.RestoreOperation{
{
Spec: itemoperation.RestoreOperationSpec{
RestoreName: "restore-1",
RestoreUID: "uid-1",
RestoreItemAction: "velero.io/csi-pvc-restorer",
OperationID: "dd-abbb048d-7036-4855-bf50-ebba978b59a6.2426dd0e-b863-4222b5b2b",
ResourceIdentifier: velero.ResourceIdentifier{
GroupResource: schema.GroupResource{
Group: "",
Resource: "configmaps",
},
Namespace: "ns-1",
Name: "pvc-1",
},
},
Status: itemoperation.OperationStatus{
Phase: itemoperation.OperationPhaseCompleted,
OperationUnits: "Byte",
Description: "Completed",
},
},
},
inputPVCNS: "ns-1",
inputPVCName: "pvc-1",
expected: empty,
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
l := restoreItemOperationList{
items: tt.items,
}
assert.Equal(t, tt.expected, l.SelectByPVC(tt.inputPVCNS, tt.inputPVCName))
})
}
}
Loading

0 comments on commit dc02caf

Please sign in to comment.