Skip to content

Commit

Permalink
Treating VSphereVM wasNotFoundByBIOSUUID as transient error
Browse files Browse the repository at this point in the history
Signed-off-by: Gong Zhang <[email protected]>
  • Loading branch information
zhanggbj committed Aug 3, 2023
1 parent 5adad8c commit 43cf6d0
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 5 deletions.
5 changes: 5 additions & 0 deletions apis/v1beta1/condition_consts.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,11 @@ const (
// are automatically re-tried by the controller.
PoweringOnFailedReason = "PoweringOnFailed"

// NotFoundByBIOSUUIDReason (Severity=Warning) documents a VSphereVM which can't be found by BIOS UUID.
// Those kinds of errors can sometimes be transient, and failed VSphereVMs are automatically
// reconciled by the controller.
NotFoundByBIOSUUIDReason = "NotFoundByBIOSUUID"

// TaskFailure (Severity=Warning) documents a VSphereMachine/VSphere task failure; the reconcile loop will automatically
// retry the operation, but a user intervention might be required to fix the problem.
TaskFailure = "TaskFailure"
Expand Down
10 changes: 5 additions & 5 deletions pkg/services/govmomi/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,8 @@ import (
"github.com/vmware/govmomi/vim25/types"
corev1 "k8s.io/api/core/v1"
apitypes "k8s.io/apimachinery/pkg/types"
"k8s.io/utils/pointer"
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
bootstrapv1 "sigs.k8s.io/cluster-api/bootstrap/kubeadm/api/v1beta1"
capierrors "sigs.k8s.io/cluster-api/errors"
"sigs.k8s.io/cluster-api/util/conditions"
"sigs.k8s.io/controller-runtime/pkg/reconcile"

Expand Down Expand Up @@ -83,10 +81,12 @@ func (vms *VMService) ReconcileVM(ctx *context.VMContext) (vm infrav1.VirtualMac
return vm, err
}

// If the machine was not found by BIOS UUID it means that it got deleted from vcenter directly
// If the machine was not found by BIOS UUID, it could mean that the machine got deleted from vcenter directly.
// However, this error is sometimes transient: for instance, if the storage was temporarily disconnected but
// later recovered, the machine will recover from this error.
if wasNotFoundByBIOSUUID(err) {
ctx.VSphereVM.Status.FailureReason = capierrors.MachineStatusErrorPtr(capierrors.UpdateMachineError)
ctx.VSphereVM.Status.FailureMessage = pointer.String(fmt.Sprintf("Unable to find VM by BIOS UUID %s. The vm was removed from infra", ctx.VSphereVM.Spec.BiosUUID))
conditions.MarkFalse(ctx.VSphereVM, infrav1.VMProvisionedCondition, infrav1.NotFoundByBIOSUUIDReason, clusterv1.ConditionSeverityWarning, err.Error())
vm.State = infrav1.VirtualMachineStateNotFound
return vm, err
}

Expand Down

0 comments on commit 43cf6d0

Please sign in to comment.