Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

🌱 hack/tools/janitor also cleanup child resource pools and folders for capv #2870

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -865,8 +865,10 @@ clean-ci: ## Cleanup orphaned objects in CI
--max-age=12h \
--ipam-namespace=default \
--folder=/SDDC-Datacenter/vm/Workloads/cluster-api-provider-vsphere \
--folder=/SDDC-Datacenter/vm/Workloads/cloud-provider-vsphere \
--folder=/SDDC-Datacenter/vm/Workloads/image-builder
--resource-pool=/SDDC-Datacenter/host/Cluster-1/Resources/Compute-ResourcePool/cluster-api-provider-vsphere \
--vm-folder=/SDDC-Datacenter/vm/Workloads/cluster-api-provider-vsphere \
--vm-folder=/SDDC-Datacenter/vm/Workloads/cloud-provider-vsphere \
--vm-folder=/SDDC-Datacenter/vm/Workloads/image-builder

.PHONY: clean-temporary
clean-temporary: ## Remove all temporary files and folders
Expand Down
193 changes: 186 additions & 7 deletions hack/tools/janitor/janitor.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,12 @@ package main
import (
"context"
"fmt"
"slices"
"sort"
"strings"
"time"

"github.com/pkg/errors"
"github.com/vmware/govmomi/find"
"github.com/vmware/govmomi/object"
govmomicluster "github.com/vmware/govmomi/vapi/cluster"
"github.com/vmware/govmomi/vim25/mo"
Expand Down Expand Up @@ -57,6 +59,47 @@ type virtualMachine struct {
object *object.VirtualMachine
}

// cleanupVSphere runs the vSphere cleanup in four ordered phases and stops at the
// first phase that reports an error:
//  1. delete virtual machines below each entry of vmFolders
//  2. delete empty child resource pools below each entry of resourcePools
//  3. delete empty child folders below each entry of folders
//  4. delete cluster modules
//
// Within the first three phases every entry is processed even if an earlier entry
// failed; the errors of one phase are aggregated and returned together.
func (s *janitor) cleanupVSphere(ctx context.Context, folders, resourcePools, vmFolders []string) error {
	// Phase 1: virtual machines go first so that folders and resource pools can become empty.
	var vmErrs []error
	for _, vmFolder := range vmFolders {
		err := s.deleteVSphereVMs(ctx, vmFolder)
		if err == nil {
			continue
		}
		vmErrs = append(vmErrs, errors.Wrapf(err, "cleaning up vSphereVMs for folder %q", vmFolder))
	}
	if aggregated := kerrors.NewAggregate(vmErrs); aggregated != nil {
		return errors.Wrap(aggregated, "cleaning up vSphereVMs")
	}

	// Phase 2: empty resource pools.
	var poolErrs []error
	for _, pool := range resourcePools {
		err := s.deleteObjectChildren(ctx, pool, "ResourcePool")
		if err == nil {
			continue
		}
		poolErrs = append(poolErrs, errors.Wrapf(err, "cleaning up empty resource pool children for resource pool %q", pool))
	}
	if aggregated := kerrors.NewAggregate(poolErrs); aggregated != nil {
		return errors.Wrap(aggregated, "cleaning up resource pools")
	}

	// Phase 3: empty folders.
	var folderErrs []error
	for _, parent := range folders {
		err := s.deleteObjectChildren(ctx, parent, "Folder")
		if err == nil {
			continue
		}
		folderErrs = append(folderErrs, errors.Wrapf(err, "cleaning up empty folder children for folder %q", parent))
	}
	if aggregated := kerrors.NewAggregate(folderErrs); aggregated != nil {
		return errors.Wrap(aggregated, "cleaning up folders")
	}

	// Phase 4: cluster modules.
	modulesErr := s.deleteVSphereClusterModules(ctx)
	if modulesErr != nil {
		return errors.Wrap(modulesErr, "cleaning up vSphere cluster modules")
	}

	return nil
}

// deleteVSphereVMs deletes all VSphereVMs in a given folder in vSphere if their creation
// timestamp is before the janitor's configured maxCreationDate.
func (s *janitor) deleteVSphereVMs(ctx context.Context, folder string) error {
Expand All @@ -70,8 +113,7 @@ func (s *janitor) deleteVSphereVMs(ctx context.Context, folder string) error {
log.Info("Deleting vSphere VMs in folder")

// List all virtual machines inside the folder.
finder := find.NewFinder(s.vSphereClients.Vim, false)
managedObjects, err := finder.ManagedObjectListChildren(ctx, folder+"/...", "VirtualMachine")
managedObjects, err := s.vSphereClients.Finder.ManagedObjectListChildren(ctx, folder+"/...", "VirtualMachine")
if err != nil {
return err
}
Expand Down Expand Up @@ -143,7 +185,7 @@ func (s *janitor) deleteVSphereVMs(ctx context.Context, folder string) error {
destroyTasks := []*object.Task{}
for _, vm := range append(vmsToDeleteAndPoweroff, vmsToDelete...) {
log.Info("Destroying vm in vSphere", "vm", vm.managedObject.Config.Name)
if dryRun {
if s.dryRun {
// Skipping actual destroy on dryRun.
continue
}
Expand All @@ -162,12 +204,149 @@ func (s *janitor) deleteVSphereVMs(ctx context.Context, folder string) error {
return nil
}

func waitForTasksFinished(ctx context.Context, tasks []*object.Task, ignoreErrors bool) error {
for _, t := range tasks {
if err := t.Wait(ctx); !ignoreErrors && err != nil {
// deleteObjectChildren deletes all child objects in a given object in vSphere if they don't
// contain any virtual machine.
// An object only gets deleted if:
// * it does not have any children of a different type
// * the timestamp field's value is before s.maxCreationDate
// If an object does not yet have a field, the janitor will add the field to it with the current timestamp as value.
func (s *janitor) deleteObjectChildren(ctx context.Context, inventoryPath string, objectType string) error {
if !slices.Contains([]string{"ResourcePool", "Folder"}, objectType) {
return fmt.Errorf("deleteObjectChildren is not implemented for objectType %s", objectType)
}

if inventoryPath == "" {
return fmt.Errorf("cannot use empty string to delete children of type %s", objectType)
}

log := ctrl.LoggerFrom(ctx).WithName(fmt.Sprintf("%sChildren", objectType)).WithValues(objectType, inventoryPath)
ctx = ctrl.LoggerInto(ctx, log)

log.Info("Deleting empty children")

// Recursively list all objects of the given objectType below the inventoryPath.
managedEntities, err := recursiveList(ctx, inventoryPath, s.vSphereClients.Govmomi, s.vSphereClients.Finder, s.vSphereClients.ViewManager, objectType)
if err != nil {
return err
}

// Build a map which notes if an object has children of a different type.
// Later on we will use that information to not delete objects which have children.
hasChildren := map[string]bool{}
for _, e := range managedEntities {
// Check if the object has children, because we only want to delete objects which have children of a different type.
children, err := recursiveList(ctx, e.element.Path, s.vSphereClients.Govmomi, s.vSphereClients.Finder, s.vSphereClients.ViewManager)
if err != nil {
return err
}
// Mark e to have children, if there are children which are of a different type.
for _, child := range children {
if child.entity.Reference().Type == objectType {
continue
}
hasChildren[e.element.Path] = true
break
}
}

// Get key for the deletion marker.
deletionMarkerKey, err := s.vSphereClients.FieldsManager.FindKey(ctx, vSphereDeletionMarkerName)
if err != nil {
if !errors.Is(err, object.ErrKeyNameNotFound) {
return errors.Wrapf(err, "finding custom field %q", vSphereDeletionMarkerName)
}

// In case of ErrKeyNameNotFound we will create the deletionMarker but only if
// we are not on dryRun.
log.Info("Creating the deletion field")

if !s.dryRun {
field, err := s.vSphereClients.FieldsManager.Add(ctx, vSphereDeletionMarkerName, "ManagedEntity", nil, nil)
if err != nil {
return errors.Wrapf(err, "creating custom field %q", vSphereDeletionMarkerName)
}
deletionMarkerKey = field.Key
}
}

objectsToMark := []*managedElement{}
objectsToDelete := []*managedElement{}

// Filter elements and collect two groups:
// * objects to add the timestamp field
// * objects to destroy
for i := range managedEntities {
managedEntity := managedEntities[i]

// We mark any object we find with a timestamp to determine the first time we did see this item.
// This is used as replacement for the non-existing CreationTimestamp on objects.
timestamp, err := getDeletionMarkerTimestamp(deletionMarkerKey, managedEntity.entity.Value)
if err != nil {
return err
}
// If no timestamp was found: queue it to get marked.
if timestamp == nil {
objectsToMark = append(objectsToMark, managedEntity)
continue
}

// Filter out objects we don't have to cleanup depending on s.maxCreationDate.
if timestamp.After(s.maxCreationDate) {
log.Info("Skipping deletion of object: marked timestamp does not exceed maxCreationDate", "timestamp", timestamp, "inventoryPath", managedEntity.element.Path)
continue
chrischdi marked this conversation as resolved.
Show resolved Hide resolved
}

// Filter out objects which have children.
if hasChildren[managedEntity.element.Path] {
log.Info("Skipping deletion of object: object has child objects of a different type", "inventoryPath", managedEntity.element.Path)
continue
}

objectsToDelete = append(objectsToDelete, managedEntity)
}

for i := range objectsToMark {
managedElement := objectsToMark[i]
log.Info("Marking resource object for deletion in vSphere", objectType, managedElement.element.Path)

if s.dryRun {
// Skipping actual mark on dryRun.
continue
}

if err := s.vSphereClients.FieldsManager.Set(ctx, managedElement.entity.Reference(), deletionMarkerKey, time.Now().Format(time.RFC3339)); err != nil {
return errors.Wrapf(err, "setting field %s on object %s", vSphereDeletionMarkerName, managedElement.element.Path)
}
}

// sort objects to delete so children are deleted before parents
sort.Slice(objectsToDelete, func(i, j int) bool {
chrischdi marked this conversation as resolved.
Show resolved Hide resolved
a := objectsToDelete[i]
b := objectsToDelete[j]

return strings.Count(a.element.Path, "/") > strings.Count(b.element.Path, "/")
})

destroyTasks := []*object.Task{}
for _, managedEntity := range objectsToDelete {
log.Info("Destroying object in vSphere", objectType, managedEntity.element.Path)
if s.dryRun {
// Skipping actual destroy on dryRun.
continue
}

task, err := object.NewCommon(s.vSphereClients.Vim, managedEntity.entity.Reference()).Destroy(ctx)
if err != nil {
return err
}
log.Info("Created Destroy task for object", objectType, managedEntity.element.Path, "task", task.Reference().Value)
sbueringer marked this conversation as resolved.
Show resolved Hide resolved
destroyTasks = append(destroyTasks, task)
}
// Wait for all destroy tasks to succeed.
if err := waitForTasksFinished(ctx, destroyTasks, false); err != nil {
return errors.Wrap(err, "failed to wait for object destroy task to finish")
}

return nil
}

Expand Down
Loading
Loading