diff --git a/Makefile b/Makefile
index 5eea68be9c..78414a8aca 100644
--- a/Makefile
+++ b/Makefile
@@ -865,8 +865,10 @@ clean-ci: ## Cleanup orphaned objects in CI
 		--max-age=12h \
 		--ipam-namespace=default \
 		--folder=/SDDC-Datacenter/vm/Workloads/cluster-api-provider-vsphere \
-		--folder=/SDDC-Datacenter/vm/Workloads/cloud-provider-vsphere \
-		--folder=/SDDC-Datacenter/vm/Workloads/image-builder
+		--resource-pool=/SDDC-Datacenter/host/Cluster-1/Resources/Compute-ResourcePool/cluster-api-provider-vsphere \
+		--vm-folder=/SDDC-Datacenter/vm/Workloads/cluster-api-provider-vsphere \
+		--vm-folder=/SDDC-Datacenter/vm/Workloads/cloud-provider-vsphere \
+		--vm-folder=/SDDC-Datacenter/vm/Workloads/image-builder
 
 .PHONY: clean-temporary
 clean-temporary: ## Remove all temporary files and folders
diff --git a/hack/tools/janitor/janitor.go b/hack/tools/janitor/janitor.go
index 444dfb1ea4..3427590c2a 100644
--- a/hack/tools/janitor/janitor.go
+++ b/hack/tools/janitor/janitor.go
@@ -19,10 +19,12 @@ package main
 import (
 	"context"
 	"fmt"
+	"slices"
+	"sort"
+	"strings"
 	"time"
 
 	"github.com/pkg/errors"
-	"github.com/vmware/govmomi/find"
 	"github.com/vmware/govmomi/object"
 	govmomicluster "github.com/vmware/govmomi/vapi/cluster"
 	"github.com/vmware/govmomi/vim25/mo"
@@ -57,6 +59,47 @@ type virtualMachine struct {
 	object *object.VirtualMachine
 }
 
+func (s *janitor) cleanupVSphere(ctx context.Context, folders, resourcePools, vmFolders []string) error {
+	errList := []error{}
+
+	// Delete VMs first to clean up folders and resource pools.
+	for _, folder := range vmFolders {
+		if err := s.deleteVSphereVMs(ctx, folder); err != nil {
+			errList = append(errList, errors.Wrapf(err, "cleaning up vSphereVMs for folder %q", folder))
+		}
+	}
+	if err := kerrors.NewAggregate(errList); err != nil {
+		return errors.Wrap(err, "cleaning up vSphereVMs")
+	}
+
+	// Delete empty resource pools.
+	for _, resourcePool := range resourcePools {
+		if err := s.deleteObjectChildren(ctx, resourcePool, "ResourcePool"); err != nil {
+			errList = append(errList, errors.Wrapf(err, "cleaning up empty resource pool children for resource pool %q", resourcePool))
+		}
+	}
+	if err := kerrors.NewAggregate(errList); err != nil {
+		return errors.Wrap(err, "cleaning up resource pools")
+	}
+
+	// Delete empty folders.
+	for _, folder := range folders {
+		if err := s.deleteObjectChildren(ctx, folder, "Folder"); err != nil {
+			errList = append(errList, errors.Wrapf(err, "cleaning up empty folder children for folder %q", folder))
+		}
+	}
+	if err := kerrors.NewAggregate(errList); err != nil {
+		return errors.Wrap(err, "cleaning up folders")
+	}
+
+	// Delete empty cluster modules.
+	if err := s.deleteVSphereClusterModules(ctx); err != nil {
+		return errors.Wrap(err, "cleaning up vSphere cluster modules")
+	}
+
+	return nil
+}
+
 // deleteVSphereVMs deletes all VSphereVMs in a given folder in vSphere if their creation
 // timestamp is before the janitor's configured maxCreationDate.
 func (s *janitor) deleteVSphereVMs(ctx context.Context, folder string) error {
@@ -70,8 +113,7 @@ func (s *janitor) deleteVSphereVMs(ctx context.Context, folder string) error {
 	log.Info("Deleting vSphere VMs in folder")
 
 	// List all virtual machines inside the folder.
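+	// The trailing "/..." below makes the finder list the folder's children
+	// recursively, so VMs in nested subfolders are matched as well.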
-	finder := find.NewFinder(s.vSphereClients.Vim, false)
-	managedObjects, err := finder.ManagedObjectListChildren(ctx, folder+"/...", "VirtualMachine")
+	managedObjects, err := s.vSphereClients.Finder.ManagedObjectListChildren(ctx, folder+"/...", "VirtualMachine")
 	if err != nil {
 		return err
 	}
@@ -143,7 +185,7 @@ func (s *janitor) deleteVSphereVMs(ctx context.Context, folder string) error {
 	destroyTasks := []*object.Task{}
 	for _, vm := range append(vmsToDeleteAndPoweroff, vmsToDelete...) {
 		log.Info("Destroying vm in vSphere", "vm", vm.managedObject.Config.Name)
-		if dryRun {
+		if s.dryRun {
 			// Skipping actual destroy on dryRun.
 			continue
 		}
@@ -162,12 +204,149 @@ func (s *janitor) deleteVSphereVMs(ctx context.Context, folder string) error {
 	return nil
 }
 
-func waitForTasksFinished(ctx context.Context, tasks []*object.Task, ignoreErrors bool) error {
-	for _, t := range tasks {
-		if err := t.Wait(ctx); !ignoreErrors && err != nil {
+// deleteObjectChildren deletes all child objects of a given object in vSphere if they
+// don't contain any virtual machines.
+// An object only gets deleted if:
+// * it does not have any children of a different type
+// * its deletion-marker timestamp is before s.maxCreationDate
+// If an object does not yet have the deletion-marker field, the janitor adds the field
+// to it with the current timestamp as value.
+func (s *janitor) deleteObjectChildren(ctx context.Context, inventoryPath string, objectType string) error {
+	if !slices.Contains([]string{"ResourcePool", "Folder"}, objectType) {
+		return fmt.Errorf("deleteObjectChildren is not implemented for objectType %s", objectType)
+	}
+
+	if inventoryPath == "" {
+		return fmt.Errorf("cannot use empty string to delete children of type %s", objectType)
+	}
+
+	log := ctrl.LoggerFrom(ctx).WithName(fmt.Sprintf("%sChildren", objectType)).WithValues(objectType, inventoryPath)
+	ctx = ctrl.LoggerInto(ctx, log)
+
+	log.Info("Deleting empty children")
+
+	// Recursively list all objects of the given objectType below the inventoryPath.
+	managedEntities, err := recursiveList(ctx, inventoryPath, s.vSphereClients.Govmomi, s.vSphereClients.Finder, s.vSphereClients.ViewManager, objectType)
+	if err != nil {
+		return err
+	}
+
+	// Build a map which notes if an object has children of a different type.
+	// Later on we will use that information to avoid deleting objects which have children.
+	hasChildren := map[string]bool{}
+	for _, e := range managedEntities {
+		// Check if the object has children, because we only want to delete objects which don't have children of a different type.
+		children, err := recursiveList(ctx, e.element.Path, s.vSphereClients.Govmomi, s.vSphereClients.Finder, s.vSphereClients.ViewManager)
+		if err != nil {
+			return err
+		}
+		// Mark e as having children if any of its children are of a different type.
+		for _, child := range children {
+			if child.entity.Reference().Type == objectType {
+				continue
+			}
+			hasChildren[e.element.Path] = true
+			break
+		}
+	}
+
+	// Get key for the deletion marker.
+	deletionMarkerKey, err := s.vSphereClients.FieldsManager.FindKey(ctx, vSphereDeletionMarkerName)
+	if err != nil {
+		if !errors.Is(err, object.ErrKeyNameNotFound) {
+			return errors.Wrapf(err, "finding custom field %q", vSphereDeletionMarkerName)
+		}
+
+		// In case of ErrKeyNameNotFound we will create the deletionMarker, but only if
+		// we are not on dryRun.
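+		// On a dry run the key then keeps its zero value, which is safe because the
+		// Set call further below is skipped on dry runs as well.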
+ log.Info("Creating the deletion field") + + if !s.dryRun { + field, err := s.vSphereClients.FieldsManager.Add(ctx, vSphereDeletionMarkerName, "ManagedEntity", nil, nil) + if err != nil { + return errors.Wrapf(err, "creating custom field %q", vSphereDeletionMarkerName) + } + deletionMarkerKey = field.Key + } + } + + objectsToMark := []*managedElement{} + objectsToDelete := []*managedElement{} + + // Filter elements and collect two groups: + // * objects to add the timestamp field + // * objects to destroy + for i := range managedEntities { + managedEntity := managedEntities[i] + + // We mark any object we find with a timestamp to determine the first time we did see this item. + // This is used as replacement for the non-existing CreationTimestamp on objects. + timestamp, err := getDeletionMarkerTimestamp(deletionMarkerKey, managedEntity.entity.Value) + if err != nil { return err } + // If no timestamp was found: queue it to get marked. + if timestamp == nil { + objectsToMark = append(objectsToMark, managedEntity) + continue + } + + // Filter out objects we don't have to cleanup depending on s.maxCreationDate. + if timestamp.After(s.maxCreationDate) { + log.Info("Skipping deletion of object: marked timestamp does not exceed maxCreationDate", "timestamp", timestamp, "inventoryPath", managedEntity.element.Path) + continue + } + + // Filter out objects which have children. + if hasChildren[managedEntity.element.Path] { + log.Info("Skipping deletion of object: object has child objects of a different type", "inventoryPath", managedEntity.element.Path) + continue + } + + objectsToDelete = append(objectsToDelete, managedEntity) } + + for i := range objectsToMark { + managedElement := objectsToMark[i] + log.Info("Marking resource object for deletion in vSphere", objectType, managedElement.element.Path) + + if s.dryRun { + // Skipping actual mark on dryRun. + continue + } + + if err := s.vSphereClients.FieldsManager.Set(ctx, managedElement.entity.Reference(), deletionMarkerKey, time.Now().Format(time.RFC3339)); err != nil { + return errors.Wrapf(err, "setting field %s on object %s", vSphereDeletionMarkerName, managedElement.element.Path) + } + } + + // sort objects to delete so children are deleted before parents + sort.Slice(objectsToDelete, func(i, j int) bool { + a := objectsToDelete[i] + b := objectsToDelete[j] + + return strings.Count(a.element.Path, "/") > strings.Count(b.element.Path, "/") + }) + + destroyTasks := []*object.Task{} + for _, managedEntity := range objectsToDelete { + log.Info("Destroying object in vSphere", objectType, managedEntity.element.Path) + if s.dryRun { + // Skipping actual destroy on dryRun. + continue + } + + task, err := object.NewCommon(s.vSphereClients.Vim, managedEntity.entity.Reference()).Destroy(ctx) + if err != nil { + return err + } + log.Info("Created Destroy task for object", objectType, managedEntity.element.Path, "task", task.Reference().Value) + destroyTasks = append(destroyTasks, task) + } + // Wait for all destroy tasks to succeed. + if err := waitForTasksFinished(ctx, destroyTasks, false); err != nil { + return errors.Wrap(err, "failed to wait for object destroy task to finish") + } + return nil } diff --git a/hack/tools/janitor/janitor_test.go b/hack/tools/janitor/janitor_test.go new file mode 100644 index 0000000000..c5f9c5ff41 --- /dev/null +++ b/hack/tools/janitor/janitor_test.go @@ -0,0 +1,563 @@ +/* +Copyright 2024 The Kubernetes Authors. 
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package main
+
+import (
+	"context"
+	"fmt"
+	"os"
+	"path"
+	"strings"
+	"testing"
+	"time"
+
+	"github.com/onsi/gomega"
+	"github.com/onsi/gomega/gbytes"
+	"github.com/vmware/govmomi"
+	"github.com/vmware/govmomi/find"
+	"github.com/vmware/govmomi/simulator"
+	"github.com/vmware/govmomi/simulator/vpx"
+	"github.com/vmware/govmomi/view"
+	"k8s.io/apimachinery/pkg/util/rand"
+	"k8s.io/klog/v2"
+	ctrl "sigs.k8s.io/controller-runtime"
+
+	"sigs.k8s.io/cluster-api-provider-vsphere/internal/test/helpers/vcsim"
+)
+
+func setup(ctx context.Context, t *testing.T) (*vSphereClients, *vcsim.Simulator) {
+	t.Helper()
+	model := &simulator.Model{
+		ServiceContent: vpx.ServiceContent,
+		RootFolder:     vpx.RootFolder,
+		Autostart:      true,
+		Datacenter:     1,
+		Portgroup:      1,
+		Host:           1,
+		Cluster:        1,
+		ClusterHost:    3,
+		DelayConfig:    simulator.DelayConfig{},
+	}
+
+	vcsim, err := vcsim.NewBuilder().WithModel(model).Build()
+	if err != nil {
+		panic(fmt.Sprintf("unable to create simulator %s", err))
+	}
+
+	fmt.Printf(" export GOVC_URL=%s\n", vcsim.ServerURL())
+	fmt.Printf(" export GOVC_USERNAME=%s\n", vcsim.Username())
+	fmt.Printf(" export GOVC_PASSWORD=%s\n", vcsim.Password())
+	fmt.Printf(" export GOVC_INSECURE=true\n")
+
+	clients, err := newVSphereClients(ctx, getVSphereClientInput{
+		Username:  vcsim.Username(),
+		Password:  vcsim.Password(),
+		Server:    vcsim.ServerURL().String(),
+		UserAgent: "capv-janitor-test",
+	})
+	if err != nil {
+		panic(err)
+	}
+
+	t.Cleanup(vcsim.Destroy)
+
+	return clients, vcsim
+}
+
+func setupTestCase(g *gomega.WithT, sim *vcsim.Simulator, objects []*vcsimObject) (string, map[string]bool) {
+	g.THelper()
+
+	relativePath := rand.String(10)
+
+	baseRP := vcsimResourcePool("")
+	baseFolder := vcsimFolder("")
+	baseDatastore := vcsimDatastore("", os.TempDir())
+	// Create base objects for the test case
+	g.Expect(baseRP.Create(sim, relativePath)).To(gomega.Succeed())
+	g.Expect(baseFolder.Create(sim, relativePath)).To(gomega.Succeed())
+	g.Expect(baseDatastore.Create(sim, relativePath)).To(gomega.Succeed())
+
+	createdObjects := map[string]bool{}
+
+	// Create objects
+	for _, object := range objects {
+		createdObjects[path.Join(object.objectType, object.pathSuffix)] = true
+		g.Expect(object.Create(sim, relativePath)).To(gomega.Succeed())
+	}
+
+	return relativePath, createdObjects
+}
+
+const (
+	folderBase       = "/DC0/vm"
+	resourcePoolBase = "/DC0/host/DC0_C0/Resources"
+)
+
+func Test_janitor_deleteVSphereVMs(t *testing.T) {
+	ctx := context.Background()
+	ctx = ctrl.LoggerInto(ctx, klog.Background())
+
+	// Initialize and start vcsim
+	clients, sim := setup(ctx, t)
+
+	deleteAll := time.Now().Add(time.Hour * 1)
+	deleteNone := time.Now()
+
+	tests := []struct {
+		name            string
+		objects         []*vcsimObject
+		maxCreationDate time.Time
+		wantErr         bool
+		want            map[string]bool
+	}{
+		{
+			name: "delete all VMs",
+			objects: []*vcsimObject{
+				vcsimVirtualMachine("foo"),
+			},
+			maxCreationDate: deleteAll,
+			wantErr:         false,
+			want:            nil,
+		},
+		{
+			name: "delete no VMs",
VMs", + objects: []*vcsimObject{ + vcsimVirtualMachine("foo"), + }, + maxCreationDate: deleteNone, + wantErr: false, + want: map[string]bool{ + "VirtualMachine/foo": true, + }, + }, + { + name: "recursive vm deletion", + objects: []*vcsimObject{ + vcsimResourcePool("a"), + vcsimFolder("a"), + vcsimResourcePool("a/b"), + vcsimFolder("a/b"), + vcsimResourcePool("a/b/c"), + vcsimFolder("a/b/c"), + vcsimVirtualMachine("foo"), + vcsimVirtualMachine("a/bar"), + vcsimVirtualMachine("a/b/c/foobar"), + }, + maxCreationDate: deleteAll, + wantErr: false, + want: map[string]bool{ + "ResourcePool/a": true, + "ResourcePool/a/b": true, + "ResourcePool/a/b/c": true, + "Folder/a": true, + "Folder/a/b": true, + "Folder/a/b/c": true, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + g := gomega.NewWithT(t) + + relativePath, _ := setupTestCase(g, sim, tt.objects) + + s := &janitor{ + dryRun: false, + maxCreationDate: tt.maxCreationDate, + vSphereClients: clients, + } + + // use folder created for this test case as inventoryPath + inventoryPath := vcsimFolder("").Path(relativePath) + + err := s.deleteVSphereVMs(ctx, inventoryPath) + if tt.wantErr { + g.Expect(err).To(gomega.HaveOccurred()) + } else { + g.Expect(err).ToNot(gomega.HaveOccurred()) + } + + // Ensure the expected objects still exists + existingObjects, err := recursiveListFoldersAndResourcePools(ctx, relativePath, clients.Govmomi, clients.Finder, clients.ViewManager) + g.Expect(err).ToNot(gomega.HaveOccurred()) + if tt.want != nil { + g.Expect(existingObjects).To(gomega.BeEquivalentTo(tt.want)) + } else { + g.Expect(existingObjects).To(gomega.BeEmpty()) + } + }) + } +} + +func Test_janitor_deleteObjectChildren(t *testing.T) { + ctx := context.Background() + ctx = ctrl.LoggerInto(ctx, klog.Background()) + + // Initialize and start vcsim + clients, sim := setup(ctx, t) + + tests := []struct { + name string + basePath string + objectType string + objects []*vcsimObject + wantErr bool + want map[string]bool + }{ + { + name: "should preserve resource pool if it contains a vm and delete empty resource pools", + basePath: resourcePoolBase, + objectType: "ResourcePool", + objects: []*vcsimObject{ + vcsimResourcePool("a"), + vcsimResourcePool("b"), + vcsimFolder("a"), + vcsimVirtualMachine("a/foo"), + }, + want: map[string]bool{ + "Folder/a": true, + "ResourcePool/a": true, + "VirtualMachine/a/foo": true, + }, + }, + { + name: "should preserve folder if it contains a vm and delete empty folders", + basePath: folderBase, + objectType: "Folder", + objects: []*vcsimObject{ + vcsimResourcePool("a"), + vcsimFolder("a"), + vcsimFolder("b"), + vcsimVirtualMachine("a/foo"), + }, + want: map[string]bool{ + "Folder/a": true, + "ResourcePool/a": true, + "VirtualMachine/a/foo": true, + }, + }, + { + name: "no-op", + basePath: resourcePoolBase, + objectType: "ResourcePool", + objects: []*vcsimObject{}, + }, + { + name: "single resource pool", + basePath: resourcePoolBase, + objectType: "ResourcePool", + objects: []*vcsimObject{ + vcsimResourcePool("a"), + }, + }, + { + name: "multiple nested resource pools", + basePath: resourcePoolBase, + objectType: "ResourcePool", + objects: []*vcsimObject{ + vcsimResourcePool("a"), + vcsimResourcePool("a/b"), + vcsimResourcePool("a/b/c"), + vcsimResourcePool("d"), + vcsimResourcePool("d/e"), + vcsimResourcePool("f"), + }, + }, + { + name: "no-op", + basePath: folderBase, + objectType: "Folder", + objects: []*vcsimObject{}, + }, + { + name: "single folder", + basePath: folderBase, + objectType: 
"Folder", + objects: []*vcsimObject{ + vcsimFolder("a"), + }, + }, + { + name: "multiple nested folders", + basePath: folderBase, + objectType: "Folder", + objects: []*vcsimObject{ + vcsimFolder("a"), + vcsimFolder("a/b"), + vcsimFolder("a/b/c"), + vcsimFolder("d"), + vcsimFolder("d/e"), + vcsimFolder("f"), + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + g := gomega.NewWithT(t) + + relativePath, wantMarkedObjects := setupTestCase(g, sim, tt.objects) + + inventoryPath := path.Join(tt.basePath, relativePath) + + s := &janitor{ + dryRun: false, + maxCreationDate: time.Now().Add(time.Hour * 1), + vSphereClients: clients, + } + + // Run first iteration which should only tag the resource pools with a timestamp. + g.Expect(s.deleteObjectChildren(ctx, inventoryPath, tt.objectType)).To(gomega.Succeed()) + existingObjects, err := recursiveListFoldersAndResourcePools(ctx, relativePath, clients.Govmomi, clients.Finder, clients.ViewManager) + g.Expect(err).ToNot(gomega.HaveOccurred()) + g.Expect(existingObjects).To(gomega.BeEquivalentTo(wantMarkedObjects)) + + // Run second iteration which should destroy the resource pools with a timestamp. + g.Expect(s.deleteObjectChildren(ctx, inventoryPath, tt.objectType)).To(gomega.Succeed()) + existingObjects, err = recursiveListFoldersAndResourcePools(ctx, relativePath, clients.Govmomi, clients.Finder, clients.ViewManager) + g.Expect(err).ToNot(gomega.HaveOccurred()) + if tt.want != nil { + g.Expect(existingObjects).To(gomega.BeEquivalentTo(tt.want)) + } else { + g.Expect(existingObjects).To(gomega.BeEmpty()) + } + + // Ensure the parent object still exists + assertObjectExists(ctx, g, clients.Finder, inventoryPath) + }) + } +} + +func Test_janitor_CleanupVSphere(t *testing.T) { + ctx := context.Background() + ctx = ctrl.LoggerInto(ctx, klog.Background()) + + // Initialize and start vcsim + clients, sim := setup(ctx, t) + + deleteAll := time.Now().Add(time.Hour * 1) + + tests := []struct { + name string + dryRun bool + maxCreationDate time.Time + objects []*vcsimObject + wantAfterFirstRun map[string]bool + wantAfterSecondRun map[string]bool + }{ + { + name: "no-op", + dryRun: false, + maxCreationDate: deleteAll, + objects: nil, + wantAfterFirstRun: map[string]bool{}, + wantAfterSecondRun: map[string]bool{}, + }, + { + name: "dryRun: no-op", + dryRun: true, + maxCreationDate: deleteAll, + objects: nil, + wantAfterFirstRun: map[string]bool{}, + wantAfterSecondRun: map[string]bool{}, + }, + { + name: "delete everything", + dryRun: false, + maxCreationDate: deleteAll, + objects: []*vcsimObject{ + vcsimFolder("a"), + vcsimResourcePool("a"), + vcsimVirtualMachine("a/b"), + vcsimFolder("c"), + vcsimResourcePool("c"), + }, + wantAfterFirstRun: map[string]bool{ + "Folder/a": true, + "Folder/c": true, + "ResourcePool/a": true, + "ResourcePool/c": true, + }, + wantAfterSecondRun: map[string]bool{}, + }, + { + name: "dryRun: would delete everything", + dryRun: true, + maxCreationDate: deleteAll, + objects: []*vcsimObject{ + vcsimFolder("a"), + vcsimResourcePool("a"), + vcsimVirtualMachine("a/b"), + vcsimFolder("c"), + vcsimResourcePool("c"), + }, + wantAfterFirstRun: map[string]bool{ + "Folder/a": true, + "Folder/c": true, + "ResourcePool/a": true, + "ResourcePool/c": true, + "VirtualMachine/a/b": true, + }, + wantAfterSecondRun: map[string]bool{ + "Folder/a": true, + "Folder/c": true, + "ResourcePool/a": true, + "ResourcePool/c": true, + "VirtualMachine/a/b": true, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t 
+			g := gomega.NewWithT(t)
+
+			relativePath, _ := setupTestCase(g, sim, tt.objects)
+
+			s := &janitor{
+				dryRun:          tt.dryRun,
+				maxCreationDate: tt.maxCreationDate,
+				vSphereClients:  clients,
+			}
+
+			folder := vcsimFolder("").Path(relativePath)
+			resourcePool := vcsimResourcePool("").Path(relativePath)
+
+			folders := []string{folder}
+			resourcePools := []string{resourcePool}
+
+			g.Expect(s.cleanupVSphere(ctx, folders, resourcePools, folders)).To(gomega.Succeed())
+			existingObjects, err := recursiveListFoldersAndResourcePools(ctx, relativePath, clients.Govmomi, clients.Finder, clients.ViewManager)
+			g.Expect(err).ToNot(gomega.HaveOccurred())
+			g.Expect(existingObjects).To(gomega.BeEquivalentTo(tt.wantAfterFirstRun))
+
+			g.Expect(s.cleanupVSphere(ctx, folders, resourcePools, folders)).To(gomega.Succeed())
+			existingObjects, err = recursiveListFoldersAndResourcePools(ctx, relativePath, clients.Govmomi, clients.Finder, clients.ViewManager)
+			g.Expect(err).ToNot(gomega.HaveOccurred())
+			g.Expect(existingObjects).To(gomega.BeEquivalentTo(tt.wantAfterSecondRun))
+
+			// Ensure the parent objects still exist.
+			assertObjectExists(ctx, g, clients.Finder, folder)
+			assertObjectExists(ctx, g, clients.Finder, resourcePool)
+		})
+	}
+}
+
+func assertObjectExists(ctx context.Context, g *gomega.WithT, finder *find.Finder, inventoryPath string) {
+	g.THelper()
+
+	e, err := finder.ManagedObjectList(ctx, inventoryPath)
+	g.Expect(err).ToNot(gomega.HaveOccurred())
+	g.Expect(e).To(gomega.HaveLen(1))
+}
+
+func recursiveListFoldersAndResourcePools(ctx context.Context, testPrefix string, govmomiClient *govmomi.Client, finder *find.Finder, viewManager *view.Manager) (map[string]bool, error) {
+	resourcePoolElements, err := recursiveList(ctx, path.Join(resourcePoolBase, testPrefix), govmomiClient, finder, viewManager)
+	if err != nil {
+		return nil, err
+	}
+
+	folderElements, err := recursiveList(ctx, path.Join(folderBase, testPrefix), govmomiClient, finder, viewManager)
+	if err != nil {
+		return nil, err
+	}
+
+	objects := map[string]bool{}
+
+	for _, e := range append(resourcePoolElements, folderElements...) {
+		parts := strings.Split(e.element.Path, testPrefix+"/")
+		if len(parts) == 2 {
+			objects[path.Join(e.element.Object.Reference().Type, parts[1])] = true
+		}
+	}
+
+	return objects, nil
+}
+
+type vcsimObject struct {
+	pathSuffix       string
+	objectType       string
+	datastoreTempDir string
+}
+
+func (o vcsimObject) Path(testPrefix string) string {
+	var pathPrefix string
+
+	switch o.objectType {
+	case "ResourcePool":
+		pathPrefix = resourcePoolBase
+	case "Folder":
+		pathPrefix = folderBase
+	case "VirtualMachine":
+		// VMs are created inside folders, so they share the folder path prefix.
+		pathPrefix = folderBase
+	case "Datastore":
+		pathPrefix = "/DC0/datastore"
+	default:
+		panic("unimplemented")
+	}
+
+	return path.Join(pathPrefix, testPrefix, o.pathSuffix)
+}
+
+func (o vcsimObject) Create(sim *vcsim.Simulator, testPrefix string) error {
+	var cmd string
+	switch o.objectType {
+	case "ResourcePool":
+		cmd = fmt.Sprintf("pool.create %s", o.Path(testPrefix))
+	case "Folder":
+		cmd = fmt.Sprintf("folder.create %s", o.Path(testPrefix))
+	case "Datastore":
+		tmpDir, err := os.MkdirTemp(o.datastoreTempDir, testPrefix)
+		if err != nil {
+			return err
+		}
+		cmd = fmt.Sprintf("datastore.create -type local -name %s -path %s /DC0/host/DC0_C0", testPrefix, tmpDir)
+	case "VirtualMachine":
+		fullPath := o.Path(testPrefix)
+		folderPath := path.Dir(fullPath)
+		rpPath := vcsimResourcePool(path.Dir(o.pathSuffix)).Path(testPrefix)
+		name := path.Base(fullPath)
+		networkPath := "/DC0/network/DC0_DVPG0"
+		cmd = fmt.Sprintf("vm.create -on=true -pool %s -folder %s -net %s -ds /DC0/datastore/%s %s", rpPath, folderPath, networkPath, testPrefix, name)
+	default:
+		panic("unimplemented")
+	}
+
+	stdout, stderr := gbytes.NewBuffer(), gbytes.NewBuffer()
+	err := sim.Run(cmd, stdout, stderr)
+	if err != nil {
+		fmt.Printf("stdout:\n%s\n", stdout.Contents())
+		fmt.Printf("stderr:\n%s\n", stderr.Contents())
+		return err
+	}
+	return nil
+}
+
+func vcsimResourcePool(p string) *vcsimObject {
+	return &vcsimObject{pathSuffix: p, objectType: "ResourcePool"}
+}
+
+func vcsimFolder(p string) *vcsimObject {
+	return &vcsimObject{pathSuffix: p, objectType: "Folder"}
+}
+
+func vcsimDatastore(p, datastoreTempDir string) *vcsimObject {
+	return &vcsimObject{pathSuffix: p, objectType: "Datastore", datastoreTempDir: datastoreTempDir}
+}
+
+func vcsimVirtualMachine(p string) *vcsimObject {
+	return &vcsimObject{pathSuffix: p, objectType: "VirtualMachine"}
+}
diff --git a/hack/tools/janitor/main.go b/hack/tools/janitor/main.go
index 0a4329bbcd..01740b07e2 100644
--- a/hack/tools/janitor/main.go
+++ b/hack/tools/janitor/main.go
@@ -26,7 +26,6 @@ import (
 	"github.com/pkg/errors"
 	"github.com/spf13/pflag"
 	"k8s.io/apimachinery/pkg/runtime"
-	kerrors "k8s.io/apimachinery/pkg/util/errors"
 	"k8s.io/klog/v2"
 	ipamv1 "sigs.k8s.io/cluster-api/exp/ipam/api/v1beta1"
 	ctrl "sigs.k8s.io/controller-runtime"
@@ -41,14 +40,18 @@ func init() {
 }
 
 var (
-	dryRun         bool
-	ipamNamespace  string
-	maxAge         time.Duration
-	vsphereFolders []string
+	dryRun               bool
+	ipamNamespace        string
+	maxAge               time.Duration
+	vsphereVMFolders     []string
+	vsphereFolders       []string
+	vsphereResourcePools []string
 )
 
 func initFlags(fs *pflag.FlagSet) {
-	fs.StringArrayVar(&vsphereFolders, "folder", []string{}, "Path to folders in vCenter to cleanup virtual machines.")
+	fs.StringArrayVar(&vsphereVMFolders, "vm-folder", []string{}, "Path to folders in vCenter to clean up virtual machines.")
+	fs.StringArrayVar(&vsphereFolders, "folder", []string{}, "Path to a folder in vCenter to recursively clean up empty subfolders.")
+	fs.StringArrayVar(&vsphereResourcePools, "resource-pool", []string{}, "Path to a resource pool in vCenter to recursively clean up empty child resource pools.")
 	fs.StringVar(&ipamNamespace, "ipam-namespace", "", "Namespace for IPAddressClaim cleanup.")
 	fs.DurationVar(&maxAge, "max-age", time.Hour*12, "Maximum age of an object before it is getting deleted.")
 	fs.BoolVar(&dryRun, "dry-run", false, "dry-run results in not deleting anything but printing the actions.")
@@ -75,6 +78,8 @@ func run(ctx context.Context) error {
 	log.Info("Configured settings", "dry-run", dryRun)
"dry-run", dryRun) log.Info("Configured settings", "folders", vsphereFolders) + log.Info("Configured settings", "vm-folders", vsphereVMFolders) + log.Info("Configured settings", "resource-pools", vsphereResourcePools) log.Info("Configured settings", "ipam-namespace", ipamNamespace) log.Info("Configured settings", "max-age", maxAge) @@ -103,15 +108,11 @@ func run(ctx context.Context) error { janitor := newJanitor(vSphereClients, ipamClient, maxAge, ipamNamespace, dryRun) - // First cleanup old vms to free up IPAddressClaims or cluster modules which are still in-use. - errList := []error{} - for _, folder := range vsphereFolders { - if err := janitor.deleteVSphereVMs(ctx, folder); err != nil { - errList = append(errList, errors.Wrapf(err, "cleaning up vSphereVMs for folder %q", folder)) - } - } - if err := kerrors.NewAggregate(errList); err != nil { - return errors.Wrap(err, "cleaning up vSphereVMs") + log.Info("Configured settings", "janitor.maxCreationDate", janitor.maxCreationDate) + + // First cleanup old vms and other vSphere resources to free up IPAddressClaims or cluster modules which are still in-use. + if err := janitor.cleanupVSphere(ctx, vsphereFolders, vsphereResourcePools, vsphereVMFolders); err != nil { + return errors.Wrap(err, "cleaning up vSphere") } // Second cleanup IPAddressClaims. @@ -119,10 +120,5 @@ func run(ctx context.Context) error { return errors.Wrap(err, "cleaning up IPAddressClaims") } - // Third cleanup cluster modules. - if err := janitor.deleteVSphereClusterModules(ctx); err != nil { - return errors.Wrap(err, "cleaning up vSphere cluster modules") - } - return nil } diff --git a/hack/tools/janitor/vsphere.go b/hack/tools/janitor/vsphere.go index 5185e6425e..b73f45aa39 100644 --- a/hack/tools/janitor/vsphere.go +++ b/hack/tools/janitor/vsphere.go @@ -18,13 +18,23 @@ package main import ( "context" + "fmt" "net/url" + "time" + "github.com/pkg/errors" "github.com/vmware/govmomi" + "github.com/vmware/govmomi/find" + "github.com/vmware/govmomi/list" + "github.com/vmware/govmomi/object" + "github.com/vmware/govmomi/property" "github.com/vmware/govmomi/session" "github.com/vmware/govmomi/vapi/rest" + "github.com/vmware/govmomi/view" "github.com/vmware/govmomi/vim25" + "github.com/vmware/govmomi/vim25/mo" "github.com/vmware/govmomi/vim25/soap" + "github.com/vmware/govmomi/vim25/types" ctrl "sigs.k8s.io/controller-runtime" ) @@ -38,9 +48,12 @@ type getVSphereClientInput struct { // vSphereClients is a collection of different clients for vSphere. type vSphereClients struct { - Vim *vim25.Client - Govmomi *govmomi.Client - Rest *rest.Client + Vim *vim25.Client + Govmomi *govmomi.Client + Rest *rest.Client + FieldsManager *object.CustomFieldsManager + Finder *find.Finder + ViewManager *view.Manager } // logout logs out all clients. It logs errors if the context contains a logger. 
@@ -92,9 +105,109 @@ func newVSphereClients(ctx context.Context, input getVSphereClientInput) (*vSphe
 		return nil, err
 	}
 
+	fieldsManager, err := object.GetCustomFieldsManager(vimClient)
+	if err != nil {
+		return nil, err
+	}
+
+	viewManager := view.NewManager(vimClient)
+	finder := find.NewFinder(vimClient, false)
+
 	return &vSphereClients{
-		Vim:     vimClient,
-		Govmomi: govmomiClient,
-		Rest:    restClient,
+		Vim:           vimClient,
+		Govmomi:       govmomiClient,
+		Rest:          restClient,
+		FieldsManager: fieldsManager,
+		Finder:        finder,
+		ViewManager:   viewManager,
 	}, nil
 }
+
+const vSphereDeletionMarkerName = "capv-janitor-deletion-marker"
+
+func waitForTasksFinished(ctx context.Context, tasks []*object.Task, ignoreErrors bool) error {
+	for _, t := range tasks {
+		if err := t.Wait(ctx); !ignoreErrors && err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+func getDeletionMarkerTimestamp(key int32, values []types.BaseCustomFieldValue) (*time.Time, error) {
+	// Find the value for the key
+	var b *types.BaseCustomFieldValue
+	for i := range values {
+		if values[i].GetCustomFieldValue().Key != key {
+			continue
+		}
+		b = &values[i]
+		break
+	}
+
+	// Key does not exist
+	if b == nil {
+		return nil, nil
+	}
+
+	value, ok := (*b).(*types.CustomFieldStringValue)
+	if !ok {
+		return nil, fmt.Errorf("cannot typecast %T to *types.CustomFieldStringValue", *b)
+	}
+
+	t, err := time.Parse(time.RFC3339, value.Value)
+	return &t, err
+}
+
+type managedElement struct {
+	entity  mo.ManagedEntity
+	element *list.Element
+}
+
+func recursiveList(ctx context.Context, inventoryPath string, govmomiClient *govmomi.Client, finder *find.Finder, viewManager *view.Manager, objectTypes ...string) ([]*managedElement, error) {
+	// Get the object at inventoryPath
+	objList, err := finder.ManagedObjectList(ctx, inventoryPath)
+	if err != nil {
+		return nil, err
+	}
+	if len(objList) != 1 {
+		return nil, errors.Errorf("expected to find exactly one managed object at path %s", inventoryPath)
+	}
+
+	root := objList[0].Object.Reference()
+
+	v, err := viewManager.CreateContainerView(ctx, root, objectTypes, true)
+	if err != nil {
+		return nil, err
+	}
+	defer func() { _ = v.Destroy(ctx) }()
+
+	// Recursively find all objects.
+	managedObjects, err := v.Find(ctx, nil, property.Match{"name": "*"})
+	if err != nil {
+		return nil, err
+	}
+
+	managedElements := []*managedElement{}
+
+	if len(managedObjects) == 0 {
+		return managedElements, nil
+	}
+
+	// Retrieve the availableField and value attributes of the found objects so we
+	// can later check for the deletion marker.
+	var objs []mo.ManagedEntity
+	if err := govmomiClient.Retrieve(ctx, managedObjects, []string{"availableField", "value"}, &objs); err != nil {
+		return nil, err
+	}
+
+	for _, entity := range objs {
+		element, err := finder.Element(ctx, entity.Reference())
+		if err != nil {
+			return nil, err
+		}
+		managedElements = append(managedElements, &managedElement{entity: entity, element: element})
+	}
+
+	return managedElements, nil
+}
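For reviewers, a minimal sketch of the deletion-marker round-trip the patch implements, assuming the getDeletionMarkerTimestamp helper from this package; the markerKey value is hypothetical (the janitor resolves the real key via FieldsManager.FindKey):

package main

import (
	"fmt"
	"time"

	"github.com/vmware/govmomi/vim25/types"
)

// exampleDeletionMarkerRoundTrip mimics two janitor runs: the first run stores
// an RFC3339 timestamp in the custom field, a later run parses it back and
// checks it against maxCreationDate before destroying the object.
func exampleDeletionMarkerRoundTrip() error {
	const markerKey int32 = 42 // hypothetical key; see FieldsManager.FindKey above

	// First run: what FieldsManager.Set stores for an unmarked object
	// (backdated by 13 hours here so the second run considers it old enough).
	values := []types.BaseCustomFieldValue{
		&types.CustomFieldStringValue{
			CustomFieldValue: types.CustomFieldValue{Key: markerKey},
			Value:            time.Now().Add(-13 * time.Hour).Format(time.RFC3339),
		},
	}

	// Second run: read the marker back from the object's custom field values.
	timestamp, err := getDeletionMarkerTimestamp(markerKey, values)
	if err != nil {
		return err
	}

	// The object is only destroyed if its marker is not after maxCreationDate
	// (and it has no children of a different type).
	maxCreationDate := time.Now().Add(-12 * time.Hour) // e.g. --max-age=12h
	fmt.Println("eligible for deletion:", timestamp != nil && !timestamp.After(maxCreationDate))
	return nil
}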