Skip to content

Commit

Permalink
Add Namespace Scoped Zone Discovery
Browse files Browse the repository at this point in the history
- Introduce a feature flag to enable Namespace Scoped Zone.
- Enhance zone discovery to support Namespace Scoped Zones.
- Filter out zones marked for deletion during the discovery process.

Signed-off-by: Gong Zhang <[email protected]>
  • Loading branch information
zhanggbj committed Aug 8, 2024
1 parent 98f6510 commit 250cdcd
Show file tree
Hide file tree
Showing 14 changed files with 786 additions and 565 deletions.
2 changes: 1 addition & 1 deletion config/manager/manager.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ spec:
- "--diagnostics-address=${CAPI_DIAGNOSTICS_ADDRESS:=:8443}"
- "--insecure-diagnostics=${CAPI_INSECURE_DIAGNOSTICS:=false}"
- --v=4
- "--feature-gates=NodeAntiAffinity=${EXP_NODE_ANTI_AFFINITY:=false}"
- "--feature-gates=NodeAntiAffinity=${EXP_NODE_ANTI_AFFINITY:=false},NamespaceScopedZone=${EXP_NAMESPACE_SCOPED_ZONE:=false}"
image: controller:latest
imagePullPolicy: IfNotPresent
name: manager
Expand Down
58 changes: 47 additions & 11 deletions controllers/vmware/vspherecluster_reconciler.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,13 @@ import (
"fmt"

"github.com/pkg/errors"
topologyv1 "github.com/vmware-tanzu/vm-operator/external/tanzu-topology/api/v1alpha1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/types"
kerrors "k8s.io/apimachinery/pkg/util/errors"
"k8s.io/client-go/tools/record"
"k8s.io/klog/v2"
topologyv1 "sigs.k8s.io/cluster-api-provider-vsphere/external/tanzu-topology/api/v1alpha1"
"sigs.k8s.io/cluster-api-provider-vsphere/feature"
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
clusterutilv1 "sigs.k8s.io/cluster-api/util"
"sigs.k8s.io/cluster-api/util/annotations"
Expand Down Expand Up @@ -160,7 +161,7 @@ func (r *ClusterReconciler) reconcileDelete(clusterCtx *vmware.ClusterContext) {

func (r *ClusterReconciler) reconcileNormal(ctx context.Context, clusterCtx *vmware.ClusterContext) error {
// Get any failure domains to report back to the CAPI core controller.
failureDomains, err := r.getFailureDomains(ctx)
failureDomains, err := r.getFailureDomains(ctx, clusterCtx)
if err != nil {
return errors.Wrapf(
err,
Expand Down Expand Up @@ -371,22 +372,57 @@ func (r *ClusterReconciler) VSphereMachineToCluster(ctx context.Context, o clien

// Returns the failure domain information discovered on the cluster
// hosting this controller.
func (r *ClusterReconciler) getFailureDomains(ctx context.Context) (clusterv1.FailureDomains, error) {
availabilityZoneList := &topologyv1.AvailabilityZoneList{}
if err := r.Client.List(ctx, availabilityZoneList); err != nil {
return nil, err
}
func (r *ClusterReconciler) getFailureDomains(ctx context.Context, clusterCtx *vmware.ClusterContext) (clusterv1.FailureDomains, error) {
// Determine the source of failure domains based on the NamespaceScopedZone feature gate.
// If NamespaceScopedZone is enabled, use Zone, which is Namespace scoped; otherwise use
// AvailabilityZone, which is Cluster scoped.
var failureDomainNames []string
if feature.Gates.Enabled(feature.NamespaceScopedZone) {
zoneList := &topologyv1.ZoneList{}
listOptions := &client.ListOptions{Namespace: clusterCtx.VSphereCluster.Namespace}
if err := r.Client.List(ctx, zoneList, listOptions); err != nil {
return nil, err
}

filteredZonelist := filterZonesWithoutDeletionTimestamp(zoneList)
if len(filteredZonelist.Items) == 0 {
return nil, nil
}

for _, zone := range filteredZonelist.Items {
failureDomainNames = append(failureDomainNames, zone.Name)
}
} else {
availabilityZoneList := &topologyv1.AvailabilityZoneList{}
if err := r.Client.List(ctx, availabilityZoneList); err != nil {
return nil, err
}

if len(availabilityZoneList.Items) == 0 {
return nil, nil
if len(availabilityZoneList.Items) == 0 {
return nil, nil
}
for _, az := range availabilityZoneList.Items {
failureDomainNames = append(failureDomainNames, az.Name)
}
}

failureDomains := clusterv1.FailureDomains{}
for _, az := range availabilityZoneList.Items {
failureDomains[az.Name] = clusterv1.FailureDomainSpec{
for _, name := range failureDomainNames {
failureDomains[name] = clusterv1.FailureDomainSpec{
ControlPlane: true,
}
}

return failureDomains, nil
}

// filterZonesWithoutDeletionTimestamp filters out zones with a deletionTimestamp.

Check failure on line 419 in controllers/vmware/vspherecluster_reconciler.go

View workflow job for this annotation

GitHub Actions / lint

Comment should end in a period (godot)
func filterZonesWithoutDeletionTimestamp(zoneList *topologyv1.ZoneList) *topologyv1.ZoneList {
	// Build a fresh list so the caller's zoneList is left untouched.
	kept := &topologyv1.ZoneList{}
	for i := range zoneList.Items {
		// A non-zero deletionTimestamp means the zone is being deleted; drop it.
		if !zoneList.Items[i].DeletionTimestamp.IsZero() {
			continue
		}
		kept.Items = append(kept.Items, zoneList.Items[i])
	}
	return kept
}
155 changes: 131 additions & 24 deletions controllers/vmware/vspherecluster_reconciler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,20 +24,22 @@ import (
. "github.com/onsi/ginkgo/v2"
"github.com/onsi/ginkgo/v2/types"
. "github.com/onsi/gomega"
topologyv1 "github.com/vmware-tanzu/vm-operator/external/tanzu-topology/api/v1alpha1"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
apirecord "k8s.io/client-go/tools/record"
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
"sigs.k8s.io/cluster-api/util/conditions"
ctrl "sigs.k8s.io/controller-runtime"

utilfeature "k8s.io/component-base/featuregate/testing"
vmwarev1 "sigs.k8s.io/cluster-api-provider-vsphere/apis/vmware/v1beta1"
topologyv1 "sigs.k8s.io/cluster-api-provider-vsphere/external/tanzu-topology/api/v1alpha1"
"sigs.k8s.io/cluster-api-provider-vsphere/feature"
capvcontext "sigs.k8s.io/cluster-api-provider-vsphere/pkg/context"
"sigs.k8s.io/cluster-api-provider-vsphere/pkg/context/vmware"
"sigs.k8s.io/cluster-api-provider-vsphere/pkg/services/network"
"sigs.k8s.io/cluster-api-provider-vsphere/pkg/services/vmoperator"
"sigs.k8s.io/cluster-api-provider-vsphere/pkg/util"
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
"sigs.k8s.io/cluster-api/util/conditions"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
)

func TestVSphereClusterReconciler(t *testing.T) {
Expand Down Expand Up @@ -130,32 +132,137 @@ var _ = Describe("Cluster Controller Tests", func() {
})

Context("Test getFailureDomains", func() {
It("should not find FailureDomains", func() {
fds, err := reconciler.getFailureDomains(ctx)
It("should not find any FailureDomains if neither AvailabilityZone nor Zone exists", func() {
fds, err := reconciler.getFailureDomains(ctx, clusterCtx)
Expect(err).ToNot(HaveOccurred())
Expect(fds).Should(BeEmpty())
})

It("should find FailureDomains", func() {
zoneNames := []string{"homer", "marge", "bart"}
for _, name := range zoneNames {
zone := &topologyv1.AvailabilityZone{
TypeMeta: metav1.TypeMeta{
APIVersion: topologyv1.GroupVersion.String(),
Kind: "AvailabilityZone",
},
ObjectMeta: metav1.ObjectMeta{
Name: name,
},
Context("when only AvailabilityZone exists", func() {
BeforeEach(func() {
azNames := []string{"az-1", "az-2", "az-3"}
for _, name := range azNames {
az := &topologyv1.AvailabilityZone{
TypeMeta: metav1.TypeMeta{
APIVersion: topologyv1.GroupVersion.String(),
Kind: "AvailabilityZone",
},
ObjectMeta: metav1.ObjectMeta{
Name: name,
},
}

Expect(controllerManagerContext.Client.Create(ctx, az)).To(Succeed())
}
})

Expect(controllerManagerContext.Client.Create(ctx, zone)).To(Succeed())
}
It("should discover FailureDomains using AvailabilityZone by default", func() {
fds, err := reconciler.getFailureDomains(ctx, clusterCtx)
Expect(err).ToNot(HaveOccurred())
Expect(fds).NotTo(BeNil())
Expect(fds).Should(HaveLen(3))
})

fds, err := reconciler.getFailureDomains(ctx)
Expect(err).ToNot(HaveOccurred())
Expect(fds).NotTo(BeNil())
Expect(fds).Should(HaveLen(3))
It("should return nil when NamespaceScopedZone is enabled", func() {
defer utilfeature.SetFeatureGateDuringTest(GinkgoTB(), feature.Gates, feature.NamespaceScopedZone, true)()
fds, err := reconciler.getFailureDomains(ctx, clusterCtx)
Expect(err).ToNot(HaveOccurred())
Expect(fds).To(BeNil())
})
})

Context("when AvailabilityZone and Zone co-exist", func() {
	BeforeEach(func() {
		// Cluster-scoped AvailabilityZones: two of them, so the result size
		// differs from the three namespace-scoped Zones created below.
		azNames := []string{"az-1", "az-2"}
		for _, name := range azNames {
			az := &topologyv1.AvailabilityZone{
				TypeMeta: metav1.TypeMeta{
					APIVersion: topologyv1.GroupVersion.String(),
					Kind:       "AvailabilityZone",
				},
				ObjectMeta: metav1.ObjectMeta{
					Name: name,
				},
			}
			Expect(controllerManagerContext.Client.Create(ctx, az)).To(Succeed())
		}

		// Namespace-scoped Zones, created in the VSphereCluster's namespace
		// because that is the namespace getFailureDomains lists from.
		zoneNames := []string{"zone-1", "zone-2", "zone-3"}
		for _, name := range zoneNames {
			zone := &topologyv1.Zone{
				TypeMeta: metav1.TypeMeta{
					APIVersion: topologyv1.GroupVersion.String(),
					Kind:       "Zone",
				},
				ObjectMeta: metav1.ObjectMeta{
					Name:      name,
					Namespace: clusterCtx.VSphereCluster.Namespace,
				},
			}

			Expect(controllerManagerContext.Client.Create(ctx, zone)).To(Succeed())
		}
	})

	It("should discover FailureDomains using AvailabilityZone by default", func() {
		fds, err := reconciler.getFailureDomains(ctx, clusterCtx)
		Expect(err).ToNot(HaveOccurred())
		Expect(fds).NotTo(BeNil())
		Expect(fds).Should(HaveLen(2))
		// Check the names too, so the test proves the AvailabilityZones were the source.
		Expect(fds).To(HaveKey("az-1"))
		Expect(fds).To(HaveKey("az-2"))
	})

	It("should discover FailureDomains using Zone when NamespaceScopedZone is enabled", func() {
		defer utilfeature.SetFeatureGateDuringTest(GinkgoTB(), feature.Gates, feature.NamespaceScopedZone, true)()

		fds, err := reconciler.getFailureDomains(ctx, clusterCtx)
		Expect(err).ToNot(HaveOccurred())
		Expect(fds).NotTo(BeNil())
		Expect(fds).Should(HaveLen(3))
		// Check the names too, so the test proves the Zones were the source.
		Expect(fds).To(HaveKey("zone-1"))
		Expect(fds).To(HaveKey("zone-2"))
		Expect(fds).To(HaveKey("zone-3"))
	})
})

Context("when Zone is marked for deletion", func() {
	BeforeEach(func() {
		zoneNames := []string{"zone-1", "zone-2", "zone-3"}
		zoneNamespace := clusterCtx.VSphereCluster.Namespace
		for _, name := range zoneNames {
			zone := &topologyv1.Zone{
				TypeMeta: metav1.TypeMeta{
					APIVersion: topologyv1.GroupVersion.String(),
					Kind:       "Zone",
				},
				ObjectMeta: metav1.ObjectMeta{
					Name:      name,
					Namespace: zoneNamespace,
					// The finalizer keeps the object around after Delete so that
					// it carries a deletionTimestamp instead of disappearing.
					Finalizers: []string{"zone.test.finalizer"},
				},
			}

			Expect(controllerManagerContext.Client.Create(ctx, zone)).To(Succeed())

			if name == "zone-3" {
				// Delete the zone to set the deletion timestamp.
				Expect(controllerManagerContext.Client.Delete(ctx, zone)).To(Succeed())
				deletedZone := &topologyv1.Zone{}
				Expect(controllerManagerContext.Client.Get(ctx, client.ObjectKey{Namespace: zoneNamespace, Name: name}, deletedZone)).To(Succeed())

				// Validate the deletion timestamp.
				Expect(deletedZone.DeletionTimestamp.IsZero()).To(BeFalse())
			}
		}
	})

	It("should discover FailureDomains using Zone and filter out Zone marked for deletion", func() {
		defer utilfeature.SetFeatureGateDuringTest(GinkgoTB(), feature.Gates, feature.NamespaceScopedZone, true)()

		fds, err := reconciler.getFailureDomains(ctx, clusterCtx)
		Expect(err).ToNot(HaveOccurred())
		Expect(fds).NotTo(BeNil())
		Expect(fds).Should(HaveLen(2))
		// The count alone does not show which zone was dropped; pin the names.
		Expect(fds).To(HaveKey("zone-1"))
		Expect(fds).To(HaveKey("zone-2"))
		Expect(fds).NotTo(HaveKey("zone-3"))
	})
})

})
})
73 changes: 73 additions & 0 deletions external/tanzu-topology/api/v1alpha1/availability_zone.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
// Copyright (c) 2021 VMware, Inc. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

Check failure on line 2 in external/tanzu-topology/api/v1alpha1/availability_zone.go

View workflow job for this annotation

GitHub Actions / lint

Comment should end in a period (godot)

package v1alpha1

import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// NamespaceInfo contains identifying information about the vSphere resources
// used to represent a Kubernetes namespace on individual vSphere Zones.
type NamespaceInfo struct {
// PoolMoId is the managed object ID of the vSphere ResourcePool for a
// Namespace on an individual vSphere Cluster.
PoolMoId string `json:"poolMoId,omitempty"`

Check failure on line 15 in external/tanzu-topology/api/v1alpha1/availability_zone.go

View workflow job for this annotation

GitHub Actions / lint

var-naming: struct field PoolMoId should be PoolMoID (revive)

// PoolMoIDs are the managed object IDs of the vSphere ResourcePools for a
// Namespace in an individual vSphere Zone. A zone may comprise multiple
// ResourcePools.
PoolMoIDs []string `json:"poolMoIDs,omitempty"`

// FolderMoId is the managed object ID of the vSphere Folder for a
// Namespace. Folders are global and not per-vSphere Cluster, but the
// FolderMoId is stored here, alongside the PoolMoId for convenience.
FolderMoId string `json:"folderMoId,omitempty"`

Check failure on line 25 in external/tanzu-topology/api/v1alpha1/availability_zone.go

View workflow job for this annotation

GitHub Actions / lint

var-naming: struct field FolderMoId should be FolderMoID (revive)
}

// AvailabilityZoneSpec defines the desired state of AvailabilityZone.
//
// All fields are optional and are omitted from the JSON encoding when empty.
//
// NOTE(review): both a singular ClusterComputeResourceMoId and a plural
// ClusterComputeResourceMoIDs field are declared; which one consumers should
// prefer is not visible from this file — confirm against the upstream API.
type AvailabilityZoneSpec struct {
// ClusterComputeResourceMoId is the managed object ID of the vSphere
// ClusterComputeResource represented by this availability zone.
ClusterComputeResourceMoId string `json:"clusterComputeResourceMoId,omitempty"`

// ClusterComputeResourceMoIDs are the managed object IDs of the vSphere
// ClusterComputeResources represented by this availability zone.
ClusterComputeResourceMoIDs []string `json:"clusterComputeResourceMoIDs,omitempty"`

// Namespaces is a map that enables querying information about the vSphere
// objects that make up a Kubernetes Namespace based on its name.
// The map key is the Namespace name; the value is its NamespaceInfo.
Namespaces map[string]NamespaceInfo `json:"namespaces,omitempty"`
}

// AvailabilityZoneStatus defines the observed state of AvailabilityZone.
// It currently declares no fields.
type AvailabilityZoneStatus struct {
}

// AvailabilityZone is the schema for the AvailabilityZone resource for the
// vSphere topology API. Per the kubebuilder resource marker below, it is a
// cluster-scoped resource with the short name "az".
//
// +kubebuilder:object:root=true
// +kubebuilder:resource:path=availabilityzones,scope=Cluster,shortName=az
// +kubebuilder:storageversion
// +kubebuilder:subresource:status
type AvailabilityZone struct {
metav1.TypeMeta `json:",inline"`
metav1.ObjectMeta `json:"metadata,omitempty"`

// Spec is the desired state of the AvailabilityZone.
Spec AvailabilityZoneSpec `json:"spec,omitempty"`
// Status is the observed state of the AvailabilityZone.
Status AvailabilityZoneStatus `json:"status,omitempty"`
}

// AvailabilityZoneList contains a list of AvailabilityZone resources.
//
// +kubebuilder:object:root=true
type AvailabilityZoneList struct {
metav1.TypeMeta `json:",inline"`
metav1.ListMeta `json:"metadata,omitempty"`
// Items holds the AvailabilityZone objects in the list.
Items []AvailabilityZone `json:"items"`
}

// init registers the AvailabilityZone and AvailabilityZoneList types with the
// package-level SchemeBuilder.
func init() {
	SchemeBuilder.Register(
		&AvailabilityZone{},
		&AvailabilityZoneList{},
	)
}
26 changes: 26 additions & 0 deletions external/tanzu-topology/api/v1alpha1/groupversion_info.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
// Copyright (c) 2021 VMware, Inc. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

Check failure on line 2 in external/tanzu-topology/api/v1alpha1/groupversion_info.go

View workflow job for this annotation

GitHub Actions / lint

Comment should end in a period (godot)

// Package v1alpha1 contains API Schema definitions for vSphere topology APIs.
// +kubebuilder:object:generate=true
// +groupName=topology.tanzu.vmware.com
package v1alpha1

import (
"k8s.io/apimachinery/pkg/runtime/schema"
"sigs.k8s.io/controller-runtime/pkg/scheme"
)

var (
// GroupVersion is the group version used to register these objects.

Check failure on line 15 in external/tanzu-topology/api/v1alpha1/groupversion_info.go

View workflow job for this annotation

GitHub Actions / lint

Comment should end in a period (godot)
GroupVersion = schema.GroupVersion{
Group: "topology.tanzu.vmware.com",
Version: "v1alpha1",
}

// SchemeBuilder is used to add go types to the GroupVersionKind scheme.

Check failure on line 21 in external/tanzu-topology/api/v1alpha1/groupversion_info.go

View workflow job for this annotation

GitHub Actions / lint

Comment should end in a period (godot)
SchemeBuilder = &scheme.Builder{GroupVersion: GroupVersion}

// AddToScheme adds the types in this group-version to the given scheme.
AddToScheme = SchemeBuilder.AddToScheme
)
Loading

0 comments on commit 250cdcd

Please sign in to comment.