Skip to content

Commit

Permalink
1. Validates Cloudstack template name and kubernetes version from the…
Browse files Browse the repository at this point in the history
… controller side and waits for both to match for further reconciliation.

2. Since we have seperated ETCD and CP reconciliation, it waits for ETCD to be ready before marking ControlPlaneReady to true.
3. From the CLI side during management cluster upgrade, waits for Cluster FailureMessage to be nil before waiting for other condition to met.
  • Loading branch information
panktishah26 committed Sep 26, 2023
1 parent a62abf3 commit ea93006
Show file tree
Hide file tree
Showing 23 changed files with 426 additions and 58 deletions.
4 changes: 2 additions & 2 deletions pkg/cluster/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -202,8 +202,8 @@ func (c *Config) ChildObjects() []kubernetes.Object {
// ClusterAndChildren returns all kubernetes objects in the cluster Config.
// It's equivalent to appending the Cluster to the result of ChildObjects.
func (c *Config) ClusterAndChildren() []kubernetes.Object {
objs := c.ChildObjects()
return append(objs, c.Cluster)
objs := []kubernetes.Object{c.Cluster}
return append(objs, c.ChildObjects()...)
}

func appendIfNotNil(objs []kubernetes.Object, elems ...kubernetes.Object) []kubernetes.Object {
Expand Down
37 changes: 27 additions & 10 deletions pkg/cluster/wait.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,32 @@ import (
// or the retrier timeouts. If observedGeneration is not equal to generation,
// the condition is considered false regardless of the status value.
func WaitForCondition(ctx context.Context, client kubernetes.Reader, cluster *anywherev1.Cluster, retrier *retrier.Retrier, conditionType anywherev1.ConditionType) error {
return retrier.Retry(func() error {
if err := WaitFor(ctx, client, cluster, retrier, func(c *anywherev1.Cluster) error {
condition := conditions.Get(c, conditionType)
if condition == nil {
return fmt.Errorf("cluster doesn't yet have condition %s", conditionType)
}

if condition.Status != corev1.ConditionTrue {
return fmt.Errorf("cluster condition %s is %s: %s", conditionType, condition.Status, condition.Message)
}
return nil
}); err != nil {
return err
}

return nil
})
}

// Matcher matches the given condition.
type Matcher func(*anywherev1.Cluster) error

// WaitFor gets the cluster object from the client
// checks for generation and observedGeneration condition
// matches condition and returns error if the condition is not met.
func WaitFor(ctx context.Context, client kubernetes.Reader, cluster *anywherev1.Cluster, retrier *retrier.Retrier, matcher Matcher) error {
return retrier.Retry(func() error {
c := &anywherev1.Cluster{}

Expand All @@ -35,15 +61,6 @@ func WaitForCondition(ctx context.Context, client kubernetes.Reader, cluster *an
return fmt.Errorf("cluster generation (%d) and observedGeneration (%d) differ", generation, observedGeneration)
}

condition := conditions.Get(c, conditionType)
if condition == nil {
return fmt.Errorf("cluster doesn't yet have condition %s", conditionType)
}

if condition.Status != corev1.ConditionTrue {
return fmt.Errorf("cluster condition %s is %s: %s", conditionType, condition.Status, condition.Message)
}

return nil
return matcher(c)
})
}
110 changes: 110 additions & 0 deletions pkg/cluster/wait_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package cluster_test

import (
"context"
"fmt"
"testing"

. "github.com/onsi/gomega"
Expand Down Expand Up @@ -153,3 +154,112 @@ func TestWaitForCondition(t *testing.T) {
})
}
}

func TestWaitFor(t *testing.T) {
testCases := []struct {
name string
clusterInput, currentCluster *anywherev1.Cluster
retrier *retrier.Retrier
matcher func(_ *anywherev1.Cluster) error
wantErr string
}{
{
name: "cluster does not exist",
clusterInput: &anywherev1.Cluster{
ObjectMeta: metav1.ObjectMeta{
Name: "my-c",
Namespace: "my-n",
},
},
currentCluster: &anywherev1.Cluster{
ObjectMeta: metav1.ObjectMeta{
Name: "my-c",
Namespace: "default",
},
},
retrier: retrier.NewWithMaxRetries(1, 0),
matcher: func(_ *anywherev1.Cluster) error {
return nil
},
wantErr: "clusters.anywhere.eks.amazonaws.com \"my-c\" not found",
},
{
name: "cluster namespace not provided",
clusterInput: &anywherev1.Cluster{
ObjectMeta: metav1.ObjectMeta{
Name: "my-c",
},
},
currentCluster: &anywherev1.Cluster{
ObjectMeta: metav1.ObjectMeta{
Name: "my-c",
Namespace: "eksa-namespace",
},
},
retrier: retrier.NewWithMaxRetries(1, 0),
matcher: func(_ *anywherev1.Cluster) error {
return nil
},
wantErr: "clusters.anywhere.eks.amazonaws.com \"my-c\" not found",
},
{
name: "observed generation not updated",
clusterInput: &anywherev1.Cluster{
ObjectMeta: metav1.ObjectMeta{
Name: "my-c",
Namespace: "my-n",
},
},
currentCluster: &anywherev1.Cluster{
ObjectMeta: metav1.ObjectMeta{
Name: "my-c",
Namespace: "my-n",
Generation: 5,
},
Status: anywherev1.ClusterStatus{
ObservedGeneration: 4,
},
},
retrier: retrier.NewWithMaxRetries(1, 0),
matcher: func(_ *anywherev1.Cluster) error {
return nil
},
wantErr: "cluster generation (5) and observedGeneration (4) differ",
},
{
name: "matcher return error",
clusterInput: &anywherev1.Cluster{
ObjectMeta: metav1.ObjectMeta{
Name: "my-c",
Namespace: "my-n",
},
},
currentCluster: &anywherev1.Cluster{
ObjectMeta: metav1.ObjectMeta{
Name: "my-c",
Namespace: "my-n",
},
},
retrier: retrier.NewWithMaxRetries(1, 0),
matcher: func(_ *anywherev1.Cluster) error {
return fmt.Errorf("error")
},
wantErr: "error",
},
}

for _, tt := range testCases {
t.Run(tt.name, func(t *testing.T) {
ctx := context.Background()
g := NewWithT(t)
client := test.NewFakeKubeClient(tt.currentCluster)

gotErr := cluster.WaitFor(ctx, client, tt.clusterInput, tt.retrier, tt.matcher)
if tt.wantErr != "" {
g.Expect(gotErr).To(MatchError(tt.wantErr))
} else {
g.Expect(gotErr).NotTo(HaveOccurred())
}
})
}
}
10 changes: 10 additions & 0 deletions pkg/clustermanager/applier.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package clustermanager

import (
"context"
"fmt"
"math"
"time"

Expand Down Expand Up @@ -129,6 +130,15 @@ func (a Applier) Run(ctx context.Context, spec *cluster.Spec, managementCluster
waitStartTime := time.Now()
retry := a.retrierForWait(waitStartTime)

if err := cluster.WaitFor(ctx, client, spec.Cluster, retrier.New(5*time.Second), func(c *anywherev1.Cluster) error {
if c.Status.FailureMessage != nil && *c.Status.FailureMessage != "" {
return fmt.Errorf("cluster has an error: %s", *c.Status.FailureMessage)
}
return nil
}); err != nil {
return fmt.Errorf("cluster has a validation error that doesn't seem transient: %s", err)
}

a.log.V(3).Info("Waiting for control plane to be ready")
if err := cluster.WaitForCondition(ctx, client, spec.Cluster, retry, anywherev1.ControlPlaneReadyCondition); err != nil {
return errors.Wrapf(err, "waiting for cluster's control plane to be ready")
Expand Down
17 changes: 17 additions & 0 deletions pkg/clustermanager/applier_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,11 @@ func (a *applierTest) buildClient(objs ...kubernetes.Object) {
a.clientFactory.EXPECT().BuildClientFromKubeconfig(a.mgmtCluster.KubeconfigFile).Return(a.client, nil)
}

func (a *applierTest) updateFailureMessage(c *anywherev1.Cluster, err string) {
c.Status.FailureMessage = ptr.String(err)
a.Expect(a.client.Update(a.ctx, c)).To(Succeed())
}

func (a *applierTest) markCPReady(c *anywherev1.Cluster) {
conditions.MarkTrue(c, anywherev1.ControlPlaneReadyCondition)
a.Expect(a.client.Update(a.ctx, c)).To(Succeed())
Expand Down Expand Up @@ -173,6 +178,18 @@ func TestApplierRunErrorApplying(t *testing.T) {
tt.Expect(a.Run(tt.ctx, tt.spec, tt.mgmtCluster)).To(MatchError(ContainSubstring("applying cluster spec")))
}

func TestApplierRunFailureMessage(t *testing.T) {
tt := newApplierTest(t)
tt.buildClient(tt.spec.ClusterAndChildren()...)
tt.updateFailureMessage(tt.spec.Cluster, "error")
tt.startFakeController()
a := clustermanager.NewApplier(tt.log, tt.clientFactory,
clustermanager.WithApplierRetryBackOff(time.Millisecond),
)

tt.Expect(a.Run(tt.ctx, tt.spec, tt.mgmtCluster)).To(MatchError(ContainSubstring("cluster has a validation error that doesn't seem transient")))
}

func TestApplierRunControlPlaneNotReady(t *testing.T) {
tt := newApplierTest(t)
tt.buildClient()
Expand Down
24 changes: 22 additions & 2 deletions pkg/controller/clusters/status.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package clusters
import (
"context"

etcdv1 "github.com/aws/etcdadm-controller/api/v1beta1"
"github.com/pkg/errors"
v1 "k8s.io/api/core/v1"
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
Expand All @@ -22,8 +23,21 @@ func UpdateClusterStatusForControlPlane(ctx context.Context, client client.Clien
return errors.Wrapf(err, "getting kubeadmcontrolplane")
}

var etcdadmCluster *etcdv1.EtcdadmCluster
if cluster.Spec.ExternalEtcdConfiguration != nil {
capiCluster, err := controller.GetCAPICluster(ctx, client, cluster)
if err != nil {
return errors.Wrap(err, "getting capi cluster")
}

etcdadmCluster, err = getEtcdadmCluster(ctx, client, capiCluster)
if err != nil {
return errors.Wrap(err, "reading etcdadm cluster")
}
}

updateControlPlaneInitializedCondition(cluster, kcp)
updateControlPlaneReadyCondition(cluster, kcp)
updateControlPlaneReadyCondition(cluster, kcp, etcdadmCluster)

return nil
}
Expand Down Expand Up @@ -70,7 +84,13 @@ func UpdateClusterStatusForCNI(ctx context.Context, cluster *anywherev1.Cluster)

// updateControlPlaneReadyCondition updates the ControlPlaneReady condition, after checking the state of the control plane
// in the cluster.
func updateControlPlaneReadyCondition(cluster *anywherev1.Cluster, kcp *controlplanev1.KubeadmControlPlane) {
func updateControlPlaneReadyCondition(cluster *anywherev1.Cluster, kcp *controlplanev1.KubeadmControlPlane, etcdadmCluster *etcdv1.EtcdadmCluster) {
// Make sure etcd cluster is ready before marking ControlPlaneReady status to true
if cluster.Spec.ExternalEtcdConfiguration != nil && !etcdadmClusterReady(etcdadmCluster) {
conditions.MarkFalse(cluster, anywherev1.ControlPlaneReadyCondition, anywherev1.RollingUpgradeInProgress, clusterv1.ConditionSeverityInfo, "Etcd is not ready")
return
}

initializedCondition := conditions.Get(cluster, anywherev1.ControlPlaneInitializedCondition)
if initializedCondition.Status != "True" {
conditions.MarkFalse(cluster, anywherev1.ControlPlaneReadyCondition, initializedCondition.Reason, initializedCondition.Severity, initializedCondition.Message)
Expand Down
Loading

0 comments on commit ea93006

Please sign in to comment.