Skip to content

Commit

Permalink
Refactor disabled component test (#1464)
Browse files Browse the repository at this point in the history
The tests to validate the correctness of the operator when components
are disabled have been refactored to:

1. take into account all the component's deployments, not only the first
   one to avoid swallowing misbehavior
2. remove useless waiting time before checking the presence of
   component's deployments because within the introduction of the
   internal APIs, the object hierarchy now allow for a proper cleanup as
   all the object have a proper owner reference. The operator deletes
   the component API using foreground deletion policy hence once the top
   level component CR is removed, the all the owned objects should have
   been deleted too.
  • Loading branch information
lburgazzoli authored Dec 20, 2024
1 parent b36c13a commit 9b75bac
Show file tree
Hide file tree
Showing 8 changed files with 174 additions and 196 deletions.
50 changes: 22 additions & 28 deletions tests/e2e/codeflare_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -259,28 +259,23 @@ func (tc *CodeFlareTestCtx) testUpdateOnCodeFlareResources() error {
}

func (tc *CodeFlareTestCtx) testUpdateCodeFlareComponentDisabled() error {
// Test Updating CodeFlare to be disabled
var codeflareDeploymentName string
if tc.testCtx.testDsc.Spec.Components.CodeFlare.ManagementState != operatorv1.Managed {
return errors.New("the CodeFlare spec should be in 'enabled: true' state in order to perform test")
}

if tc.testCtx.testDsc.Spec.Components.CodeFlare.ManagementState == operatorv1.Managed {
appDeployments, err := tc.testCtx.kubeClient.AppsV1().Deployments(tc.testCtx.applicationsNamespace).List(tc.testCtx.ctx, metav1.ListOptions{
LabelSelector: labels.PlatformPartOf + "=" + strings.ToLower(gvk.CodeFlare.Kind),
})
if err != nil {
return fmt.Errorf("error getting enabled component %v", componentApi.CodeFlareComponentName)
}
if len(appDeployments.Items) > 0 {
codeflareDeploymentName = appDeployments.Items[0].Name
if appDeployments.Items[0].Status.ReadyReplicas == 0 {
return fmt.Errorf("error getting enabled component: %s its deployment 'ReadyReplicas'", codeflareDeploymentName)
}
deployments, err := tc.testCtx.getComponentDeployments(gvk.CodeFlare)
if err != nil {
return fmt.Errorf("error getting enabled component %s", componentApi.CodeFlareComponentName)
}

for _, d := range deployments {
if d.Status.ReadyReplicas == 0 {
return fmt.Errorf("component %s deployment %sis not ready", d.Name, componentApi.CodeFlareComponentName)
}
} else {
return errors.New("CodeFlare spec should be in 'enabled: true' state in order to perform test")
}

// Disable component CodeFlare
err := retry.RetryOnConflict(retry.DefaultRetry, func() error {
err = retry.RetryOnConflict(retry.DefaultRetry, func() error {
// refresh DSC instance in case it was updated during the reconcile
err := tc.testCtx.customClient.Get(tc.testCtx.ctx, types.NamespacedName{Name: tc.testCtx.testDsc.Name}, tc.testCtx.testDsc)
if err != nil {
Expand Down Expand Up @@ -312,17 +307,16 @@ func (tc *CodeFlareTestCtx) testUpdateCodeFlareComponentDisabled() error {
return fmt.Errorf("component CodeFlare is disabled, should not get the CodeFlare CR %v", tc.testCodeFlareInstance.Name)
}

// Sleep for 20 seconds to allow the operator to reconcile
time.Sleep(2 * generalRetryInterval)
_, err = tc.testCtx.kubeClient.AppsV1().Deployments(tc.testCtx.applicationsNamespace).Get(tc.testCtx.ctx, codeflareDeploymentName, metav1.GetOptions{})
deployments, err = tc.testCtx.getComponentDeployments(gvk.CodeFlare)
if err != nil {
if k8serr.IsNotFound(err) {
return nil // correct result: should not find deployment after we disable it already
}
return fmt.Errorf("error getting component resource after reconcile: %w", err)
return fmt.Errorf("error listing deployments: %w", err)
}
return fmt.Errorf("component %v is disabled, should not get its deployment %v from NS %v any more",
componentApi.CodeFlareKind,
codeflareDeploymentName,
tc.testCtx.applicationsNamespace)

if len(deployments) != 0 {
return fmt.Errorf("component %v is disabled, should not have deployments in NS %v any more",
gvk.CodeFlare.Kind,
tc.testCtx.applicationsNamespace)
}

return nil
}
49 changes: 21 additions & 28 deletions tests/e2e/datasciencepipelines_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -298,29 +298,23 @@ func (tc *DataSciencePipelinesTestCtx) testUpdateOnDataSciencePipelinesResources
}

func (tc *DataSciencePipelinesTestCtx) testUpdateDataSciencePipelinesComponentDisabled() error {
// Test Updating DSP to be disabled
var dspDeploymentName string
if tc.testCtx.testDsc.Spec.Components.DataSciencePipelines.ManagementState != operatorv1.Managed {
return errors.New("the DataSciencePipelines spec should be in 'enabled: true' state in order to perform test")
}

if tc.testCtx.testDsc.Spec.Components.DataSciencePipelines.ManagementState == operatorv1.Managed {
appDeployments, err := tc.testCtx.kubeClient.AppsV1().Deployments(tc.testCtx.applicationsNamespace).List(tc.testCtx.ctx, metav1.ListOptions{
LabelSelector: labels.PlatformPartOf + "=" + strings.ToLower(gvk.DataSciencePipelines.Kind),
})
if err != nil {
return fmt.Errorf("error getting enabled component %v", componentApi.DataSciencePipelinesComponentName)
}
deployments, err := tc.testCtx.getComponentDeployments(gvk.DataSciencePipelines)
if err != nil {
return fmt.Errorf("error getting enabled component %s", componentApi.DataSciencePipelinesComponentName)
}

if len(appDeployments.Items) > 0 {
dspDeploymentName = appDeployments.Items[0].Name
if appDeployments.Items[0].Status.ReadyReplicas == 0 {
return fmt.Errorf("error getting enabled component: %s its deployment 'ReadyReplicas'", dspDeploymentName)
}
for _, d := range deployments {
if d.Status.ReadyReplicas == 0 {
return fmt.Errorf("component %s deployment %sis not ready", d.Name, componentApi.DataSciencePipelinesComponentName)
}
} else {
return errors.New("datasciencepipelines spec should be in 'enabled: true' state in order to perform test")
}

// Disable component DataSciencePipelines
err := retry.RetryOnConflict(retry.DefaultRetry, func() error {
err = retry.RetryOnConflict(retry.DefaultRetry, func() error {
// refresh DSC instance in case it was updated during the reconcile
err := tc.testCtx.customClient.Get(tc.testCtx.ctx, types.NamespacedName{Name: tc.testCtx.testDsc.Name}, tc.testCtx.testDsc)
if err != nil {
Expand Down Expand Up @@ -354,17 +348,16 @@ func (tc *DataSciencePipelinesTestCtx) testUpdateDataSciencePipelinesComponentDi
return fmt.Errorf("component datasciencepipelines is disabled, should not get the DataSciencePipelines CR %v", tc.testDataSciencePipelinesInstance.Name)
}

// Sleep for 20 seconds to allow the operator to reconcile
time.Sleep(2 * generalRetryInterval)
_, err = tc.testCtx.kubeClient.AppsV1().Deployments(tc.testCtx.applicationsNamespace).Get(tc.testCtx.ctx, dspDeploymentName, metav1.GetOptions{})
deployments, err = tc.testCtx.getComponentDeployments(gvk.DataSciencePipelines)
if err != nil {
if k8serr.IsNotFound(err) {
return nil // correct result: should not find deployment after we disable it already
}
return fmt.Errorf("error getting component resource after reconcile: %w", err)
return fmt.Errorf("error listing deployments: %w", err)
}
return fmt.Errorf("component %v is disabled, should not get its deployment %v from NS %v any more",
componentApi.DataSciencePipelinesKind,
dspDeploymentName,
tc.testCtx.applicationsNamespace)

if len(deployments) != 0 {
return fmt.Errorf("component %v is disabled, should not have deployments in NS %v any more",
gvk.DataSciencePipelines.Kind,
tc.testCtx.applicationsNamespace)
}

return nil
}
21 changes: 21 additions & 0 deletions tests/e2e/helper_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import (
infrav1 "github.com/opendatahub-io/opendatahub-operator/v2/apis/infrastructure/v1"
serviceApi "github.com/opendatahub-io/opendatahub-operator/v2/apis/services/v1alpha1"
"github.com/opendatahub-io/opendatahub-operator/v2/controllers/components/modelregistry"
"github.com/opendatahub-io/opendatahub-operator/v2/pkg/metadata/labels"
)

const (
Expand Down Expand Up @@ -75,6 +76,26 @@ func (tc *testContext) waitForOperatorDeployment(name string, replicas int32) er
return err
}

func (tc *testContext) getComponentDeployments(componentGVK schema.GroupVersionKind) ([]appsv1.Deployment, error) {
deployments := appsv1.DeploymentList{}
err := tc.customClient.List(
tc.ctx,
&deployments,
client.InNamespace(
tc.applicationsNamespace,
),
client.MatchingLabels{
labels.PlatformPartOf: strings.ToLower(componentGVK.Kind),
},
)

if err != nil {
return nil, err
}

return deployments.Items, nil
}

func setupDSCICR(name string) *dsciv1.DSCInitialization {
dsciTest := &dsciv1.DSCInitialization{
ObjectMeta: metav1.ObjectMeta{
Expand Down
50 changes: 22 additions & 28 deletions tests/e2e/kueue_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -259,28 +259,23 @@ func (tc *KueueTestCtx) testUpdateOnKueueResources() error {
}

func (tc *KueueTestCtx) testUpdateKueueComponentDisabled() error {
// Test Updating Kueue to be disabled
var kueueDeploymentName string
if tc.testCtx.testDsc.Spec.Components.Kueue.ManagementState != operatorv1.Managed {
return errors.New("the Kueue spec should be in 'enabled: true' state in order to perform test")
}

if tc.testCtx.testDsc.Spec.Components.Kueue.ManagementState == operatorv1.Managed {
appDeployments, err := tc.testCtx.kubeClient.AppsV1().Deployments(tc.testCtx.applicationsNamespace).List(tc.testCtx.ctx, metav1.ListOptions{
LabelSelector: labels.PlatformPartOf + "=" + strings.ToLower(gvk.Kueue.Kind),
})
if err != nil {
return fmt.Errorf("error getting enabled component %v", componentApi.KueueComponentName)
}
if len(appDeployments.Items) > 0 {
kueueDeploymentName = appDeployments.Items[0].Name
if appDeployments.Items[0].Status.ReadyReplicas == 0 {
return fmt.Errorf("error getting enabled component: %s its deployment 'ReadyReplicas'", kueueDeploymentName)
}
deployments, err := tc.testCtx.getComponentDeployments(gvk.Kueue)
if err != nil {
return fmt.Errorf("error getting enabled component %s", componentApi.KueueComponentName)
}

for _, d := range deployments {
if d.Status.ReadyReplicas == 0 {
return fmt.Errorf("component %s deployment %sis not ready", d.Name, componentApi.KueueComponentName)
}
} else {
return errors.New("kueue spec should be in 'enabled: true' state in order to perform test")
}

// Disable component Kueue
err := retry.RetryOnConflict(retry.DefaultRetry, func() error {
err = retry.RetryOnConflict(retry.DefaultRetry, func() error {
// refresh DSC instance in case it was updated during the reconcile
err := tc.testCtx.customClient.Get(tc.testCtx.ctx, types.NamespacedName{Name: tc.testCtx.testDsc.Name}, tc.testCtx.testDsc)
if err != nil {
Expand Down Expand Up @@ -312,17 +307,16 @@ func (tc *KueueTestCtx) testUpdateKueueComponentDisabled() error {
return fmt.Errorf("component kueue is disabled, should not get the Kueue CR %v", tc.testKueueInstance.Name)
}

// Sleep for 20 seconds to allow the operator to reconcile
time.Sleep(2 * generalRetryInterval)
_, err = tc.testCtx.kubeClient.AppsV1().Deployments(tc.testCtx.applicationsNamespace).Get(tc.testCtx.ctx, kueueDeploymentName, metav1.GetOptions{})
deployments, err = tc.testCtx.getComponentDeployments(gvk.Kueue)
if err != nil {
if k8serr.IsNotFound(err) {
return nil // correct result: should not find deployment after we disable it already
}
return fmt.Errorf("error getting component resource after reconcile: %w", err)
return fmt.Errorf("error listing deployments: %w", err)
}
return fmt.Errorf("component %v is disabled, should not get its deployment %v from NS %v any more",
componentApi.KueueKind,
kueueDeploymentName,
tc.testCtx.applicationsNamespace)

if len(deployments) != 0 {
return fmt.Errorf("component %v is disabled, should not have deployments in NS %v any more",
gvk.Kueue.Kind,
tc.testCtx.applicationsNamespace)
}

return nil
}
50 changes: 22 additions & 28 deletions tests/e2e/ray_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -259,28 +259,23 @@ func (tc *RayTestCtx) testUpdateOnRayResources() error {
}

func (tc *RayTestCtx) testUpdateRayComponentDisabled() error {
// Test Updating Ray to be disabled
var rayDeploymentName string
if tc.testCtx.testDsc.Spec.Components.Ray.ManagementState != operatorv1.Managed {
return errors.New("the Ray spec should be in 'enabled: true' state in order to perform test")
}

if tc.testCtx.testDsc.Spec.Components.Ray.ManagementState == operatorv1.Managed {
appDeployments, err := tc.testCtx.kubeClient.AppsV1().Deployments(tc.testCtx.applicationsNamespace).List(tc.testCtx.ctx, metav1.ListOptions{
LabelSelector: labels.PlatformPartOf + "=" + strings.ToLower(gvk.Ray.Kind),
})
if err != nil {
return fmt.Errorf("error getting enabled component %v", componentApi.RayComponentName)
}
if len(appDeployments.Items) > 0 {
rayDeploymentName = appDeployments.Items[0].Name
if appDeployments.Items[0].Status.ReadyReplicas == 0 {
return fmt.Errorf("error getting enabled component: %s its deployment 'ReadyReplicas'", rayDeploymentName)
}
deployments, err := tc.testCtx.getComponentDeployments(gvk.Ray)
if err != nil {
return fmt.Errorf("error getting enabled component %s", componentApi.RayComponentName)
}

for _, d := range deployments {
if d.Status.ReadyReplicas == 0 {
return fmt.Errorf("component %s deployment %sis not ready", d.Name, componentApi.RayComponentName)
}
} else {
return errors.New("ray spec should be in 'enabled: true' state in order to perform test")
}

// Disable component Ray
err := retry.RetryOnConflict(retry.DefaultRetry, func() error {
err = retry.RetryOnConflict(retry.DefaultRetry, func() error {
// refresh DSC instance in case it was updated during the reconcile
err := tc.testCtx.customClient.Get(tc.testCtx.ctx, types.NamespacedName{Name: tc.testCtx.testDsc.Name}, tc.testCtx.testDsc)
if err != nil {
Expand Down Expand Up @@ -312,17 +307,16 @@ func (tc *RayTestCtx) testUpdateRayComponentDisabled() error {
return fmt.Errorf("component ray is disabled, should not get the Ray CR %v", tc.testRayInstance.Name)
}

// Sleep for 20 seconds to allow the operator to reconcile
time.Sleep(2 * generalRetryInterval)
_, err = tc.testCtx.kubeClient.AppsV1().Deployments(tc.testCtx.applicationsNamespace).Get(tc.testCtx.ctx, rayDeploymentName, metav1.GetOptions{})
deployments, err = tc.testCtx.getComponentDeployments(gvk.Ray)
if err != nil {
if k8serr.IsNotFound(err) {
return nil // correct result: should not find deployment after we disable it already
}
return fmt.Errorf("error getting component resource after reconcile: %w", err)
return fmt.Errorf("error listing deployments: %w", err)
}
return fmt.Errorf("component %v is disabled, should not get its deployment %v from NS %v any more",
componentApi.RayKind,
rayDeploymentName,
tc.testCtx.applicationsNamespace)

if len(deployments) != 0 {
return fmt.Errorf("component %v is disabled, should not have deployments in NS %v any more",
gvk.Ray.Kind,
tc.testCtx.applicationsNamespace)
}

return nil
}
50 changes: 22 additions & 28 deletions tests/e2e/trainingoperator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -259,28 +259,23 @@ func (tc *TrainingOperatorTestCtx) testUpdateOnTrainingOperatorResources() error
}

func (tc *TrainingOperatorTestCtx) testUpdateTrainingOperatorComponentDisabled() error {
// Test Updating TrainingOperator to be disabled
var trainingoperatorDeploymentName string
if tc.testCtx.testDsc.Spec.Components.TrainingOperator.ManagementState != operatorv1.Managed {
return errors.New("the TrainingOperator spec should be in 'enabled: true' state in order to perform test")
}

if tc.testCtx.testDsc.Spec.Components.TrainingOperator.ManagementState == operatorv1.Managed {
appDeployments, err := tc.testCtx.kubeClient.AppsV1().Deployments(tc.testCtx.applicationsNamespace).List(tc.testCtx.ctx, metav1.ListOptions{
LabelSelector: labels.PlatformPartOf + "=" + strings.ToLower(gvk.TrainingOperator.Kind),
})
if err != nil {
return fmt.Errorf("error getting enabled component %v", componentApi.TrainingOperatorComponentName)
}
if len(appDeployments.Items) > 0 {
trainingoperatorDeploymentName = appDeployments.Items[0].Name
if appDeployments.Items[0].Status.ReadyReplicas == 0 {
return fmt.Errorf("error getting enabled component: %s its deployment 'ReadyReplicas'", trainingoperatorDeploymentName)
}
deployments, err := tc.testCtx.getComponentDeployments(gvk.TrainingOperator)
if err != nil {
return fmt.Errorf("error getting enabled component %s", componentApi.TrainingOperatorComponentName)
}

for _, d := range deployments {
if d.Status.ReadyReplicas == 0 {
return fmt.Errorf("component %s deployment %sis not ready", d.Name, componentApi.TrainingOperatorComponentName)
}
} else {
return errors.New("TrainingOperator spec should be in 'enabled: true' state in order to perform test")
}

// Disable component TrainingOperator
err := retry.RetryOnConflict(retry.DefaultRetry, func() error {
err = retry.RetryOnConflict(retry.DefaultRetry, func() error {
// refresh DSC instance in case it was updated during the reconcile
err := tc.testCtx.customClient.Get(tc.testCtx.ctx, types.NamespacedName{Name: tc.testCtx.testDsc.Name}, tc.testCtx.testDsc)
if err != nil {
Expand Down Expand Up @@ -312,17 +307,16 @@ func (tc *TrainingOperatorTestCtx) testUpdateTrainingOperatorComponentDisabled()
return fmt.Errorf("component TrainingOperator is disabled, should not get the TrainingOperator CR %v", tc.testTrainingOperatorInstance.Name)
}

// Sleep for 20 seconds to allow the operator to reconcile
time.Sleep(2 * generalRetryInterval)
_, err = tc.testCtx.kubeClient.AppsV1().Deployments(tc.testCtx.applicationsNamespace).Get(tc.testCtx.ctx, trainingoperatorDeploymentName, metav1.GetOptions{})
deployments, err = tc.testCtx.getComponentDeployments(gvk.TrainingOperator)
if err != nil {
if k8serr.IsNotFound(err) {
return nil // correct result: should not find deployment after we disable it already
}
return fmt.Errorf("error getting component resource after reconcile: %w", err)
return fmt.Errorf("error listing deployments: %w", err)
}
return fmt.Errorf("component %v is disabled, should not get its deployment %v from NS %v any more",
componentApi.TrainingOperatorKind,
trainingoperatorDeploymentName,
tc.testCtx.applicationsNamespace)

if len(deployments) != 0 {
return fmt.Errorf("component %v is disabled, should not have deployments in NS %v any more",
gvk.TrainingOperator.Kind,
tc.testCtx.applicationsNamespace)
}

return nil
}
Loading

0 comments on commit 9b75bac

Please sign in to comment.