Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(test): configuration changes and fixes needed to scale-test #1085

Merged
merged 4 commits into from
Jan 20, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions .github/workflows/scale-test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -96,11 +96,12 @@ jobs:
NUM_REPLICAS: ${{ inputs.num_replicas }}
NUM_NETPOLS: ${{ inputs.num_netpol }}
CLEANUP: ${{ inputs.cleanup }}
IMAGE_REGISTRY: ${{ inputs.image_namespace == '' && vars.ACR_NAME || inputs.image_namespace }}
IMAGE_REGISTRY: ${{ vars.ACR_NAME }}
IMAGE_NAMESPACE: ${{ github.repository }}
TAG: ${{ inputs.image_tag }}
AZURE_APP_INSIGHTS_KEY: ${{ secrets.AZURE_APP_INSIGHTS_KEY }}
shell: bash
run: |
set -euo pipefail
go test -v ./test/e2e/. -timeout 300m -tags=scale -count=1 -args -image-tag=$( [[ $TAG == "" ]] && make version || echo $TAG ) -create-infra=false -delete-infra=false
[[ $TAG == "" ]] && TAG=$(make version)
go test -v ./test/e2e/. -timeout 300m -tags=scale -count=1 -args -create-infra=false -delete-infra=false
1 change: 1 addition & 0 deletions test/e2e/common/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ const (
KubeSystemNamespace = "kube-system"
TestPodNamespace = "kube-system-test"
AzureAppInsightsKeyEnv = "AZURE_APP_INSIGHTS_KEY"
OutputFilePathEnv = "OUTPUT_FILEPATH"
)

var (
Expand Down
29 changes: 10 additions & 19 deletions test/e2e/framework/kubernetes/check-pod-status.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,9 @@ import (
)

const (
RetryTimeoutPodsReady = 5 * time.Minute
RetryIntervalPodsReady = 5 * time.Second
RetryTimeoutPodsReady = 5 * time.Minute
RetryIntervalPodsReady = 5 * time.Second
timeoutWaitForPodsSeconds = 1200

printInterval = 5 // print to stdout every 5 iterations
)
Expand Down Expand Up @@ -48,7 +49,7 @@ func (w *WaitPodsReady) Run() error {
return fmt.Errorf("error creating Kubernetes client: %w", err)
}

ctx, cancel := context.WithTimeout(context.Background(), defaultTimeoutSeconds*time.Second)
ctx, cancel := context.WithTimeout(context.Background(), timeoutWaitForPodsSeconds*time.Second)
defer cancel()

return WaitForPodReady(ctx, clientset, w.Namespace, w.LabelSelector)
Expand All @@ -60,7 +61,6 @@ func (w *WaitPodsReady) Stop() error {
}

func WaitForPodReady(ctx context.Context, clientset *kubernetes.Clientset, namespace, labelSelector string) error {
podReadyMap := make(map[string]bool)

printIterator := 0
conditionFunc := wait.ConditionWithContextFunc(func(context.Context) (bool, error) {
Expand All @@ -78,34 +78,25 @@ func WaitForPodReady(ctx context.Context, clientset *kubernetes.Clientset, names
return false, nil
}

// check each indviidual pod to see if it's in Running state
// check each individual pod to see if it's in Running state
for i := range podList.Items {
var pod *corev1.Pod
pod, err = clientset.CoreV1().Pods(namespace).Get(ctx, podList.Items[i].Name, metav1.GetOptions{})
if err != nil {
return false, fmt.Errorf("error getting Pod: %w", err)
}

// Check the Pod phase
if pod.Status.Phase != corev1.PodRunning {
if podList.Items[i].Status.Phase != corev1.PodRunning {
if printIterator%printInterval == 0 {
log.Printf("pod \"%s\" is not in Running state yet. Waiting...\n", pod.Name)
log.Printf("pod \"%s\" is not in Running state yet. Waiting...\n", podList.Items[i].Name)
}
return false, nil
}

// Check all container status.
for _, containerStatus := range pod.Status.ContainerStatuses {
if !containerStatus.Ready {
log.Printf("container \"%s\" in pod \"%s\" is not ready yet. Waiting...\n", containerStatus.Name, pod.Name)
for j := range podList.Items[i].Status.ContainerStatuses {
if !podList.Items[i].Status.ContainerStatuses[j].Ready {
log.Printf("container \"%s\" in pod \"%s\" is not ready yet. Waiting...\n", podList.Items[i].Status.ContainerStatuses[j].Name, podList.Items[i].Name)
return false, nil
}
}

if !podReadyMap[pod.Name] {
log.Printf("pod \"%s\" is in Running state\n", pod.Name)
podReadyMap[pod.Name] = true
}
}
log.Printf("all pods in namespace \"%s\" with label \"%s\" are in Running state\n", namespace, labelSelector)
return true, nil
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ func (c *CreateKapingerDeployment) GetKapingerDeployment() *appsv1.Deployment {
"memory": resource.MustParse("20Mi"),
},
Limits: v1.ResourceList{
"memory": resource.MustParse("20Mi"),
"memory": resource.MustParse("100Mi"),
},
},
Ports: []v1.ContainerPort{
Expand Down
4 changes: 2 additions & 2 deletions test/e2e/framework/kubernetes/delete-namespace.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ func (d *DeleteNamespace) Run() error {
return fmt.Errorf("error creating Kubernetes client: %w", err)
}

ctx, cancel := context.WithTimeout(context.Background(), defaultTimeoutSeconds*time.Second)
ctx, cancel := context.WithTimeout(context.Background(), 1200*time.Second)
defer cancel()

err = clientset.CoreV1().Namespaces().Delete(ctx, d.Namespace, metaV1.DeleteOptions{})
Expand All @@ -41,7 +41,7 @@ func (d *DeleteNamespace) Run() error {
}

backoff := wait.Backoff{
Steps: 6,
Steps: 9,
Duration: 10 * time.Second,
Factor: 2.0,
// Jitter: 0.1,
Expand Down
1 change: 1 addition & 0 deletions test/e2e/framework/kubernetes/install-retina-helm.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ func (i *InstallHelmChart) Run() error {
chart.Values["image"].(map[string]interface{})["repository"] = imageRegistry + "/" + imageNamespace + "/retina-agent"
chart.Values["image"].(map[string]interface{})["initRepository"] = imageRegistry + "/" + imageNamespace + "/retina-init"
chart.Values["operator"].(map[string]interface{})["repository"] = imageRegistry + "/" + imageNamespace + "/retina-operator"
chart.Values["operator"].(map[string]interface{})["enabled"] = true

getclient := action.NewGet(actionConfig)
release, err := getclient.Run(i.ReleaseName)
Expand Down
61 changes: 43 additions & 18 deletions test/e2e/framework/scaletest/add-shared-labels.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"context"
"encoding/json"
"fmt"
"log"
"time"

metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand Down Expand Up @@ -50,32 +51,21 @@ func (a *AddSharedLabelsToAllPods) Run() error {
return fmt.Errorf("error creating Kubernetes client: %w", err)
}

ctx, cancel := context.WithTimeout(context.Background(), defaultTimeoutSeconds*time.Second)
ctx, cancel := contextToLabelAllPods()
defer cancel()

resources, err := clientset.CoreV1().Pods(a.Namespace).List(ctx, metav1.ListOptions{})

patch := []patchStringValue{}

for i := 0; i < a.NumSharedLabelsPerPod; i++ {
patch = append(patch, patchStringValue{
Op: "add",
Path: "/metadata/labels/shared-lab-" + fmt.Sprintf("%05d", i),
Value: "val",
})
}

patchBytes, err := json.Marshal(patch)
patchBytes, err := getSharedLabelsPatch(a.NumSharedLabelsPerPod)
if err != nil {
return fmt.Errorf("error marshalling patch: %w", err)
return fmt.Errorf("error getting label patch: %w", err)
}

for _, resource := range resources.Items {
clientset.CoreV1().Pods(a.Namespace).Patch(ctx, resource.Name,
types.JSONPatchType,
patchBytes,
metav1.PatchOptions{},
)
err = patchLabel(ctx, clientset, a.Namespace, resource.Name, patchBytes)
if err != nil {
log.Printf("Error adding shared labels to pod %s: %s\n", resource.Name, err)
}
}

return nil
Expand All @@ -85,3 +75,38 @@ func (a *AddSharedLabelsToAllPods) Run() error {
func (a *AddSharedLabelsToAllPods) Stop() error {
return nil
}

// patchLabel logs the pod being labeled and then sends patchBytes to the
// API server as a JSON Patch (types.JSONPatchType) against the pod named
// podName in namespace.
//
// It returns nil on success, or the Patch error wrapped with context.
func patchLabel(ctx context.Context, clientset *kubernetes.Clientset, namespace, podName string, patchBytes []byte) error {
	log.Println("Labeling Pod", podName)

	podClient := clientset.CoreV1().Pods(namespace)
	if _, patchErr := podClient.Patch(ctx, podName, types.JSONPatchType, patchBytes, metav1.PatchOptions{}); patchErr != nil {
		return fmt.Errorf("failed to patch pod: %w", patchErr)
	}

	return nil
}

// getSharedLabelsPatch builds the JSON-encoded list of patch operations that
// adds numLabels labels to a pod. Label i is written at the path
// "/metadata/labels/shared-lab-<i>", with i zero-padded to five digits, and
// every label gets the value "val".
//
// It returns the marshalled bytes, or a wrapped error if marshalling fails.
func getSharedLabelsPatch(numLabels int) ([]byte, error) {
	const labelPathPrefix = "/metadata/labels/shared-lab-"

	// Start from an empty, non-nil slice so that zero labels still encodes
	// as a JSON array rather than null.
	ops := make([]patchStringValue, 0)
	for idx := 0; idx < numLabels; idx++ {
		op := patchStringValue{
			Op:    "add",
			Path:  labelPathPrefix + fmt.Sprintf("%05d", idx),
			Value: "val",
		}
		ops = append(ops, op)
	}

	encoded, marshalErr := json.Marshal(ops)
	if marshalErr != nil {
		return nil, fmt.Errorf("error marshalling patch: %w", marshalErr)
	}

	return encoded, nil
}

// contextToLabelAllPods returns a context derived from context.Background
// that expires after 120 minutes, together with its cancel function.
// The caller is responsible for calling the returned cancel function.
func contextToLabelAllPods() (context.Context, context.CancelFunc) {
	const labelAllPodsTimeout = 120 * time.Minute

	root := context.Background()
	return context.WithTimeout(root, labelAllPodsTimeout)
}
17 changes: 6 additions & 11 deletions test/e2e/framework/scaletest/add-unique-labels.go
Original file line number Diff line number Diff line change
@@ -1,13 +1,10 @@
package scaletest

import (
"context"
"encoding/json"
"fmt"
"time"

metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/tools/clientcmd"
)
Expand Down Expand Up @@ -44,7 +41,7 @@ func (a *AddUniqueLabelsToAllPods) Run() error {
return fmt.Errorf("error creating Kubernetes client: %w", err)
}

ctx, cancel := context.WithTimeout(context.Background(), defaultTimeoutSeconds*time.Second)
ctx, cancel := contextToLabelAllPods()
defer cancel()

resources, err := clientset.CoreV1().Pods(a.Namespace).List(ctx, metav1.ListOptions{})
Expand All @@ -53,7 +50,6 @@ func (a *AddUniqueLabelsToAllPods) Run() error {

for _, resource := range resources.Items {
patch := []patchStringValue{}

for i := 0; i < a.NumUniqueLabelsPerPod; i++ {
patch = append(patch, patchStringValue{
Op: "add",
Expand All @@ -65,14 +61,13 @@ func (a *AddUniqueLabelsToAllPods) Run() error {

patchBytes, err := json.Marshal(patch)
if err != nil {
return fmt.Errorf("error marshalling patch: %w", err)
return fmt.Errorf("failed to marshal patch: %w", err)
}

clientset.CoreV1().Pods(a.Namespace).Patch(ctx, resource.Name,
types.JSONPatchType,
patchBytes,
metav1.PatchOptions{},
)
err = patchLabel(ctx, clientset, a.Namespace, resource.Name, patchBytes)
if err != nil {
return fmt.Errorf("error adding unique label to pod: %w", err)
}
}

return nil
Expand Down
33 changes: 22 additions & 11 deletions test/e2e/framework/scaletest/create-resources.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"time"

e2ekubernetes "github.com/microsoft/retina/test/e2e/framework/kubernetes"
"github.com/microsoft/retina/test/retry"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/tools/clientcmd"
Expand Down Expand Up @@ -48,11 +49,18 @@ func (c *CreateResources) Run() error {
return fmt.Errorf("error creating Kubernetes client: %w", err)
}

ctx, cancel := context.WithTimeout(context.Background(), defaultTimeoutSeconds*time.Second)
ctx, cancel := context.WithTimeout(context.Background(), 1200*time.Second)
defer cancel()

retrier := retry.Retrier{Attempts: defaultRetryAttempts, Delay: defaultRetryDelay}

for _, resource := range resources {
e2ekubernetes.CreateResource(ctx, resource, clientset)
err := retrier.Do(ctx, func() error {
return e2ekubernetes.CreateResource(ctx, resource, clientset)
})
if err != nil {
return fmt.Errorf("error creating resource: %w", err)
}
}

return nil
Expand All @@ -71,12 +79,6 @@ func (c *CreateResources) getResources() []runtime.Object {
// kwokDeployments := c.generateDeployments(c.NumKwokDeployments, c.NumKwokReplicas, "kwok")
// objs = append(objs, kwokDeployments...)

realDeployments := c.generateDeployments()
objs = append(objs, realDeployments...)

services := c.generateServices("real")
objs = append(objs, services...)

kapinger := e2ekubernetes.CreateKapingerDeployment{
KapingerNamespace: c.Namespace,
KubeConfigFilePath: c.KubeConfigFilePath,
Expand All @@ -88,6 +90,13 @@ func (c *CreateResources) getResources() []runtime.Object {
kapingerSA := kapinger.GetKapingerServiceAccount()

objs = append(objs, kapingerClusterRole, kapingerClusterRoleBinding, kapingerSA)

realDeployments := c.generateDeployments()
objs = append(objs, realDeployments...)

services := c.generateServices()
objs = append(objs, services...)

// c.generateKwokNodes()
log.Println("Finished generating YAMLs")
return objs
Expand Down Expand Up @@ -118,6 +127,8 @@ func (c *CreateResources) generateDeployments() []runtime.Object {
labelPrefix := fmt.Sprintf("%s-dep-lab", name)

deployment.Name = name
deployment.Labels["name"] = name
deployment.Spec.Template.Labels["name"] = name

r := int32(c.NumRealReplicas)
deployment.Spec.Replicas = &r
Expand All @@ -135,7 +146,7 @@ func (c *CreateResources) generateDeployments() []runtime.Object {
return objs
}

func (c *CreateResources) generateServices(svcKind string) []runtime.Object {
func (c *CreateResources) generateServices() []runtime.Object {
objs := []runtime.Object{}

kapingerSvc := e2ekubernetes.CreateKapingerDeployment{
Expand All @@ -146,10 +157,10 @@ func (c *CreateResources) generateServices(svcKind string) []runtime.Object {
for i := 0; i < c.NumRealServices; i++ {
template := kapingerSvc.GetKapingerService()

name := fmt.Sprintf("%s-svc-%05d", svcKind, i)
name := fmt.Sprintf("%s-svc-%05d", c.RealPodType, i)
template.Name = name

template.Spec.Selector["name"] = fmt.Sprintf("%s-%s-dep-%05d", svcKind, c.RealPodType, i)
template.Spec.Selector["name"] = fmt.Sprintf("%s-dep-%05d", c.RealPodType, i)

objs = append(objs, template)
}
Expand Down
4 changes: 3 additions & 1 deletion test/e2e/framework/scaletest/delete-and-re-add-labels.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ func (d *DeleteAndReAddLabels) Run() error {
return fmt.Errorf("error creating Kubernetes client: %w", err)
}

ctx, cancel := context.WithTimeout(context.Background(), defaultTimeoutSeconds*time.Second)
ctx, cancel := contextToLabelAllPods()
defer cancel()

labelsToDelete := `"shared-lab-00000": null, "shared-lab-00001": null, "shared-lab-00002": null`
Expand Down Expand Up @@ -91,6 +91,7 @@ func (d *DeleteAndReAddLabels) Run() error {
func (d *DeleteAndReAddLabels) addLabels(ctx context.Context, clientset *kubernetes.Clientset, pods *corev1.PodList, patch string) error {

for _, pod := range pods.Items {
log.Println("Labeling Pod", pod.Name)
_, err := clientset.CoreV1().Pods(d.Namespace).Patch(ctx, pod.Name, types.StrategicMergePatchType, []byte(patch), metav1.PatchOptions{})
if err != nil {
return fmt.Errorf("error patching pod: %w", err)
Expand All @@ -103,6 +104,7 @@ func (d *DeleteAndReAddLabels) addLabels(ctx context.Context, clientset *kuberne
func (d *DeleteAndReAddLabels) deleteLabels(ctx context.Context, clientset *kubernetes.Clientset, pods *corev1.PodList, patch string) error {

for _, pod := range pods.Items {
log.Println("Deleting label from Pod", pod.Name)
_, err := clientset.CoreV1().Pods(d.Namespace).Patch(ctx, pod.Name, types.StrategicMergePatchType, []byte(patch), metav1.PatchOptions{})
if err != nil {
return fmt.Errorf("error patching pod: %w", err)
Expand Down
Loading
Loading