Skip to content

Commit

Permalink
test: automate scale test execution
Browse files Browse the repository at this point in the history
Signed-off-by: Alex Castilio dos Santos <[email protected]>
  • Loading branch information
alexcastilio committed Jan 20, 2025
1 parent b3cd0ec commit 4f112ab
Show file tree
Hide file tree
Showing 8 changed files with 209 additions and 44 deletions.
23 changes: 23 additions & 0 deletions .github/workflows/daily-scale-test.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Caller workflow: runs the reusable scale-test workflow with a fixed,
# small set of inputs.
name: Daily Scale Test

# NOTE(review): the workflow currently fires on pushes to a development
# branch; the daily cron schedule below is still commented out.
on:
push:
branches:
- alexcastilio/scale-test-workflow
# schedule:
# - cron: "0 0 * * *"

permissions:
contents: read
# NOTE(review): presumably needed for OIDC-based cloud login in the called
# workflow - confirm against scale-test.yaml.
id-token: write

jobs:
call-scale-test:
# Delegates the actual work to the reusable workflow in this repository.
uses: ./.github/workflows/scale-test.yaml
with:
num_deployments: 10
num_replicas: 10
# TODO: Fix value
num_netpol: 0
cleanup: false
# Forwards the caller's secrets to the called workflow.
secrets: inherit
22 changes: 10 additions & 12 deletions .github/workflows/scale-test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,25 +36,17 @@ on:

workflow_call:
inputs:
resource_group:
description: "Azure Resource Group"
required: true
type: string
cluster_name:
description: "AKS Cluster Name"
required: true
type: string
num_deployments:
description: "Number of Traffic Deployments"
default: 1000
default: 100
type: number
num_replicas:
description: "Number of Traffic Replicas per Deployment"
default: 40
default: 10
type: number
num_netpol:
description: "Number of Network Policies"
default: 1000
default: 100
type: number
cleanup:
description: "Clean up environment after test"
Expand Down Expand Up @@ -100,8 +92,14 @@ jobs:
IMAGE_NAMESPACE: ${{ github.repository }}
TAG: ${{ inputs.image_tag }}
AZURE_APP_INSIGHTS_KEY: ${{ secrets.AZURE_APP_INSIGHTS_KEY }}
# TODO: FIX VALUE
NODES_PER_POOL: ""
# TODO: DELETE
WORKFLOW_NAME: ${{ github.event_name }}
WORKFLOW_EVENT: ${{ github.event }}
WORKFLOW_CALL: ${{ github.event.workflow == '.github/workflows/daily-scale-test.yaml' }}
shell: bash
run: |
set -euo pipefail
[[ $TAG == "" ]] && TAG=$(make version)
go test -v ./test/e2e/. -timeout 300m -tags=scale -count=1 -args -create-infra=false -delete-infra=false
go test -v ./test/e2e/. -timeout 300m -tags=scale -count=1 -args -create-infra=$(echo $WORKFLOW_CALL) -delete-infra=false
55 changes: 53 additions & 2 deletions test/e2e/common/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@ package common

import (
"flag"
"os"
"os/user"
"strconv"
"testing"
"time"

"github.com/microsoft/retina/test/e2e/framework/params"
"github.com/stretchr/testify/require"
)

Expand All @@ -30,10 +30,61 @@ var (
Architectures = []string{"amd64", "arm64"}
CreateInfra = flag.Bool("create-infra", true, "create a Resource group, vNET and AKS cluster for testing")
DeleteInfra = flag.Bool("delete-infra", true, "delete a Resource group, vNET and AKS cluster for testing")
ScaleTestInfra = ScaleTestInfraHandler{
location: params.Location,
subscriptionID: params.SubscriptionID,
resourceGroup: params.ResourceGroup,
clusterName: params.ClusterName,
nodesPerPool: params.NodesPerPool,
}
)

// ScaleTestInfraHandler holds the raw (string) infrastructure settings for the
// scale test and exposes them through getters that apply defaults and
// validation. The package-level ScaleTestInfra value fills these fields from
// the params package.
type ScaleTestInfraHandler struct {
// location is the Azure location; GetLocation returns "westus2" when empty.
location string
// subscriptionID is the Azure subscription; GetSubscriptionID requires it to be non-empty.
subscriptionID string
// resourceGroup is the Azure resource group; GetResourceGroup falls back to the cluster name when empty.
resourceGroup string
// clusterName is the cluster name; GetClusterName returns "retina-scale-test" when empty.
clusterName string
// nodesPerPool is the node count per pool as a decimal string; GetNodesPerPool returns 5 when empty.
nodesPerPool string
}

// GetSubscriptionID returns the Azure subscription ID configured for the
// scale test. The value is mandatory: it is checked with require.NotEmpty,
// so a missing subscription ID fails the test t.
func (s ScaleTestInfraHandler) GetSubscriptionID(t *testing.T) string {
	subID := s.subscriptionID
	require.NotEmpty(t, subID)
	return subID
}

func (s ScaleTestInfraHandler) GetLocation(t *testing.T) string {

Check failure on line 55 in test/e2e/common/common.go

View workflow job for this annotation

GitHub Actions / Lint (linux, amd64)

unused-parameter: parameter 't' seems to be unused, consider removing or renaming it as _ (revive)

Check failure on line 55 in test/e2e/common/common.go

View workflow job for this annotation

GitHub Actions / Lint (linux, arm64)

unused-parameter: parameter 't' seems to be unused, consider removing or renaming it as _ (revive)

Check failure on line 55 in test/e2e/common/common.go

View workflow job for this annotation

GitHub Actions / Lint (windows, amd64)

unused-parameter: parameter 't' seems to be unused, consider removing or renaming it as _ (revive)

Check failure on line 55 in test/e2e/common/common.go

View workflow job for this annotation

GitHub Actions / Lint (windows, arm64)

unused-parameter: parameter 't' seems to be unused, consider removing or renaming it as _ (revive)
if s.location == "" {
return "westus2"
}
return s.location
}

// GetResourceGroup returns the Azure resource group configured for the scale
// test. When no resource group was configured, the cluster name (as returned
// by GetClusterName) doubles as the resource group name.
func (s ScaleTestInfraHandler) GetResourceGroup(t *testing.T) string {
	if rg := s.resourceGroup; rg == "" {
		// No explicit resource group: reuse the cluster name.
		return s.GetClusterName(t)
	}
	return s.resourceGroup
}

func (s ScaleTestInfraHandler) GetNodesPerPool(t *testing.T) int32 {
if s.nodesPerPool == "" {
return 5
}
nodesPerPool, err := strconv.Atoi(s.nodesPerPool)
require.NoError(t, err, "NODES_PER_POOL must be an integer")
return int32(nodesPerPool)

Check failure on line 76 in test/e2e/common/common.go

View workflow job for this annotation

GitHub Actions / Lint (linux, amd64)

G109: Potential Integer overflow made by strconv.Atoi result conversion to int16/32 (gosec)

Check failure on line 76 in test/e2e/common/common.go

View workflow job for this annotation

GitHub Actions / Lint (linux, arm64)

G109: Potential Integer overflow made by strconv.Atoi result conversion to int16/32 (gosec)

Check failure on line 76 in test/e2e/common/common.go

View workflow job for this annotation

GitHub Actions / Lint (windows, amd64)

G109: Potential Integer overflow made by strconv.Atoi result conversion to int16/32 (gosec)

Check failure on line 76 in test/e2e/common/common.go

View workflow job for this annotation

GitHub Actions / Lint (windows, arm64)

G109: Potential Integer overflow made by strconv.Atoi result conversion to int16/32 (gosec)

Check failure

Code scanning / CodeQL

Incorrect conversion between integer types High test

Incorrect conversion of an integer with architecture-dependent bit size from
strconv.Atoi
to a lower bit size type int32 without an upper bound check.
}

func (s ScaleTestInfraHandler) GetClusterName(t *testing.T) string {

Check failure on line 79 in test/e2e/common/common.go

View workflow job for this annotation

GitHub Actions / Lint (linux, amd64)

unused-parameter: parameter 't' seems to be unused, consider removing or renaming it as _ (revive)

Check failure on line 79 in test/e2e/common/common.go

View workflow job for this annotation

GitHub Actions / Lint (linux, arm64)

unused-parameter: parameter 't' seems to be unused, consider removing or renaming it as _ (revive)

Check failure on line 79 in test/e2e/common/common.go

View workflow job for this annotation

GitHub Actions / Lint (windows, amd64)

unused-parameter: parameter 't' seems to be unused, consider removing or renaming it as _ (revive)

Check failure on line 79 in test/e2e/common/common.go

View workflow job for this annotation

GitHub Actions / Lint (windows, arm64)

unused-parameter: parameter 't' seems to be unused, consider removing or renaming it as _ (revive)
if s.clusterName != "" {
return s.clusterName
}
return "retina-scale-test"
}

func ClusterNameForE2ETest(t *testing.T) string {
clusterName := os.Getenv("CLUSTER_NAME")
clusterName := params.ClusterName
if clusterName == "" {
curuser, err := user.Current()
require.NoError(t, err)
Expand Down
6 changes: 3 additions & 3 deletions test/e2e/framework/azure/create-cluster-with-npm.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ const (
clusterCreateTicker = 30 * time.Second
pollFrequency = 5 * time.Second
AgentARMSKU = "Standard_D4pls_v5"
AuxilaryNodeCount = 1
)

type CreateNPMCluster struct {
Expand All @@ -35,6 +34,7 @@ type CreateNPMCluster struct {
PodCidr string
DNSServiceIP string
ServiceCidr string
NodesPerPool int32
}

func (c *CreateNPMCluster) Prevalidate() error {
Expand All @@ -55,7 +55,7 @@ func (c *CreateNPMCluster) Run() error {
npmCluster.Properties.AgentPoolProfiles = append(npmCluster.Properties.AgentPoolProfiles, &armcontainerservice.ManagedClusterAgentPoolProfile{ //nolint:all
Type: to.Ptr(armcontainerservice.AgentPoolTypeVirtualMachineScaleSets),
// AvailabilityZones: []*string{to.Ptr("1")},
Count: to.Ptr[int32](AuxilaryNodeCount),
Count: to.Ptr[int32](c.NodesPerPool),
EnableNodePublicIP: to.Ptr(false),
Mode: to.Ptr(armcontainerservice.AgentPoolModeUser),
OSType: to.Ptr(armcontainerservice.OSTypeWindows),
Expand Down Expand Up @@ -86,7 +86,7 @@ func (c *CreateNPMCluster) Run() error {
npmCluster.Properties.AgentPoolProfiles = append(npmCluster.Properties.AgentPoolProfiles, &armcontainerservice.ManagedClusterAgentPoolProfile{ //nolint:all
Type: to.Ptr(armcontainerservice.AgentPoolTypeVirtualMachineScaleSets),
// AvailabilityZones: []*string{to.Ptr("1")},
Count: to.Ptr[int32](AuxilaryNodeCount),
Count: to.Ptr[int32](c.NodesPerPool),
EnableNodePublicIP: to.Ptr(false),
Mode: to.Ptr(armcontainerservice.AgentPoolModeUser),
OSType: to.Ptr(armcontainerservice.OSTypeLinux),
Expand Down
17 changes: 17 additions & 0 deletions test/e2e/framework/params/params.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
package params

import (
"os"
)

// Test parameters, read once from the environment at package initialisation.
// All values are kept as raw strings; callers parse and default them.
var (
	// Location is the Azure location. AZURE_LOCATION is honoured as a
	// fallback because the scale test read that name before the parameters
	// were centralised in this package.
	Location = firstEnv("LOCATION", "AZURE_LOCATION")
	// SubscriptionID is the Azure subscription ID.
	SubscriptionID = os.Getenv("AZURE_SUBSCRIPTION_ID")
	// ResourceGroup is the Azure resource group name.
	ResourceGroup = os.Getenv("AZURE_RESOURCE_GROUP")
	// ClusterName is the name of the cluster under test.
	ClusterName = os.Getenv("CLUSTER_NAME")
	// NodesPerPool is the number of nodes per node pool.
	NodesPerPool = os.Getenv("NODES_PER_POOL")
	// NumDeployments is the number of traffic deployments.
	NumDeployments = os.Getenv("NUM_DEPLOYMENTS")
	// NumReplicas is the number of replicas per deployment.
	NumReplicas = os.Getenv("NUM_REPLICAS")
	// NumNetworkPolicies is the number of network policies. NUM_NETPOLS is
	// honoured as a fallback because the scale test read that name before the
	// parameters were centralised in this package.
	NumNetworkPolicies = firstEnv("NUM_NET_POL", "NUM_NETPOLS")
	// CleanUp says whether to clean up the environment after the test.
	CleanUp = os.Getenv("CLEANUP")
)

// firstEnv returns the value of the first environment variable in names that
// is set to a non-empty value, or "" when none is.
func firstEnv(names ...string) string {
	for _, name := range names {
		if v := os.Getenv(name); v != "" {
			return v
		}
	}
	return ""
}
1 change: 1 addition & 0 deletions test/e2e/jobs/jobs.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ func CreateTestInfra(subID, rg, clusterName, location, kubeConfigFilePath string
PodCidr: "10.128.0.0/9",
DNSServiceIP: "192.168.0.10",
ServiceCidr: "192.168.0.0/28",
NodesPerPool: 1,
}, nil)

job.AddStep(&azure.GetAKSKubeConfig{
Expand Down
88 changes: 88 additions & 0 deletions test/e2e/jobs/scale.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,49 @@ import (
"time"

"github.com/microsoft/retina/test/e2e/common"
"github.com/microsoft/retina/test/e2e/framework/azure"
"github.com/microsoft/retina/test/e2e/framework/generic"
"github.com/microsoft/retina/test/e2e/framework/kubernetes"
"github.com/microsoft/retina/test/e2e/framework/scaletest"
"github.com/microsoft/retina/test/e2e/framework/types"
)

func DefaultScaleTestOptions() scaletest.Options {
// var NumRealDeployments int
// var NumReplicas int
// var err error
//
// if env.NumDeployments != "" {
// NumRealDeployments, err = strconv.Atoi(env.NumDeployments)
// require.NoError(t, err, "Failed to convert NUM_DEPLOYMENTS to int")
// } else {
// NumRealDeployments = 1000
// }
//
// if env.NumReplicas != "" {
// NumReplicas, err = strconv.Atoi(env.NumReplicas)
// require.NoError(t, err, "Failed to convert NUM_REPLICAS to int")
// } else {
// NumReplicas = "40"
// NumNetworkPolicies := env.NumNetworkPolicies
// CleanUp := env.CleanUp
//
// if NumDeployments != "" {
// } else {
// NumRealDeployments = 1000
// }
// if NumReplicas != "" {
// opt.NumRealReplicas, err = strconv.Atoi(NumReplicas)
// require.NoError(t, err)
// }
// if NumNetworkPolicies != "" {
// opt.NumNetworkPolicies, err = strconv.Atoi(NumNetworkPolicies)
// require.NoError(t, err)
// }
// if CleanUp != "" {
// opt.DeleteLabels, err = strconv.ParseBool(CleanUp)
// require.NoError(t, err)
// }
return scaletest.Options{
Namespace: "scale-test",
MaxKwokPodsPerNode: 0,
Expand Down Expand Up @@ -45,6 +82,57 @@ func DefaultScaleTestOptions() scaletest.Options {
}
}

// GetScaleTestInfra builds the job that makes a cluster available to the
// scale test.
//
// With createInfra set, the job first creates a resource group, a virtual
// network, a subnet and a cluster with nodesPerPool nodes per pool, and then
// fetches the kubeconfig. Otherwise it only fetches the kubeconfig of the
// cluster identified by subID, rg, clusterName and location. In both cases the
// kubeconfig is written to kubeConfigFilePath and the job ends by loading the
// image tag, namespace and registry flags.
func GetScaleTestInfra(subID, rg, clusterName, location, kubeConfigFilePath string, nodesPerPool int32, createInfra bool) *types.Job {
	job := types.NewJob("Get scale test infrastructure")

	// Both paths end with the kubeconfig step; only its inputs differ.
	kubeConfig := &azure.GetAKSKubeConfig{
		KubeConfigFilePath: kubeConfigFilePath,
	}

	if !createInfra {
		// Existing cluster: the step has to be told explicitly where it lives.
		kubeConfig.ClusterName = clusterName
		kubeConfig.SubscriptionID = subID
		kubeConfig.ResourceGroupName = rg
		kubeConfig.Location = location
	} else {
		// New cluster: the kubeconfig step is given only the file path.
		// NOTE(review): presumably the remaining inputs come from the
		// creation steps below - confirm against the job framework.
		job.AddStep(&azure.CreateResourceGroup{
			SubscriptionID:    subID,
			ResourceGroupName: rg,
			Location:          location,
		}, nil)

		job.AddStep(&azure.CreateVNet{
			VnetName:         "testvnet",
			VnetAddressSpace: "10.0.0.0/9",
		}, nil)

		job.AddStep(&azure.CreateSubnet{
			SubnetName:         "testsubnet",
			SubnetAddressSpace: "10.0.0.0/12",
		}, nil)

		job.AddStep(&azure.CreateNPMCluster{
			ClusterName:  clusterName,
			PodCidr:      "10.128.0.0/9",
			DNSServiceIP: "192.168.0.10",
			ServiceCidr:  "192.168.0.0/28",
			NodesPerPool: nodesPerPool,
		}, nil)
	}

	job.AddStep(kubeConfig, nil)

	job.AddStep(&generic.LoadFlags{
		TagEnv:            generic.DefaultTagEnv,
		ImageNamespaceEnv: generic.DefaultImageNamespace,
		ImageRegistryEnv:  generic.DefaultImageRegistry,
	}, nil)

	return job
}

func ScaleTest(opt *scaletest.Options) *types.Job {
job := types.NewJob("Scale Test")

Expand Down
41 changes: 14 additions & 27 deletions test/e2e/scale_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@
package retina

import (
"crypto/rand"
"math/big"
"os"
"path/filepath"
"strconv"
Expand All @@ -14,6 +12,7 @@ import (
"github.com/microsoft/retina/test/e2e/framework/azure"
"github.com/microsoft/retina/test/e2e/framework/generic"
"github.com/microsoft/retina/test/e2e/framework/helpers"
"github.com/microsoft/retina/test/e2e/framework/params"
"github.com/microsoft/retina/test/e2e/framework/types"
jobs "github.com/microsoft/retina/test/e2e/jobs"
"github.com/stretchr/testify/require"
Expand All @@ -23,25 +22,11 @@ func TestE2ERetina_Scale(t *testing.T) {
ctx, cancel := helpers.Context(t)
defer cancel()

clusterName := common.ClusterNameForE2ETest(t)

subID := os.Getenv("AZURE_SUBSCRIPTION_ID")
require.NotEmpty(t, subID)

location := os.Getenv("AZURE_LOCATION")
if location == "" {
nBig, err := rand.Int(rand.Reader, big.NewInt(int64(len(common.AzureLocations))))
if err != nil {
t.Fatal("Failed to generate a secure random index", err)
}
location = common.AzureLocations[nBig.Int64()]
}

rg := os.Getenv("AZURE_RESOURCE_GROUP")
if rg == "" {
// Use the cluster name as the resource group name by default.
rg = clusterName
}
clusterName := common.ScaleTestInfra.GetClusterName(t)
subID := common.ScaleTestInfra.GetSubscriptionID(t)
location := common.ScaleTestInfra.GetLocation(t)
rg := common.ScaleTestInfra.GetResourceGroup(t)
nodesPerPool := common.ScaleTestInfra.GetNodesPerPool(t)

cwd, err := os.Getwd()
require.NoError(t, err)
Expand All @@ -56,10 +41,10 @@ func TestE2ERetina_Scale(t *testing.T) {
opt := jobs.DefaultScaleTestOptions()
opt.KubeconfigPath = kubeConfigFilePath

NumDeployments := os.Getenv("NUM_DEPLOYMENTS")
NumReplicas := os.Getenv("NUM_REPLICAS")
NumNetworkPolicies := os.Getenv("NUM_NETPOLS")
CleanUp := os.Getenv("CLEANUP")
NumDeployments := params.NumDeployments
NumReplicas := params.NumReplicas
NumNetworkPolicies := params.NumNetworkPolicies
CleanUp := params.CleanUp

if NumDeployments != "" {
opt.NumRealDeployments, err = strconv.Atoi(NumDeployments)
Expand Down Expand Up @@ -89,9 +74,11 @@ func TestE2ERetina_Scale(t *testing.T) {

opt.LabelsToGetMetrics = map[string]string{"k8s-app": "retina"}

createInfra := *common.CreateInfra

// CreateTestInfra
createTestInfra := types.NewRunner(t, jobs.CreateTestInfra(subID, rg, clusterName, location, kubeConfigFilePath, *common.CreateInfra))
createTestInfra.Run(ctx)
infra := types.NewRunner(t, jobs.GetScaleTestInfra(subID, rg, clusterName, location, kubeConfigFilePath, nodesPerPool, createInfra))
infra.Run(ctx)

t.Cleanup(func() {
if *common.DeleteInfra {
Expand Down

0 comments on commit 4f112ab

Please sign in to comment.