Skip to content

Commit

Permalink
Merge pull request rancher#44972 from susesgartner/snap-retention2.8
Browse files Browse the repository at this point in the history
[backport ]Snap retention2.8
  • Loading branch information
markusewalker authored Mar 28, 2024
2 parents aad75d9 + 9e5525e commit 2a64213
Show file tree
Hide file tree
Showing 4 changed files with 155 additions and 3 deletions.
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ require (
github.com/containers/image/v5 v5.25.0
github.com/google/gnostic-models v0.6.8
github.com/rancher/rancher/pkg/apis v0.0.0-20240213233515-935d309ebad4
github.com/rancher/shepherd v0.0.0-20240325171529-2334ef617633
github.com/rancher/shepherd v0.0.0-20240326230250-981fedb03a38
go.qase.io/client v0.0.0-20231114201952-65195ec001fa
)

Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -1641,8 +1641,8 @@ github.com/rancher/remotedialer v0.3.0 h1:y1EO8JCsgZo0RcqTUp6U8FXcBAv27R+TLnWRcp
github.com/rancher/remotedialer v0.3.0/go.mod h1:BwwztuvViX2JrLLUwDlsYt5DiyUwHLlzynRwkZLAY0Q=
github.com/rancher/rke v1.5.7 h1:pCVziDwgulQc2WgRkisY6sEo3DFGgu1StE66UbkuF2c=
github.com/rancher/rke v1.5.7/go.mod h1:vojhOf8U8VCmw7y17OENWXSIfEFPEbXCMQcmI7xN7i8=
github.com/rancher/shepherd v0.0.0-20240325171529-2334ef617633 h1:FU6wzPdSMc5xmEaQ+w6zpYQGvy3Yl1NlpLj7tBYCH+s=
github.com/rancher/shepherd v0.0.0-20240325171529-2334ef617633/go.mod h1:LNI7nH1BptYMvJmuqsLgmkMytGBBTpW4jk4vAHCxfF4=
github.com/rancher/shepherd v0.0.0-20240326230250-981fedb03a38 h1:Ela4BCmaYlXhMaFpVv+arkBKlU9PhzDaYMSWBcADWjo=
github.com/rancher/shepherd v0.0.0-20240326230250-981fedb03a38/go.mod h1:LNI7nH1BptYMvJmuqsLgmkMytGBBTpW4jk4vAHCxfF4=
github.com/rancher/steve v0.0.0-20240305150728-3943409601f1 h1:6wNYy3q9jget45syTN6K2uOLSYaptLYCHscY2WRmhDI=
github.com/rancher/steve v0.0.0-20240305150728-3943409601f1/go.mod h1:o4vLBzMTKbHHhIiAcbgOiaN3aK1vIjL6ZTgaGxQYpsY=
github.com/rancher/system-upgrade-controller/pkg/apis v0.0.0-20210727200656-10b094e30007 h1:ru+mqGnxMmKeU0Q3XIDxkARvInDIqT1hH2amTcsjxI4=
Expand Down
34 changes: 34 additions & 0 deletions tests/v2/validation/snapshot/snapshot.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package snapshot
import (
"strings"
"testing"
"time"

rkev1 "github.com/rancher/rancher/pkg/apis/rke.cattle.io/v1"
v1 "github.com/rancher/rancher/pkg/generated/norman/apps/v1"
Expand All @@ -12,6 +13,7 @@ import (
"github.com/rancher/shepherd/extensions/clusters"
"github.com/rancher/shepherd/extensions/clusters/kubernetesversions"
extdefault "github.com/rancher/shepherd/extensions/defaults"
"github.com/rancher/shepherd/extensions/defaults/stevetypes"
"github.com/rancher/shepherd/extensions/etcdsnapshot"
"github.com/rancher/shepherd/extensions/ingresses"
nodestat "github.com/rancher/shepherd/extensions/nodes"
Expand Down Expand Up @@ -413,3 +415,35 @@ func createPostBackupWorkloads(t *testing.T, client *rancher.Client, clusterID s
require.NoError(t, err)
require.Equal(t, WorkloadNamePostBackup, postDeploymentResp.ObjectMeta.Name)
}

// createSnapshotsUntilRetentionLimit waits for retentionLimit+1 automatic snapshots to be taken and then
// verifies that the retention limit is respected.
//
// The cluster is resolved by name: the v1 provisioning cluster ID is used when one is found, otherwise the
// v3 cluster ID is looked up and prefixed with "fleet-default/". timeBetweenSnapshots is treated as hours
// when the fleet cluster's "provider.cattle.io" label is "rke" and as minutes for every other provider.
// Any lookup or retention-check error fails the test immediately through require.
func createSnapshotsUntilRetentionLimit(t *testing.T, client *rancher.Client, clusterName string, retentionLimit int, timeBetweenSnapshots int) {
	v1ClusterID, err := clusters.GetV1ProvisioningClusterByName(client, clusterName)
	// Check the lookup error before falling back, so a failed lookup is reported as such instead of
	// first triggering a second API call.
	require.NoError(t, err)

	if v1ClusterID == "" {
		var v3ClusterID string
		v3ClusterID, err = clusters.GetClusterIDByName(client, clusterName)
		require.NoError(t, err)
		v1ClusterID = "fleet-default/" + v3ClusterID
	}

	fleetCluster, err := client.Steve.SteveType(stevetypes.FleetCluster).ByID(v1ClusterID)
	require.NoError(t, err)

	snapshotCount := retentionLimit + 1
	sleepNum := snapshotCount * timeBetweenSnapshots

	// Fixed margin added on top of the scheduled wait (presumably to let the last snapshot finish).
	const margin = 5 * time.Minute

	if fleetCluster.ObjectMeta.Labels["provider.cattle.io"] == "rke" {
		logrus.Infof("Waiting %v hours for %v automatic snapshots to be taken", sleepNum, snapshotCount)
		time.Sleep(time.Duration(sleepNum)*time.Hour + margin)

		err = etcdsnapshot.RKE1RetentionLimitCheck(client, clusterName)
		require.NoError(t, err)

		return
	}

	logrus.Infof("Waiting %v minutes for %v automatic snapshots to be taken", sleepNum, snapshotCount)
	time.Sleep(time.Duration(sleepNum)*time.Minute + margin)

	err = etcdsnapshot.RKE2K3SRetentionLimitCheck(client, clusterName)
	require.NoError(t, err)
}
118 changes: 118 additions & 0 deletions tests/v2/validation/snapshot/snapshot_retention_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
//go:build (validation || extended || infra.any || cluster.any) && !sanity && !stress

package snapshot

import (
"fmt"
"testing"

"github.com/rancher/shepherd/clients/rancher"
"github.com/rancher/shepherd/extensions/clusters"
"github.com/rancher/shepherd/extensions/defaults/stevetypes"
"github.com/rancher/shepherd/pkg/config"
"github.com/rancher/shepherd/pkg/session"
"github.com/sirupsen/logrus"
"github.com/stretchr/testify/require"
"github.com/stretchr/testify/suite"
)

// SnapshotRetentionConfig holds the settings for the snapshot retention tests; the suite loads it
// from the "retentionTest" key of the test configuration.
// For SnapshotInterval this will be hours for rke1 and minutes for rke2
type SnapshotRetentionConfig struct {
// ClusterName is the name used to look up the cluster under test.
ClusterName string `json:"clusterName" yaml:"clusterName"`
// SnapshotInterval is the time between automatic snapshots (hours for rke1, minutes otherwise).
SnapshotInterval int `json:"snapshotInterval" yaml:"snapshotInterval"`
// SnapshotRetention is the retention value written to the cluster's etcd snapshot settings.
SnapshotRetention int `json:"snapshotRetention" yaml:"snapshotRetention"`
}

// SnapshotRetentionTestSuite is the testify suite for the automatic snapshot retention tests.
type SnapshotRetentionTestSuite struct {
suite.Suite
// session is the test session created in SetupSuite and cleaned up in TearDownSuite.
session *session.Session
// client is the Rancher client created in SetupSuite from the session.
client *rancher.Client
// snapshotConfig is the configuration loaded from the "retentionTest" key.
snapshotConfig *SnapshotRetentionConfig
// provider is the value of the fleet cluster's "provider.cattle.io" label ("rke" selects the rke1 path).
provider string
}

// TearDownSuite calls Cleanup on the suite's session; testify invokes it once after the suite's tests have run.
func (s *SnapshotRetentionTestSuite) TearDownSuite() {
s.session.Cleanup()
}

// SetupSuite creates the test session and Rancher client, loads the "retentionTest" configuration, detects
// the cluster provider from the fleet cluster's "provider.cattle.io" label, and applies the configured
// snapshot retention and interval to the cluster.
//
// For "rke" clusters the interval is written as IntervalHours on the etcd backup config. For every other
// provider it is written as a "*/<interval> * * * *" cron schedule, i.e. in minutes.
func (s *SnapshotRetentionTestSuite) SetupSuite() {
	testSession := session.NewSession()
	s.session = testSession

	s.snapshotConfig = new(SnapshotRetentionConfig)
	config.LoadConfig("retentionTest", s.snapshotConfig)

	client, err := rancher.NewClient("", testSession)
	require.NoError(s.T(), err)

	s.client = client

	v1ClusterID, err := clusters.GetV1ProvisioningClusterByName(client, s.snapshotConfig.ClusterName)
	// Check before the fallback below reassigns err; otherwise a failed lookup would be silently dropped.
	require.NoError(s.T(), err)

	var v3ClusterID string
	if v1ClusterID == "" {
		v3ClusterID, err = clusters.GetClusterIDByName(client, s.snapshotConfig.ClusterName)
		require.NoError(s.T(), err)
		v1ClusterID = "fleet-default/" + v3ClusterID
	}

	fleetCluster, err := s.client.Steve.SteveType(stevetypes.FleetCluster).ByID(v1ClusterID)
	require.NoError(s.T(), err)

	s.provider = fleetCluster.ObjectMeta.Labels["provider.cattle.io"]

	if s.provider == "rke" {
		// v3ClusterID is only filled in by the fallback above; resolve it here when that path was not
		// taken so the management cluster is never fetched with an empty ID.
		if v3ClusterID == "" {
			v3ClusterID, err = clusters.GetClusterIDByName(client, s.snapshotConfig.ClusterName)
			require.NoError(s.T(), err)
		}

		clusterObject, err := s.client.Management.Cluster.ByID(v3ClusterID)
		require.NoError(s.T(), err)

		// NOTE(review): if ByID returns a pointer, updatedClusterObject aliases clusterObject, so both
		// arguments to Update carry the new values — confirm this is intended.
		updatedClusterObject := clusterObject
		updatedClusterObject.RancherKubernetesEngineConfig.Services.Etcd.BackupConfig.Retention = int64(s.snapshotConfig.SnapshotRetention)
		updatedClusterObject.RancherKubernetesEngineConfig.Services.Etcd.BackupConfig.IntervalHours = int64(s.snapshotConfig.SnapshotInterval)

		_, err = s.client.Management.Cluster.Update(clusterObject, updatedClusterObject)
		require.NoError(s.T(), err)

		return
	}

	// NOTE(review): the threshold checked here is 5 while the message mentions 2 minutes — confirm
	// which value is intended.
	if s.snapshotConfig.SnapshotInterval < 5 {
		logrus.Info("Snapshot cron schedules below 2 minutes can cause unexpected behaviors in rancher")
	}

	clusterObject, clusterResponse, err := clusters.GetProvisioningClusterByName(s.client, s.snapshotConfig.ClusterName, "fleet-default")
	require.NoError(s.T(), err)

	clusterObject.Spec.RKEConfig.ETCD.SnapshotRetention = s.snapshotConfig.SnapshotRetention
	// Run a snapshot every SnapshotInterval minutes.
	clusterObject.Spec.RKEConfig.ETCD.SnapshotScheduleCron = fmt.Sprintf("*/%v * * * *", s.snapshotConfig.SnapshotInterval)

	_, err = s.client.Steve.SteveType(stevetypes.Provisioning).Update(clusterResponse, clusterObject)
	require.NoError(s.T(), err)
}

// TestAutomaticSnapshotRetention runs the retention-limit check as a named subtest, using the cluster name,
// retention and interval from the suite's loaded configuration.
func (s *SnapshotRetentionTestSuite) TestAutomaticSnapshotRetention() {
	type retentionCase struct {
		testName                 string
		client                   *rancher.Client
		clusterName              string
		retentionLimit           int
		intervalBetweenSnapshots int
	}

	cases := []retentionCase{
		{
			testName:                 "Retention limit test",
			client:                   s.client,
			clusterName:              s.snapshotConfig.ClusterName,
			retentionLimit:           2,
			intervalBetweenSnapshots: 1,
		},
	}

	// NOTE(review): only testName is read from each case; the subtest body takes its cluster name,
	// retention and interval from s.snapshotConfig, not from the table — confirm this is intended.
	for i := range cases {
		s.Run(cases[i].testName, func() {
			cfg := s.snapshotConfig
			createSnapshotsUntilRetentionLimit(s.T(), s.client, cfg.ClusterName, cfg.SnapshotRetention, cfg.SnapshotInterval)
		})
	}
}

// TestAutomaticSnapshotRetentionDynamic runs the retention-limit check directly with the cluster name,
// retention and interval taken from the suite's loaded configuration.
func (s *SnapshotRetentionTestSuite) TestAutomaticSnapshotRetentionDynamic() {
	createSnapshotsUntilRetentionLimit(
		s.T(),
		s.client,
		s.snapshotConfig.ClusterName,
		s.snapshotConfig.SnapshotRetention,
		s.snapshotConfig.SnapshotInterval,
	)
}

// In order for 'go test' to run this suite, we need to create
// a normal test function and pass our suite to suite.Run
func TestSnapshotRetentionTestSuite(t *testing.T) {
suite.Run(t, new(SnapshotRetentionTestSuite))
}

0 comments on commit 2a64213

Please sign in to comment.