From ac9a4e7e0cf78906dea7f1e35acf24f7015cb176 Mon Sep 17 00:00:00 2001 From: David Symons Date: Thu, 1 Jul 2021 17:41:44 +1000 Subject: [PATCH] Adds ability to configure frequency for pod termination via annotation --- chaoskube/chaoskube.go | 80 +++++++++++++----- chaoskube/chaoskube_test.go | 150 +++++++++++++++++++++++++--------- chart/chaoskube/values.yaml | 3 + main.go | 120 ++++++++++++++------------- notifier/slack_test.go | 6 +- terminator/delete_pod_test.go | 6 +- util/util.go | 141 +++++++++++++++++++++++--------- util/util_test.go | 37 ++++++++- 8 files changed, 378 insertions(+), 165 deletions(-) diff --git a/chaoskube/chaoskube.go b/chaoskube/chaoskube.go index 1bb7a7ba..3da75cf1 100644 --- a/chaoskube/chaoskube.go +++ b/chaoskube/chaoskube.go @@ -4,6 +4,7 @@ import ( "context" "errors" "fmt" + "math/rand" "regexp" "time" @@ -32,6 +33,8 @@ import ( type Chaoskube struct { // a kubernetes client object Client kubernetes.Interface + // the interval Chaoskube is configured to run at + Interval time.Duration // a label selector which restricts the pods to choose from Labels labels.Selector // an annotation selector which restricts the pods to choose from @@ -56,6 +59,8 @@ type Chaoskube struct { Timezone *time.Location // minimum age of pods to consider MinimumAge time.Duration + // an annotation containing the frequency to kill a pod at + FrequencyAnnotation string // an instance of logrus.StdLogger to write log messages to Logger log.FieldLogger // a terminator that terminates victim pods @@ -68,7 +73,7 @@ type Chaoskube struct { EventRecorder record.EventRecorder // a function to retrieve the current time Now func() time.Time - + // the maximum number of pods to terminate per interval MaxKill int // chaos events notifier Notifier notifier.Notifier @@ -95,32 +100,34 @@ var ( // * a logger implementing logrus.FieldLogger to send log output to // * what specific terminator to use to imbue chaos on victim pods // * whether to enable/disable dry-run mode -func New(client kubernetes.Interface, labels, annotations, kinds, namespaces, namespaceLabels labels.Selector, includedPodNames, excludedPodNames *regexp.Regexp, excludedWeekdays []time.Weekday, excludedTimesOfDay []util.TimePeriod, excludedDaysOfYear []time.Time, timezone *time.Location, minimumAge time.Duration, logger log.FieldLogger, dryRun bool, terminator terminator.Terminator, maxKill int, notifier notifier.Notifier) *Chaoskube { +func New(client kubernetes.Interface, interval time.Duration, labels, annotations, kinds, namespaces, namespaceLabels labels.Selector, includedPodNames, excludedPodNames *regexp.Regexp, excludedWeekdays []time.Weekday, excludedTimesOfDay []util.TimePeriod, excludedDaysOfYear []time.Time, timezone *time.Location, minimumAge time.Duration, frequencyAnnotation string, logger log.FieldLogger, dryRun bool, terminator terminator.Terminator, maxKill int, notifier notifier.Notifier) *Chaoskube { broadcaster := record.NewBroadcaster() broadcaster.StartRecordingToSink(&typedcorev1.EventSinkImpl{Interface: client.CoreV1().Events(v1.NamespaceAll)}) recorder := broadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "chaoskube"}) return &Chaoskube{ - Client: client, - Labels: labels, - Annotations: annotations, - Kinds: kinds, - Namespaces: namespaces, - NamespaceLabels: namespaceLabels, - IncludedPodNames: includedPodNames, - ExcludedPodNames: excludedPodNames, - ExcludedWeekdays: excludedWeekdays, - ExcludedTimesOfDay: excludedTimesOfDay, - ExcludedDaysOfYear: excludedDaysOfYear, - Timezone: timezone, - MinimumAge: minimumAge, - Logger: logger, - DryRun: dryRun, - Terminator: terminator, - EventRecorder: recorder, - Now: time.Now, - MaxKill: maxKill, - Notifier: notifier, + Client: client, + Interval: interval, + Labels: labels, + Annotations: annotations, + Kinds: kinds, + Namespaces: namespaces, + NamespaceLabels: namespaceLabels, + IncludedPodNames: includedPodNames, + ExcludedPodNames: excludedPodNames, + ExcludedWeekdays: excludedWeekdays, + ExcludedTimesOfDay: excludedTimesOfDay, + ExcludedDaysOfYear: excludedDaysOfYear, + Timezone: timezone, + MinimumAge: minimumAge, + FrequencyAnnotation: frequencyAnnotation, + Logger: logger, + DryRun: dryRun, + Terminator: terminator, + EventRecorder: recorder, + Now: time.Now, + MaxKill: maxKill, + Notifier: notifier, } } @@ -238,6 +245,8 @@ func (c *Chaoskube) Candidates(ctx context.Context) ([]v1.Pod, error) { pods = filterByPodName(pods, c.IncludedPodNames, c.ExcludedPodNames) pods = filterByOwnerReference(pods) + pods = filterByFrequency(pods, c.FrequencyAnnotation, c.Interval, c.Logger) + return pods, nil } @@ -533,3 +542,30 @@ func filterByOwnerReference(pods []v1.Pod) []v1.Pod { return filteredList } + +func filterByFrequency(pods []v1.Pod, annotation string, interval time.Duration, logger log.FieldLogger) []v1.Pod { + if annotation == "" { + return pods + } + + filteredList := []v1.Pod{} + for _, pod := range pods { + text, ok := pod.Annotations[annotation] + + // Don't filter out pods missing frequency annotation + if !ok { + filteredList = append(filteredList, pod) + } + + chance, err := util.ParseFrequency(text, interval) + if err != nil { + logger.WithField("err", err).Warn("failed to parse frequency annotation") + } + + if chance > rand.Float64() { + filteredList = append(filteredList, pod) + } + } + + return filteredList +} diff --git a/chaoskube/chaoskube_test.go b/chaoskube/chaoskube_test.go index 0175965f..1d5650b2 100644 --- a/chaoskube/chaoskube_test.go +++ b/chaoskube/chaoskube_test.go @@ -48,26 +48,29 @@ func (suite *Suite) SetupTest() { // TestNew tests that arguments are passed to the new instance correctly func (suite *Suite) TestNew() { var ( - client = fake.NewSimpleClientset() - labelSelector, _ = labels.Parse("foo=bar") - annotations, _ = labels.Parse("baz=waldo") - kinds, _ = labels.Parse("job") - namespaces, _ = labels.Parse("qux") - namespaceLabels, _ = labels.Parse("taz=wubble") - includedPodNames = regexp.MustCompile("foo") - excludedPodNames = regexp.MustCompile("bar") - excludedWeekdays = []time.Weekday{time.Friday} - excludedTimesOfDay = []util.TimePeriod{util.TimePeriod{}} - excludedDaysOfYear = []time.Time{time.Now()} - minimumAge = time.Duration(42) - dryRun = true - terminator = terminator.NewDeletePodTerminator(client, logger, 10*time.Second) - maxKill = 1 - notifier = testNotifier + client = fake.NewSimpleClientset() + interval = 10 * time.Minute + labelSelector, _ = labels.Parse("foo=bar") + annotations, _ = labels.Parse("baz=waldo") + kinds, _ = labels.Parse("job") + namespaces, _ = labels.Parse("qux") + namespaceLabels, _ = labels.Parse("taz=wubble") + includedPodNames = regexp.MustCompile("foo") + excludedPodNames = regexp.MustCompile("bar") + excludedWeekdays = []time.Weekday{time.Friday} + excludedTimesOfDay = []util.TimePeriod{util.TimePeriod{}} + excludedDaysOfYear = []time.Time{time.Now()} + minimumAge = time.Duration(42) + frequencyAnnotation = "chaos.alpha.kubernetes.io/frequency" + dryRun = true + terminator = terminator.NewDeletePodTerminator(client, logger, 10*time.Second) + maxKill = 1 + notifier = testNotifier ) chaoskube := New( client, + interval, labelSelector, annotations, kinds, @@ -80,6 +83,7 @@ func (suite *Suite) TestNew() { excludedDaysOfYear, time.UTC, minimumAge, + frequencyAnnotation, logger, dryRun, terminator, @@ -89,6 +93,7 @@ func (suite *Suite) TestNew() { suite.Require().NotNil(chaoskube) suite.Equal(client, chaoskube.Client) + suite.Equal(10*time.Minute, chaoskube.Interval) suite.Equal("foo=bar", chaoskube.Labels.String()) suite.Equal("baz=waldo", chaoskube.Annotations.String()) suite.Equal("job", chaoskube.Kinds.String()) @@ -101,6 +106,7 @@ func (suite *Suite) TestNew() { suite.Equal(excludedDaysOfYear, chaoskube.ExcludedDaysOfYear) suite.Equal(time.UTC, chaoskube.Timezone) suite.Equal(minimumAge, chaoskube.MinimumAge) + suite.Equal("chaos.alpha.kubernetes.io/frequency", chaoskube.FrequencyAnnotation) suite.Equal(logger, chaoskube.Logger) suite.Equal(dryRun, chaoskube.DryRun) suite.Equal(terminator, chaoskube.Terminator) @@ -109,6 +115,7 @@ func (suite *Suite) TestNew() { // TestRunContextCanceled tests that a canceled context will exit the Run function. func (suite *Suite) TestRunContextCanceled() { chaoskube := suite.setup( + 10*time.Minute, labels.Everything(), labels.Everything(), labels.Everything(), @@ -121,6 +128,7 @@ func (suite *Suite) TestRunContextCanceled() { []time.Time{}, time.UTC, time.Duration(0), + "", false, 10, 1, @@ -165,6 +173,7 @@ func (suite *Suite) TestCandidates() { suite.Require().NoError(err) chaoskube := suite.setupWithPods( + 10*time.Minute, labelSelector, annotationSelector, labels.Everything(), @@ -177,6 +186,7 @@ func (suite *Suite) TestCandidates() { []time.Time{}, time.UTC, time.Duration(0), + "", false, 10, ) @@ -210,6 +220,7 @@ func (suite *Suite) TestCandidatesNamespaceLabels() { suite.Require().NoError(err) chaoskube := suite.setupWithPods( + 10*time.Minute, labels.Everything(), labels.Everything(), labels.Everything(), @@ -222,6 +233,7 @@ func (suite *Suite) TestCandidatesNamespaceLabels() { []time.Time{}, time.UTC, time.Duration(0), + "", false, 10, ) @@ -253,6 +265,7 @@ func (suite *Suite) TestCandidatesPodNameRegexp() { {regexp.MustCompile("fo.*"), regexp.MustCompile("f.*"), []map[string]string{}}, } { chaoskube := suite.setupWithPods( + 10*time.Minute, labels.Everything(), labels.Everything(), labels.Everything(), @@ -265,6 +278,7 @@ func (suite *Suite) TestCandidatesPodNameRegexp() { []time.Time{}, time.UTC, time.Duration(0), + "", false, 10, ) @@ -293,6 +307,7 @@ func (suite *Suite) TestVictim() { suite.Require().NoError(err) chaoskube := suite.setupWithPods( + 10*time.Minute, labelSelector, labels.Everything(), labels.Everything(), @@ -305,6 +320,7 @@ func (suite *Suite) TestVictim() { []time.Time{}, time.UTC, time.Duration(0), + "", false, 10, ) @@ -346,6 +362,7 @@ func (suite *Suite) TestVictims() { suite.Require().NoError(err) chaoskube := suite.setup( + 10*time.Minute, labelSelector, labels.Everything(), labels.Everything(), @@ -358,6 +375,7 @@ func (suite *Suite) TestVictims() { []time.Time{}, time.UTC, time.Duration(0), + "", false, 10, tt.maxKill, @@ -371,6 +389,7 @@ func (suite *Suite) TestVictims() { // TestNoVictimReturnsError tests that on missing victim it returns a known error func (suite *Suite) TestNoVictimReturnsError() { chaoskube := suite.setup( + 10*time.Minute, labels.Everything(), labels.Everything(), labels.Everything(), @@ -383,6 +402,7 @@ func (suite *Suite) TestNoVictimReturnsError() { []time.Time{}, time.UTC, time.Duration(0), + "", false, 10, 1, @@ -406,6 +426,7 @@ func (suite *Suite) TestDeletePod() { {true, []map[string]string{foo, bar}}, } { chaoskube := suite.setupWithPods( + 10*time.Minute, labels.Everything(), labels.Everything(), labels.Everything(), @@ -418,11 +439,12 @@ func (suite *Suite) TestDeletePod() { []time.Time{}, time.UTC, time.Duration(0), + "", tt.dryRun, 10, ) - victim := util.NewPod("default", "foo", v1.PodRunning) + victim := util.NewPodBuilder("default", "foo").Build() err := chaoskube.DeletePod(context.Background(), victim) suite.Require().NoError(err) @@ -435,6 +457,7 @@ func (suite *Suite) TestDeletePod() { // TestDeletePodNotFound tests missing target pod will return an error. func (suite *Suite) TestDeletePodNotFound() { chaoskube := suite.setup( + 10*time.Minute, labels.Everything(), labels.Everything(), labels.Everything(), @@ -447,12 +470,13 @@ func (suite *Suite) TestDeletePodNotFound() { []time.Time{}, time.UTC, time.Duration(0), + "", false, 10, 1, ) - victim := util.NewPod("default", "foo", v1.PodRunning) + victim := util.NewPodBuilder("default", "foo").Build() err := chaoskube.DeletePod(context.Background(), victim) suite.EqualError(err, `pods "foo" not found`) @@ -667,6 +691,7 @@ func (suite *Suite) TestTerminateVictim() { }, } { chaoskube := suite.setupWithPods( + 10*time.Minute, labels.Everything(), labels.Everything(), labels.Everything(), @@ -679,6 +704,7 @@ func (suite *Suite) TestTerminateVictim() { tt.excludedDaysOfYear, tt.timezone, time.Duration(0), + "", false, 10, ) @@ -697,6 +723,7 @@ func (suite *Suite) TestTerminateVictim() { // TestTerminateNoVictimLogsInfo tests that missing victim prints a log message func (suite *Suite) TestTerminateNoVictimLogsInfo() { chaoskube := suite.setup( + 10*time.Minute, labels.Everything(), labels.Everything(), labels.Everything(), @@ -709,6 +736,7 @@ func (suite *Suite) TestTerminateNoVictimLogsInfo() { []time.Time{}, time.UTC, time.Duration(0), + "", false, 10, 1, @@ -746,8 +774,9 @@ func (suite *Suite) assertNotified(notifier *notifier.Noop) { suite.Assert().Greater(notifier.Calls, 0) } -func (suite *Suite) setupWithPods(labelSelector labels.Selector, annotations labels.Selector, kinds labels.Selector, namespaces labels.Selector, namespaceLabels labels.Selector, includedPodNames *regexp.Regexp, excludedPodNames *regexp.Regexp, excludedWeekdays []time.Weekday, excludedTimesOfDay []util.TimePeriod, excludedDaysOfYear []time.Time, timezone *time.Location, minimumAge time.Duration, dryRun bool, gracePeriod time.Duration) *Chaoskube { +func (suite *Suite) setupWithPods(interval time.Duration, labelSelector labels.Selector, annotations labels.Selector, kinds labels.Selector, namespaces labels.Selector, namespaceLabels labels.Selector, includedPodNames *regexp.Regexp, excludedPodNames *regexp.Regexp, excludedWeekdays []time.Weekday, excludedTimesOfDay []util.TimePeriod, excludedDaysOfYear []time.Time, timezone *time.Location, minimumAge time.Duration, frequencyAnnotation string, dryRun bool, gracePeriod time.Duration) *Chaoskube { chaoskube := suite.setup( + 10*time.Minute, labelSelector, annotations, kinds, @@ -760,6 +789,7 @@ func (suite *Suite) setupWithPods(labelSelector labels.Selector, annotations lab excludedDaysOfYear, timezone, minimumAge, + frequencyAnnotation, dryRun, gracePeriod, 1, @@ -774,9 +804,9 @@ func (suite *Suite) setupWithPods(labelSelector labels.Selector, annotations lab } pods := []v1.Pod{ - util.NewPod("default", "foo", v1.PodRunning), - util.NewPod("testing", "bar", v1.PodRunning), - util.NewPod("testing", "baz", v1.PodPending), // Non-running pods are ignored + util.NewPodBuilder("default", "foo").Build(), + util.NewPodBuilder("testing", "bar").Build(), + util.NewPodBuilder("testing", "baz").Build(), // Non-running pods are ignored } for _, pod := range pods { @@ -792,13 +822,13 @@ func (suite *Suite) createPods(client kubernetes.Interface, podsInfo []podInfo) namespace := util.NewNamespace(p.Namespace) _, err := client.CoreV1().Namespaces().Create(context.Background(), &namespace, metav1.CreateOptions{}) suite.Require().NoError(err) - pod := util.NewPod(p.Namespace, p.Name, v1.PodRunning) + pod := util.NewPodBuilder(p.Namespace, p.Name).Build() _, err = client.CoreV1().Pods(p.Namespace).Create(context.Background(), &pod, metav1.CreateOptions{}) suite.Require().NoError(err) } } -func (suite *Suite) setup(labelSelector labels.Selector, annotations labels.Selector, kinds labels.Selector, namespaces labels.Selector, namespaceLabels labels.Selector, includedPodNames *regexp.Regexp, excludedPodNames *regexp.Regexp, excludedWeekdays []time.Weekday, excludedTimesOfDay []util.TimePeriod, excludedDaysOfYear []time.Time, timezone *time.Location, minimumAge time.Duration, dryRun bool, gracePeriod time.Duration, maxKill int) *Chaoskube { +func (suite *Suite) setup(interval time.Duration, labelSelector labels.Selector, annotations labels.Selector, kinds labels.Selector, namespaces labels.Selector, namespaceLabels labels.Selector, includedPodNames *regexp.Regexp, excludedPodNames *regexp.Regexp, excludedWeekdays []time.Weekday, excludedTimesOfDay []util.TimePeriod, excludedDaysOfYear []time.Time, timezone *time.Location, minimumAge time.Duration, frequencyAnnotation string, dryRun bool, gracePeriod time.Duration, maxKill int) *Chaoskube { logOutput.Reset() client := fake.NewSimpleClientset() @@ -806,6 +836,7 @@ func (suite *Suite) setup(labelSelector labels.Selector, annotations labels.Sele return New( client, + interval, labelSelector, annotations, kinds, @@ -818,6 +849,7 @@ func (suite *Suite) setup(labelSelector labels.Selector, annotations labels.Sele excludedDaysOfYear, timezone, minimumAge, + frequencyAnnotation, logger, dryRun, terminator.NewDeletePodTerminator(client, nullLogger, gracePeriod), @@ -913,6 +945,7 @@ func (suite *Suite) TestMinimumAge() { }, } { chaoskube := suite.setup( + 10*time.Minute, labels.Everything(), labels.Everything(), labels.Everything(), @@ -925,6 +958,7 @@ func (suite *Suite) TestMinimumAge() { []time.Time{}, time.UTC, tt.minimumAge, + "", false, 10, 1, @@ -932,7 +966,7 @@ func (suite *Suite) TestMinimumAge() { chaoskube.Now = tt.now for _, p := range tt.pods { - pod := util.NewPod(p.namespace, p.name, v1.PodRunning) + pod := util.NewPodBuilder(p.namespace, p.name).Build() pod.ObjectMeta.CreationTimestamp = metav1.Time{Time: p.creationTime} _, err := chaoskube.Client.CoreV1().Pods(pod.Namespace).Create(context.Background(), &pod, metav1.CreateOptions{}) suite.Require().NoError(err) @@ -946,11 +980,11 @@ func (suite *Suite) TestMinimumAge() { } func (suite *Suite) TestFilterDeletedPods() { - deletedPod := util.NewPod("default", "deleted", v1.PodRunning) + deletedPod := util.NewPodBuilder("default", "deleted").Build() now := metav1.NewTime(time.Now()) deletedPod.SetDeletionTimestamp(&now) - runningPod := util.NewPod("default", "running", v1.PodRunning) + runningPod := util.NewPodBuilder("default", "running").Build() pods := []v1.Pod{runningPod, deletedPod} @@ -960,11 +994,11 @@ func (suite *Suite) TestFilterDeletedPods() { } func (suite *Suite) TestFilterByKinds() { - foo := util.NewPodWithOwner("default", "foo", v1.PodRunning, "parent-1") - foo1 := util.NewPodWithOwner("default", "foo-1", v1.PodRunning, "parent-2") - bar := util.NewPodWithOwner("default", "bar", v1.PodRunning, "other-parent") - baz := util.NewPod("default", "baz", v1.PodRunning) - baz1 := util.NewPod("default", "baz-1", v1.PodRunning) + foo := util.NewPodBuilder("default", "foo").WithOwnerUID("parent-1").Build() + foo1 := util.NewPodBuilder("default", "foo-1").WithOwnerUID("parent-2").Build() + bar := util.NewPodBuilder("default", "bar").WithOwnerUID("other-parent").Build() + baz := util.NewPodBuilder("default", "baz").Build() + baz1 := util.NewPodBuilder("default", "baz-1").Build() for _, tt := range []struct { name string @@ -1035,11 +1069,11 @@ func (suite *Suite) TestFilterByKinds() { } func (suite *Suite) TestFilterByOwnerReference() { - foo := util.NewPodWithOwner("default", "foo", v1.PodRunning, "parent") - foo1 := util.NewPodWithOwner("default", "foo-1", v1.PodRunning, "parent") - bar := util.NewPodWithOwner("default", "bar", v1.PodRunning, "other-parent") - baz := util.NewPod("default", "baz", v1.PodRunning) - baz1 := util.NewPod("default", "baz-1", v1.PodRunning) + foo := util.NewPodBuilder("default", "foo").WithOwnerUID("parent").Build() + foo1 := util.NewPodBuilder("default", "foo-1").WithOwnerUID("parent").Build() + bar := util.NewPodBuilder("default", "bar").WithOwnerUID("other-parent").Build() + baz := util.NewPodBuilder("default", "baz").Build() + baz1 := util.NewPodBuilder("default", "baz-1").Build() for _, tt := range []struct { seed int64 @@ -1094,8 +1128,47 @@ func (suite *Suite) TestFilterByOwnerReference() { } } +func (suite *Suite) TestFilterByFrequency() { + interval := 10 * time.Minute + logger, _ := test.NewNullLogger() + + foo := util.NewPodBuilder("default", "foo").WithFrequency("1 / hour").Build() + foo1 := util.NewPodBuilder("default", "foo-1").WithFrequency("1 / minute").Build() + bar := util.NewPodBuilder("default", "bar").WithFrequency("2.5 / hour").Build() + baz := util.NewPodBuilder("default", "baz").Build() + + pods := []v1.Pod{foo, foo1, bar, baz} + alwaysExpected := []v1.Pod{foo1, baz} + + for _, tt := range []struct { + seed int64 + expected []v1.Pod + }{ + { + seed: 1000, + expected: []v1.Pod{}, + }, + { + seed: 3000, + expected: []v1.Pod{foo}, + }, + { + seed: 4000, + expected: []v1.Pod{bar}, + }, + } { + expected := append(tt.expected, alwaysExpected...) + + rand.Seed(tt.seed) + results := filterByFrequency(pods, "chaos.alpha.kubernetes.io/frequency", interval, logger) + + suite.Assert().ElementsMatch(results, expected) + } +} + func (suite *Suite) TestNotifierCall() { chaoskube := suite.setupWithPods( + 10*time.Minute, labels.Everything(), labels.Everything(), labels.Everything(), @@ -1108,11 +1181,12 @@ func (suite *Suite) TestNotifierCall() { []time.Time{}, time.UTC, time.Duration(0), + "", false, 10, ) - victim := util.NewPod("default", "foo", v1.PodRunning) + victim := util.NewPodBuilder("default", "foo").Build() err := chaoskube.DeletePod(context.Background(), victim) suite.Require().NoError(err) diff --git a/chart/chaoskube/values.yaml b/chart/chaoskube/values.yaml index c2440a47..fed1b825 100644 --- a/chart/chaoskube/values.yaml +++ b/chart/chaoskube/values.yaml @@ -33,6 +33,9 @@ chaoskube: timezone: "UTC" # exclude all pods that haven't been running for at least one hour minimum-age: "1h" + # checks for "chaos.alpha.kubernetes.io/frequency" annotation on pods to determine frequency to terminate + # eg. setting "chaos.alpha.kubernetes.io/frequency=1/hour" will terminate the pod approximately once per hour + termination-frequency-annotation: "chaos.alpha.kubernetes.io/frequency" # terminate pods for real: this disables dry-run mode which is on by default no-dry-run: "" diff --git a/main.go b/main.go index 29830d02..ffce7d4c 100644 --- a/main.go +++ b/main.go @@ -15,9 +15,9 @@ import ( "syscall" "time" - "gopkg.in/alecthomas/kingpin.v2" "github.com/prometheus/client_golang/prometheus/promhttp" log "github.com/sirupsen/logrus" + "gopkg.in/alecthomas/kingpin.v2" "k8s.io/apimachinery/pkg/labels" "k8s.io/client-go/kubernetes" @@ -36,30 +36,31 @@ var ( ) var ( - labelString string - annString string - kindsString string - nsString string - nsLabelString string - includedPodNames *regexp.Regexp - excludedPodNames *regexp.Regexp - excludedWeekdays string - excludedTimesOfDay string - excludedDaysOfYear string - timezone string - minimumAge time.Duration - maxRuntime time.Duration - maxKill int - master string - kubeconfig string - interval time.Duration - dryRun bool - debug bool - metricsAddress string - gracePeriod time.Duration - logFormat string - logCaller bool - slackWebhook string + labelString string + annString string + kindsString string + nsString string + nsLabelString string + includedPodNames *regexp.Regexp + excludedPodNames *regexp.Regexp + excludedWeekdays string + excludedTimesOfDay string + excludedDaysOfYear string + timezone string + minimumAge time.Duration + frequencyAnnotation string + maxRuntime time.Duration + maxKill int + master string + kubeconfig string + interval time.Duration + dryRun bool + debug bool + metricsAddress string + gracePeriod time.Duration + logFormat string + logCaller bool + slackWebhook string ) func init() { @@ -78,6 +79,7 @@ func init() { kingpin.Flag("excluded-days-of-year", "A list of days of a year when termination is suspended, e.g. Apr1,Dec24").StringVar(&excludedDaysOfYear) kingpin.Flag("timezone", "The timezone by which to interpret the excluded weekdays and times of day, e.g. UTC, Local, Europe/Berlin. Defaults to UTC.").Default("UTC").StringVar(&timezone) kingpin.Flag("minimum-age", "Minimum age of pods to consider for termination").Default("0s").DurationVar(&minimumAge) + kingpin.Flag("termination-frequency-annotation", "Annotation to look for on pods describing how frequently a pod should be terminated.").StringVar(&frequencyAnnotation) kingpin.Flag("max-runtime", "Maximum runtime before chaoskube exits").Default("-1s").DurationVar(&maxRuntime) kingpin.Flag("max-kill", "Specifies the maximum number of pods to be terminated per interval.").Default("1").IntVar(&maxKill) kingpin.Flag("master", "The address of the Kubernetes cluster to target").StringVar(&master) @@ -110,29 +112,30 @@ func main() { log.SetReportCaller(logCaller) log.WithFields(log.Fields{ - "labels": labelString, - "annotations": annString, - "kinds": kindsString, - "namespaces": nsString, - "namespaceLabels": nsLabelString, - "includedPodNames": includedPodNames, - "excludedPodNames": excludedPodNames, - "excludedWeekdays": excludedWeekdays, - "excludedTimesOfDay": excludedTimesOfDay, - "excludedDaysOfYear": excludedDaysOfYear, - "timezone": timezone, - "minimumAge": minimumAge, - "maxRuntime": maxRuntime, - "maxKill": maxKill, - "master": master, - "kubeconfig": kubeconfig, - "interval": interval, - "dryRun": dryRun, - "debug": debug, - "metricsAddress": metricsAddress, - "gracePeriod": gracePeriod, - "logFormat": logFormat, - "slackWebhook": slackWebhook, + "labels": labelString, + "annotations": annString, + "kinds": kindsString, + "namespaces": nsString, + "namespaceLabels": nsLabelString, + "includedPodNames": includedPodNames, + "excludedPodNames": excludedPodNames, + "excludedWeekdays": excludedWeekdays, + "excludedTimesOfDay": excludedTimesOfDay, + "excludedDaysOfYear": excludedDaysOfYear, + "timezone": timezone, + "minimumAge": minimumAge, + "frequencyAnnotation": frequencyAnnotation, + "maxRuntime": maxRuntime, + "maxKill": maxKill, + "master": master, + "kubeconfig": kubeconfig, + "interval": interval, + "dryRun": dryRun, + "debug": debug, + "metricsAddress": metricsAddress, + "gracePeriod": gracePeriod, + "logFormat": logFormat, + "slackWebhook": slackWebhook, }).Debug("reading config") log.WithFields(log.Fields{ @@ -156,15 +159,16 @@ func main() { ) log.WithFields(log.Fields{ - "labels": labelSelector, - "annotations": annotations, - "kinds": kinds, - "namespaces": namespaces, - "namespaceLabels": namespaceLabels, - "includedPodNames": includedPodNames, - "excludedPodNames": excludedPodNames, - "minimumAge": minimumAge, - "maxKill": maxKill, + "labels": labelSelector, + "annotations": annotations, + "kinds": kinds, + "namespaces": namespaces, + "namespaceLabels": namespaceLabels, + "includedPodNames": includedPodNames, + "excludedPodNames": excludedPodNames, + "minimumAge": minimumAge, + "frequencyAnnotation": frequencyAnnotation, + "maxKill": maxKill, }).Info("setting pod filter") parsedWeekdays := util.ParseWeekdays(excludedWeekdays) @@ -208,6 +212,7 @@ func main() { chaoskube := chaoskube.New( client, + interval, labelSelector, annotations, kinds, @@ -220,6 +225,7 @@ func main() { parsedDaysOfYear, parsedTimezone, minimumAge, + frequencyAnnotation, log.StandardLogger(), dryRun, terminator.NewDeletePodTerminator(client, log.StandardLogger(), gracePeriod), diff --git a/notifier/slack_test.go b/notifier/slack_test.go index a001b7bb..cfa2799a 100644 --- a/notifier/slack_test.go +++ b/notifier/slack_test.go @@ -5,8 +5,6 @@ import ( "net/http/httptest" "testing" - v1 "k8s.io/api/core/v1" - "github.com/linki/chaoskube/internal/testutil" "github.com/linki/chaoskube/util" @@ -28,7 +26,7 @@ func (suite *SlackSuite) TestSlackNotificationForTerminationStatusOk() { })) defer testServer.Close() - testPod := util.NewPod("chaos", "chaos-57df4db6b-h9ktj", v1.PodRunning) + testPod := util.NewPodBuilder("chaos", "chaos-57df4db6b-h9ktj").Build() slack := NewSlackNotifier(testServer.URL + webhookPath) err := slack.NotifyPodTermination(testPod) @@ -47,7 +45,7 @@ func (suite *SlackSuite) TestSlackNotificationForTerminationStatus500() { })) defer testServer.Close() - testPod := util.NewPod("chaos", "chaos-57df4db6b-h9ktj", v1.PodRunning) + testPod := util.NewPodBuilder("chaos", "chaos-57df4db6b-h9ktj").Build() slack := NewSlackNotifier(testServer.URL + webhookPath) err := slack.NotifyPodTermination(testPod) diff --git a/terminator/delete_pod_test.go b/terminator/delete_pod_test.go index 369092a9..bcaa9be7 100644 --- a/terminator/delete_pod_test.go +++ b/terminator/delete_pod_test.go @@ -41,8 +41,8 @@ func (suite *DeletePodTerminatorSuite) TestTerminate() { terminator := NewDeletePodTerminator(client, logger, 10*time.Second) pods := []v1.Pod{ - util.NewPod("default", "foo", v1.PodRunning), - util.NewPod("testing", "bar", v1.PodRunning), + util.NewPodBuilder("default", "foo").Build(), + util.NewPodBuilder("testing", "bar").Build(), } for _, pod := range pods { @@ -50,7 +50,7 @@ func (suite *DeletePodTerminatorSuite) TestTerminate() { suite.Require().NoError(err) } - victim := util.NewPod("default", "foo", v1.PodRunning) + victim := util.NewPodBuilder("default", "foo").Build() err := terminator.Terminate(context.Background(), victim) suite.Require().NoError(err) diff --git a/util/util.go b/util/util.go index a17ec54b..868dbe98 100644 --- a/util/util.go +++ b/util/util.go @@ -3,6 +3,7 @@ package util import ( "fmt" "math/rand" + "strconv" "strings" "time" @@ -120,6 +121,35 @@ func ParseDays(days string) ([]time.Time, error) { return parsedDays, nil } +// Parses a "frequency" annotation in the form "[number] / [period]" (eg. 1/day) +// and converts it into a chance of occurrence in any given interval (eg. ~0.007) +func ParseFrequency(text string, interval time.Duration) (float64, error) { + parseablePeriods := map[string]time.Duration{ + "minute": 1 * time.Minute, + "hour": 1 * time.Hour, + "day": 24 * time.Hour, + "week": 24 * 7 * time.Hour, + } + + parts := strings.SplitN(text, "/", 2) + for i, p := range parts { + parts[i] = strings.TrimSpace(p) + } + + frequency, err := strconv.ParseFloat(parts[0], 64) + if err != nil { + return 0, err + } + + period, ok := parseablePeriods[parts[1]] + if !ok { + return 0, fmt.Errorf("unknown time period, %v", parts[1]) + } + + chance := (float64(interval) / float64(period)) * frequency + return chance, nil +} + // TimeOfDay normalizes the given point in time by returning a time object that represents the same // time of day of the given time but on the very first day (day 0). func TimeOfDay(pointInTime time.Time) time.Time { @@ -135,43 +165,6 @@ func FormatDays(days []time.Time) []string { return formattedDays } -// NewPod returns a new pod instance for testing purposes. -func NewPod(namespace, name string, phase v1.PodPhase) v1.Pod { - return NewPodWithOwner(namespace, name, phase, "") -} - -// NewPodWithOwner returns a new pod instance for testing purposes with a given owner UID -func NewPodWithOwner(namespace, name string, phase v1.PodPhase, owner types.UID) v1.Pod { - pod := v1.Pod{ - TypeMeta: metav1.TypeMeta{ - APIVersion: "v1", - Kind: "Pod", - }, - ObjectMeta: metav1.ObjectMeta{ - Namespace: namespace, - Name: name, - Labels: map[string]string{ - "app": name, - }, - Annotations: map[string]string{ - "chaos": name, - }, - SelfLink: fmt.Sprintf("/api/v1/namespaces/%s/pods/%s", namespace, name), - }, - Status: v1.PodStatus{ - Phase: phase, - }, - } - - if owner != "" { - pod.ObjectMeta.OwnerReferences = []metav1.OwnerReference{ - {UID: owner, Kind: "testkind"}, - } - } - - return pod -} - // NewNamespace returns a new namespace instance for testing purposes. func NewNamespace(name string) v1.Namespace { return v1.Namespace{ @@ -195,3 +188,77 @@ func RandomPodSubSlice(pods []v1.Pod, count int) []v1.Pod { res := pods[0:count] return res } + +type PodBuilder struct { + Name string + Namespace string + Phase v1.PodPhase + OwnerReference *metav1.OwnerReference + Labels map[string]string + Annotations map[string]string +} + +func NewPodBuilder(namespace string, name string) PodBuilder { + return PodBuilder{ + Name: name, + Namespace: namespace, + Phase: v1.PodRunning, + OwnerReference: nil, + Annotations: make(map[string]string), + Labels: make(map[string]string), + } +} + +func (b PodBuilder) Build() v1.Pod { + pod := v1.Pod{ + TypeMeta: metav1.TypeMeta{ + APIVersion: "v1", + Kind: "Pod", + }, + ObjectMeta: metav1.ObjectMeta{ + Namespace: b.Namespace, + Name: b.Name, + Labels: b.Labels, + Annotations: b.Annotations, + SelfLink: fmt.Sprintf( + "/api/v1/namespaces/%s/pods/%s", + b.Namespace, + b.Name, + ), + }, + Status: v1.PodStatus{ + Phase: b.Phase, + }, + } + + if b.OwnerReference != nil { + pod.ObjectMeta.OwnerReferences = []metav1.OwnerReference{*b.OwnerReference} + } + + return pod +} + +func (b PodBuilder) WithPhase(phase v1.PodPhase) PodBuilder { + b.Phase = phase + return b +} +func (b PodBuilder) WithOwnerReference(ownerReference metav1.OwnerReference) PodBuilder { + b.OwnerReference = &ownerReference + return b +} +func (b PodBuilder) WithOwnerUID(owner types.UID) PodBuilder { + b.OwnerReference = &metav1.OwnerReference{UID: owner, Kind: "testkind"} + return b +} +func (b PodBuilder) WithAnnotations(annotations map[string]string) PodBuilder { + b.Annotations = annotations + return b +} +func (b PodBuilder) WithLabels(labels map[string]string) PodBuilder { + b.Labels = labels + return b +} +func (b PodBuilder) WithFrequency(text string) PodBuilder { + b.Annotations["chaos.alpha.kubernetes.io/frequency"] = text + return b +} diff --git a/util/util_test.go b/util/util_test.go index 98e073de..40b59ad0 100644 --- a/util/util_test.go +++ b/util/util_test.go @@ -366,6 +366,34 @@ func (suite *Suite) TestParseDates() { } } +func (suite *Suite) TestParseFrequency() { + interval := 10 * time.Minute + + for _, tt := range []struct { + given string + expectedApprox float64 + }{ + { + "1 / hour", + 0.166666667, + }, { + "1 / minute", + 10.0, + }, { + "2.5 / hour", + 0.416666667, + }, { + "60 / day", + 0.416666667, + }, + } { + result, err := ParseFrequency(tt.given, interval) + suite.Require().NoError(err) + + suite.Assert().InDelta(tt.expectedApprox, result, 0.001) + } +} + func (suite *Suite) TestFormatDays() { for _, tt := range []struct { given []time.Time @@ -389,7 +417,8 @@ func (suite *Suite) TestFormatDays() { } func (suite *Suite) TestNewPod() { - pod := NewPod("namespace", "name", "phase") + pod := NewPodBuilder("namespace", "name"). + WithPhase("phase").Build() suite.Equal("v1", pod.APIVersion) suite.Equal("Pod", pod.Kind) @@ -410,9 +439,9 @@ func (suite *Suite) TestNewNamespace() { func (suite *Suite) TestRandomPodSublice() { pods := []v1.Pod{ - NewPod("default", "foo", v1.PodRunning), - NewPod("testing", "bar", v1.PodRunning), - NewPod("test", "baz", v1.PodRunning), + NewPodBuilder("default", "foo").Build(), + NewPodBuilder("testing", "bar").Build(), + NewPodBuilder("test", "baz").Build(), } for _, tt := range []struct {