Skip to content

Commit

Permalink
Merge pull request #85 from target/pod-ordering
Browse files Browse the repository at this point in the history
Pod ordering
  • Loading branch information
GregoryDosh authored Apr 28, 2022
2 parents 5106ba1 + 8ddf20b commit e215341
Show file tree
Hide file tree
Showing 6 changed files with 315 additions and 24 deletions.
24 changes: 24 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ Pod-Reaper is configurable through environment variables. The pod-reaper specifi
- `REQUIRE_ANNOTATION_VALUES` comma-separated list of metadata annotation values (of key-value pair) that pod-reaper should require
- `DRY_RUN` log pod-reaper's actions but don't actually kill any pods
- `MAX_PODS` kill a maximum number of pods on each run
- `POD_SORTING_STRATEGY` sorts pods before killing them (most useful when used with MAX_PODS)
- `LOG_LEVEL` control verbosity level of log messages
- `LOG_FORMAT` choose between several formats of logging

Expand Down Expand Up @@ -115,6 +116,29 @@ Default value: unset (which will behave as if it were set to "0")

Acceptable values are positive integers. Negative integers will evaluate to 0 and any other values will error. This can be useful to prevent too many pods being killed in one run. Logging messages will reflect that a pod was selected for reaping and that pod was not killed because too many pods were reaped already.

### `POD_SORTING_STRATEGY`

Default value: unset (which will use the pod ordering returned by the API server when no ordering is specified).
Accepted values:
- (unset) - use the default ordering from the API server
- `random` (case-sensitive) will randomly shuffle the list of pods before killing
- `oldest-first` (case-sensitive) will sort pods into oldest-first order based on the pod's start time (!! warning below)
- `youngest-first` (case-sensitive) will sort pods into youngest-first order based on the pod's start time (!! warning below)
- `pod-deletion-cost` (case-sensitive) will sort pods based on the [pod deletion cost annotation](https://kubernetes.io/docs/concepts/workloads/controllers/replicaset/#pod-deletion-cost).

!! WARNINGS !!

Pod start time is not always defined. In these cases, sorting strategies based on age put pods without start times at the
end of the list. From my experience, this usually happens during a race condition with the pod initially being scheduled,
but there may be other cases hidden away.

Using pod-reaper against the kube-system namespace can have some surprising implications. For example, during testing I
found that the kube-scheduler was owned by a master node (not a replicaset/daemon-set) and appeared to effectively ignore
delete actions. The age returned from `kubectl` was reset, but the actual pod start time was unaffected. As a result of
this, I found a looping scenario where the kube-scheduler was effectively always the oldest pod.

In examples/pod-sorting-strategy.yml I mitigated this by excluding pods with the label `tier: control-plane`.

## Logging

Pod reaper logs in JSON format using logrus (https://github.com/sirupsen/logrus).
Expand Down
85 changes: 85 additions & 0 deletions examples/pod-sorting-strategy.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
# example configuration with permission for running pod-reaper against
# an entire cluster, reaping at most one pod per run, oldest pod first

---
# namespace for the reaper
apiVersion: v1
kind: Namespace
metadata:
  name: reaper

---
# service account for running pod-reaper
apiVersion: v1
kind: ServiceAccount
metadata:
  name: pod-reaper-service-account
  namespace: reaper

---
# minimal permissions required for running pod-reaper at cluster level
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: pod-reaper-cluster-role
rules:
  - apiGroups: [""]
    resources: ["pods"]
    verbs: ["list", "delete"]

---
# binding the above cluster role (permissions) to the above service account
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: pod-reaper-role-binding
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: pod-reaper-cluster-role
subjects:
  - kind: ServiceAccount
    name: pod-reaper-service-account
    namespace: reaper

---
# a basic pod-reaper deployment
apiVersion: apps/v1
kind: Deployment
metadata:
  name: pod-reaper
  namespace: reaper # namespace matches above
spec:
  replicas: 1
  selector:
    matchLabels:
      app: pod-reaper
  template:
    metadata:
      labels:
        app: pod-reaper
    spec:
      serviceAccount: pod-reaper-service-account # service account from above
      containers:
        - name: chaos
          image: brianberzins/pod-reaper:alpha
          resources:
            limits:
              cpu: 30m
              memory: 30Mi
            requests:
              cpu: 20m
              memory: 20Mi
          env:
            # exclude pods labelled `tier: control-plane` (see the README
            # warning about running against the kube-system namespace)
            - name: EXCLUDE_LABEL_KEY
              value: "tier"
            - name: EXCLUDE_LABEL_VALUES
              value: "control-plane"
            - name: SCHEDULE
              value: "@every 20s"
            - name: CHAOS_CHANCE
              value: "1"
            # kill a maximum of one pod on each run
            - name: MAX_PODS
              value: "1"
            # sort pods before killing so that the oldest pod is reaped first
            - name: POD_SORTING_STRATEGY
              value: "oldest-first"
103 changes: 84 additions & 19 deletions reaper/options.go
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
package main

import (
"errors"
"fmt"
v1 "k8s.io/api/core/v1"
"math/rand"
"os"
"sort"
"strconv"
"strings"
"time"
Expand All @@ -26,6 +30,7 @@ const envRequireAnnotationKey = "REQUIRE_ANNOTATION_KEY"
const envRequireAnnotationValues = "REQUIRE_ANNOTATION_VALUES"
const envDryRun = "DRY_RUN"
const envMaxPods = "MAX_PODS"
const envPodSortingStrategy = "POD_SORTING_STRATEGY"
const envEvict = "EVICT"

type options struct {
Expand All @@ -38,6 +43,7 @@ type options struct {
annotationRequirement *labels.Requirement
dryRun bool
maxPods int
podSortingStrategy func([]v1.Pod)
rules rules.Rules
evict bool
}
Expand Down Expand Up @@ -163,6 +169,72 @@ func maxPods() (int, error) {
return v, nil
}

// getPodDeletionCost reads the pod's deletion cost from the
// "controller.kubernetes.io/pod-deletion-cost" annotation.
// A pod that does not carry the annotation has a cost of 0.
// https://kubernetes.io/docs/concepts/workloads/controllers/replicaset/#pod-deletion-cost
func getPodDeletionCost(pod v1.Pod) int32 {
	raw, ok := pod.ObjectMeta.Annotations["controller.kubernetes.io/pod-deletion-cost"]
	if ok {
		// The parse error is deliberately dropped: per the k8s doc, invalid
		// values should be rejected by the API server, so whatever ParseInt
		// hands back is used as-is.
		parsed, _ := strconv.ParseInt(raw, 10, 32)
		return int32(parsed)
	}
	return 0
}

// defaultSort is the no-op sorting strategy: it performs no reordering and
// leaves the slice exactly as it was passed in.
func defaultSort(_ []v1.Pod) {}

// randomSort shuffles pods in place using the math/rand package-level Shuffle.
func randomSort(pods []v1.Pod) {
	swap := func(a, b int) {
		pods[a], pods[b] = pods[b], pods[a]
	}
	rand.Shuffle(len(pods), swap)
}

// oldestFirstSort sorts pods in place by ascending start time (compared at
// whole-second resolution). Pods whose Status.StartTime is nil are ordered
// after every pod that has a start time.
func oldestFirstSort(pods []v1.Pod) {
	sort.Slice(pods, func(a, b int) bool {
		left, right := pods[a].Status.StartTime, pods[b].Status.StartTime
		switch {
		case left == nil:
			// a pod without a start time never sorts ahead of another pod
			return false
		case right == nil:
			// a pod with a start time always sorts ahead of one without
			return true
		default:
			return left.Unix() < right.Unix()
		}
	})
}

// youngestFirstSort sorts pods in place by descending start time (compared at
// whole-second resolution). Pods whose Status.StartTime is nil are ordered
// after every pod that has a start time.
func youngestFirstSort(pods []v1.Pod) {
	sort.Slice(pods, func(a, b int) bool {
		left, right := pods[a].Status.StartTime, pods[b].Status.StartTime
		switch {
		case left == nil:
			// a pod without a start time never sorts ahead of another pod
			return false
		case right == nil:
			// a pod with a start time always sorts ahead of one without
			return true
		default:
			return left.Unix() > right.Unix()
		}
	})
}

// podDeletionCostSort sorts pods in place by ascending pod deletion cost, as
// reported by getPodDeletionCost, so the lowest-cost pods come first.
func podDeletionCostSort(pods []v1.Pod) {
	cheaper := func(a, b int) bool {
		costA := getPodDeletionCost(pods[a])
		costB := getPodDeletionCost(pods[b])
		return costA < costB
	}
	sort.Slice(pods, cheaper)
}

// podSortingStrategy selects the pod sorting function named by the
// POD_SORTING_STRATEGY environment variable.
//
// When the variable is not set, defaultSort is returned. The recognized
// (case-sensitive) values are "random", "oldest-first", "youngest-first" and
// "pod-deletion-cost". Any other value, including a set-but-empty one,
// yields a nil function and an error that quotes the rejected value so the
// misconfiguration can be spotted in the logs.
func podSortingStrategy() (func([]v1.Pod), error) {
	sortingStrategy, present := os.LookupEnv(envPodSortingStrategy)
	if !present {
		return defaultSort, nil
	}
	switch sortingStrategy {
	case "random":
		return randomSort, nil
	case "oldest-first":
		return oldestFirstSort, nil
	case "youngest-first":
		return youngestFirstSort, nil
	case "pod-deletion-cost":
		return podDeletionCostSort, nil
	default:
		// strconv.Quote makes empty or whitespace-only values visible in the message
		return nil, errors.New("unknown pod sorting strategy: " + strconv.Quote(sortingStrategy))
	}
}

func evict() (bool, error) {
value, exists := os.LookupEnv(envEvict)
if !exists {
Expand All @@ -173,44 +245,37 @@ func evict() (bool, error) {

func loadOptions() (options options, err error) {
options.namespace = namespace()
options.gracePeriod, err = gracePeriod()
if err != nil {
if options.gracePeriod, err = gracePeriod(); err != nil {
return options, err
}
options.schedule = schedule()
options.runDuration, err = runDuration()
if err != nil {
if options.runDuration, err = runDuration(); err != nil {
return options, err
}
options.labelExclusion, err = labelExclusion()
if err != nil {
if options.labelExclusion, err = labelExclusion(); err != nil {
return options, err
}
options.labelRequirement, err = labelRequirement()
if err != nil {
if options.labelRequirement, err = labelRequirement(); err != nil {
return options, err
}
options.annotationRequirement, err = annotationRequirement()
if err != nil {
if options.annotationRequirement, err = annotationRequirement(); err != nil {
return options, err
}
options.dryRun, err = dryRun()
if err != nil {
if options.dryRun, err = dryRun(); err != nil {
return options, err
}
options.maxPods, err = maxPods()
if err != nil {
if options.maxPods, err = maxPods(); err != nil {
return options, err
}

options.evict, err = evict()
if err != nil {
if options.podSortingStrategy, err = podSortingStrategy(); err != nil {
return options, err
}
if options.evict, err = evict(); err != nil {
return options, err
}

// rules
options.rules, err = rules.LoadRules()
if err != nil {
if options.rules, err = rules.LoadRules(); err != nil {
return options, err
}
return options, nil
Expand Down
Loading

0 comments on commit e215341

Please sign in to comment.