Skip to content

Commit

Permalink
feat: Repository-wide RunnerDeployment Autoscaling (#57)
Browse files Browse the repository at this point in the history
* feat: Repository-wide RunnerDeployment Autoscaling

This adds `maxReplicas` and `minReplicas` to the RunnerDeploymentSpec. If and only if both fields are set, the controller computes and sets desired `replicas` automatically depending on the demand.

The number of demanded runner replicas is computed by `queued workflow runs + in_progress workflow runs` for the repository. The support for organizational runners is not included.

Ref #10
  • Loading branch information
KUOKA Yusuke authored Jun 27, 2020
1 parent 512cae6 commit 5bb2694
Show file tree
Hide file tree
Showing 12 changed files with 520 additions and 19 deletions.
38 changes: 38 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,44 @@ example-runnerdeploy2475h595fr mumoshu/actions-runner-controller-ci Running
example-runnerdeploy2475ht2qbr mumoshu/actions-runner-controller-ci Running
```

#### Autoscaling

`RunnerDeployment` can scale the number of runners between `minReplicas` and `maxReplicas`, depending on the number of pending (queued or in-progress) workflow runs.

In the example below, `actions-runner` checks for pending workflow runs once per sync period, and scales to e.g. 3 runners if there are 3 pending workflow runs at sync time.

```
apiVersion: actions.summerwind.dev/v1alpha1
kind: RunnerDeployment
metadata:
name: summerwind-actions-runner-controller
spec:
minReplicas: 1
maxReplicas: 3
template:
spec:
repository: summerwind/actions-runner-controller
```

Please also note that the sync period is set to 10 minutes by default and is configurable via the `--sync-period` flag.

Additionally, the autoscaling feature has an anti-flapping option that prevents a periodic loop of scaling up and down.
By default, it doesn't scale down until a grace period of 10 minutes has passed after a scale up. The grace period can be configured, as an integer number of seconds, by setting `scaleDownDelaySecondsAfterScaleOut`:

```
apiVersion: actions.summerwind.dev/v1alpha1
kind: RunnerDeployment
metadata:
name: summerwind-actions-runner-controller
spec:
minReplicas: 1
maxReplicas: 3
scaleDownDelaySecondsAfterScaleOut: 60
template:
spec:
repository: summerwind/actions-runner-controller
```

## Additional tweaks

You can pass details through the spec selector. Here's an example of what you may want to do:
Expand Down
24 changes: 23 additions & 1 deletion api/v1alpha1/runnerdeployment_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,36 @@ import (

// RunnerDeploymentSpec defines the desired state of RunnerDeployment
type RunnerDeploymentSpec struct {
Replicas *int `json:"replicas"`
// Replicas is an explicitly requested number of runner replicas.
// NOTE(review): autoscaling appears to apply only when this is unset and both
// MinReplicas and MaxReplicas are set; confirm against the reconciler.
// +optional
Replicas *int `json:"replicas,omitempty"`

// MinReplicas is the minimum number of replicas the deployment is allowed to scale
// +optional
MinReplicas *int `json:"minReplicas,omitempty"`

// MaxReplicas is the maximum number of replicas the deployment is allowed to scale
// +optional
MaxReplicas *int `json:"maxReplicas,omitempty"`

// ScaleDownDelaySecondsAfterScaleUp is the approximate delay, in seconds, before a scale down
// is allowed after a scale up.
// Used to prevent flapping (down->up->down->... loop)
// Note that the serialized key is scaleDownDelaySecondsAfterScaleOut, not ...AfterScaleUp.
// +optional
ScaleDownDelaySecondsAfterScaleUp *int `json:"scaleDownDelaySecondsAfterScaleOut,omitempty"`

// Template is the runner template used for this deployment; its spec carries the
// repository the runners are registered to.
Template RunnerTemplate `json:"template"`
}

// RunnerDeploymentStatus holds the status fields reported for a RunnerDeployment.
type RunnerDeploymentStatus struct {
AvailableReplicas int `json:"availableReplicas"`
ReadyReplicas int `json:"readyReplicas"`

// Replicas is the total number of desired, non-terminated and latest pods to be set for the primary RunnerSet
// This doesn't include outdated pods while upgrading the deployment and replacing the runnerset.
// Note that it is serialized as desiredReplicas.
// +optional
Replicas *int `json:"desiredReplicas,omitempty"`

// LastSuccessfulScaleOutTime presumably records when the deployment was last scaled out
// successfully (NOTE(review): confirm against the reconciler).
// +optional
LastSuccessfulScaleOutTime *metav1.Time `json:"lastSuccessfulScaleOutTime,omitempty"`
}

// +kubebuilder:object:root=true
Expand Down
26 changes: 25 additions & 1 deletion api/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 21 additions & 1 deletion config/crd/bases/actions.summerwind.dev_runnerdeployments.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,21 @@ spec:
spec:
description: RunnerReplicaSetSpec defines the desired state of RunnerDeployment
properties:
maxReplicas:
description: MinReplicas is the maximum number of replicas the deployment
is allowed to scale
type: integer
minReplicas:
description: MinReplicas is the minimum number of replicas the deployment
is allowed to scale
type: integer
replicas:
type: integer
scaleDownDelaySecondsAfterScaleOut:
description: ScaleDownDelaySecondsAfterScaleUp is the approximate delay
for a scale down followed by a scale up Used to prevent flapping (down->up->down->...
loop)
type: integer
template:
properties:
metadata:
Expand Down Expand Up @@ -6717,13 +6730,20 @@ spec:
type: object
type: object
required:
- replicas
- template
type: object
status:
properties:
availableReplicas:
type: integer
desiredReplicas:
description: Replicas is the total number of desired, non-terminated
and latest pods to be set for the primary RunnerSet This doesn't include
outdated pods while upgrading the deployment and replacing the runnerset.
type: integer
lastSuccessfulScaleOutTime:
format: date-time
type: string
readyReplicas:
type: integer
required:
Expand Down
92 changes: 92 additions & 0 deletions controllers/autoscaling.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
package controllers

import (
"context"
"fmt"
"github.com/summerwind/actions-runner-controller/api/v1alpha1"
"strings"
)

// NotSupported is the error returned when autoscaling is requested for a
// runner deployment whose template does not set spec.repository.
type NotSupported struct{}

// Compile-time check that NotSupported values satisfy the error interface.
var _ error = NotSupported{}

// Error implements the error interface with a fixed explanatory message.
func (NotSupported) Error() string {
	const msg = "Autoscaling is currently supported only when spec.repository is set"
	return msg
}

// determineDesiredReplicas computes how many runner replicas an autoscaled
// RunnerDeployment should currently have.
//
// The demand is the number of workflow runs of the deployment's repository
// whose status is "queued" or "in_progress". The result is that demand raised
// to at least spec.minReplicas and lowered to at most spec.maxReplicas.
//
// It returns an error when:
//   - spec.replicas is set (the caller must not autoscale such deployments),
//   - spec.minReplicas or spec.maxReplicas is missing,
//   - the template's repository is empty (NotSupported),
//   - the template's repository is not of the form "owner/name",
//   - listing the workflow runs through the GitHub client fails.
//
// NOTE(review): the workflow runs are listed with nil options, so presumably
// only the first page of results is counted; confirm against the GitHub
// client's pagination behaviour.
func (r *RunnerDeploymentReconciler) determineDesiredReplicas(rd v1alpha1.RunnerDeployment) (*int, error) {
	switch {
	case rd.Spec.Replicas != nil:
		return nil, fmt.Errorf("bug: determineDesiredReplicas should not be called for deployment with specific replicas")
	case rd.Spec.MinReplicas == nil:
		return nil, fmt.Errorf("runnerdeployment %s/%s is missing minReplicas", rd.Namespace, rd.Name)
	case rd.Spec.MaxReplicas == nil:
		return nil, fmt.Errorf("runnerdeployment %s/%s is missing maxReplicas", rd.Namespace, rd.Name)
	}

	repoID := rd.Spec.Template.Spec.Repository
	if repoID == "" {
		return nil, NotSupported{}
	}

	// Validate the "owner/name" shape before indexing: a repository without
	// a slash would otherwise panic with an index-out-of-range.
	repo := strings.Split(repoID, "/")
	if len(repo) != 2 || repo[0] == "" || repo[1] == "" {
		return nil, fmt.Errorf("runnerdeployment %s/%s has invalid repository %q: expected \"owner/name\"", rd.Namespace, rd.Name, repoID)
	}
	user, repoName := repo[0], repo[1]

	list, _, err := r.GitHubClient.Actions.ListRepositoryWorkflowRuns(context.TODO(), user, repoName, nil)
	if err != nil {
		return nil, err
	}

	var inProgress, queued, completed, unknown int

	for _, run := range list.WorkflowRuns {
		// In May 2020, there are only 3 statuses.
		// Follow the below links for more details:
		// - https://developer.github.com/v3/actions/workflow-runs/#list-repository-workflow-runs
		// - https://developer.github.com/v3/checks/runs/#create-a-check-run
		switch run.GetStatus() {
		case "completed":
			completed++
		case "in_progress":
			inProgress++
		case "queued":
			queued++
		default:
			unknown++
		}
	}

	minReplicas := *rd.Spec.MinReplicas
	maxReplicas := *rd.Spec.MaxReplicas

	// Every run that is waiting or running needs a runner of its own.
	necessaryReplicas := queued + inProgress

	desiredReplicas := necessaryReplicas
	if necessaryReplicas < minReplicas {
		desiredReplicas = minReplicas
	} else if necessaryReplicas > maxReplicas {
		desiredReplicas = maxReplicas
	}

	// NOTE(review): rd is received by value, so unless Status is shared
	// through a pointer this write is not visible to the caller; confirm
	// and either drop it or move it to the caller.
	rd.Status.Replicas = &desiredReplicas

	r.Log.V(1).Info(
		"Calculated desired replicas",
		"computed_replicas_desired", desiredReplicas,
		"spec_replicas_min", minReplicas,
		"spec_replicas_max", maxReplicas,
		"workflow_runs_completed", completed,
		"workflow_runs_in_progress", inProgress,
		"workflow_runs_queued", queued,
		"workflow_runs_unknown", unknown,
	)

	// Return a pointer to a separate copy so the result does not alias the
	// value stored in rd.Status.
	replicas := desiredReplicas

	return &replicas, nil
}
Loading

0 comments on commit 5bb2694

Please sign in to comment.