From 1c818f91f4494ee0415883e265bb8a42e8876eb9 Mon Sep 17 00:00:00 2001 From: "Alex Ellis (OpenFaaS Ltd)" Date: Wed, 3 Nov 2021 10:18:45 +0000 Subject: [PATCH 1/3] Configure pod termination grace period from write_timeout Configures the Pod's termination grace period in seconds to the write_timeout environment-variable, when available. Otherwise, it's set to the default for Kubernetes, which is 30 seconds. This change is important for allowing function containers to drain active connections, without losing work in progress. Signed-off-by: Alex Ellis (OpenFaaS Ltd) --- pkg/controller/deployment.go | 19 +++++++++- pkg/controller/deployment_test.go | 61 +++++++++++++++++++++++++++++++ pkg/handlers/deploy.go | 21 ++++++++++- pkg/handlers/deploy_test.go | 41 ++++++++++++++++++++- 4 files changed, 139 insertions(+), 3 deletions(-) diff --git a/pkg/controller/deployment.go b/pkg/controller/deployment.go index 40c466774..2ae9b99b2 100644 --- a/pkg/controller/deployment.go +++ b/pkg/controller/deployment.go @@ -4,6 +4,7 @@ import ( "context" "encoding/json" "strings" + "time" "github.com/google/go-cmp/cmp" faasv1 "github.com/openfaas/faas-netes/pkg/apis/openfaas/v1" @@ -56,6 +57,21 @@ func newDeployment( } } + terminationGracePeriod := time.Second * 30 + + if function.Spec.Environment != nil { + e := *function.Spec.Environment + if v, ok := e["write_timeout"]; ok && len(v) > 0 { + period, err := time.ParseDuration(v) + if err != nil { + glog.Warningf("Function %s failed to parse write_timeout: %s", + function.Spec.Name, err.Error()) + } + terminationGracePeriod = period + } + } + terminationGracePeriodSeconds := int64(terminationGracePeriod.Seconds()) + allowPrivilegeEscalation := false deploymentSpec := &appsv1.Deployment{ @@ -99,7 +115,8 @@ func newDeployment( Annotations: annotations, }, Spec: corev1.PodSpec{ - NodeSelector: nodeSelector, + NodeSelector: nodeSelector, + TerminationGracePeriodSeconds: &terminationGracePeriodSeconds, Containers: []corev1.Container{ { Name: function.Spec.Name, diff --git a/pkg/controller/deployment_test.go b/pkg/controller/deployment_test.go index 24b41eca0..35ba97fc2 100644 --- a/pkg/controller/deployment_test.go +++ b/pkg/controller/deployment_test.go @@ -10,6 +10,67 @@ import ( "k8s.io/client-go/kubernetes/fake" ) +func Test_GracePeriodFromWriteTimeout(t *testing.T) { + + scenarios := []struct { + name string + seconds int64 + envs map[string]string + }{ + {"grace period is the default", 30, map[string]string{}}, + {"grace period is set from write_timeout", 60, map[string]string{"write_timeout": "60s"}}, + } + + for _, s := range scenarios { + t.Run(s.name, func(t *testing.T) { + + want := int64(s.seconds) + function := &faasv1.Function{ + ObjectMeta: metav1.ObjectMeta{ + Name: "alpine", + }, + Spec: faasv1.FunctionSpec{ + Name: "alpine", + Image: "ghcr.io/openfaas/alpine:latest", + Annotations: &map[string]string{}, + ReadOnlyRootFilesystem: true, + Environment: &s.envs}, + } + + factory := NewFunctionFactory(fake.NewSimpleClientset(), + k8s.DeploymentConfig{ + HTTPProbe: false, + SetNonRootUser: true, + LivenessProbe: &k8s.ProbeConfig{ + PeriodSeconds: 1, + TimeoutSeconds: 3, + InitialDelaySeconds: 0, + }, + ReadinessProbe: &k8s.ProbeConfig{ + PeriodSeconds: 1, + TimeoutSeconds: 3, + InitialDelaySeconds: 0, + }, + }) + + secrets := map[string]*corev1.Secret{} + + deployment := newDeployment(function, nil, secrets, factory) + got := deployment.Spec.Template.Spec.TerminationGracePeriodSeconds + if got == nil { + t.Errorf("TerminationGracePeriodSeconds not set, but want %d", want) + t.Fail() + return + } + + if want != *got { + t.Errorf("TerminationGracePeriodSeconds want %d, but got %d", want, got) + t.Fail() + } + }) + } +} + func Test_newDeployment(t *testing.T) { function := &faasv1.Function{ ObjectMeta: metav1.ObjectMeta{ diff --git a/pkg/handlers/deploy.go b/pkg/handlers/deploy.go index 54cfd9ca3..4a2a906a6 100644 --- a/pkg/handlers/deploy.go +++ b/pkg/handlers/deploy.go @@ -14,6 +14,7 @@ import ( "sort" "strconv" "strings" + "time" "github.com/openfaas/faas-netes/pkg/k8s" @@ -24,6 +25,7 @@ import ( "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/intstr" + glog "k8s.io/klog" ) // initialReplicasCount how many replicas to start of creating for a function @@ -172,6 +174,21 @@ func makeDeploymentSpec(request types.FunctionDeployment, existingSecrets map[st return nil, err } + terminationGracePeriod := time.Second * 30 + + if request.EnvVars != nil { + if v, ok := request.EnvVars["write_timeout"]; ok && len(v) > 0 { + period, err := time.ParseDuration(v) + if err != nil { + glog.Warningf("Function %s failed to parse write_timeout: %s", + request.Service, err.Error()) + } + terminationGracePeriod = period + } + } + + terminationGracePeriodSeconds := int64(terminationGracePeriod.Seconds()) + enableServiceLinks := false allowPrivilegeEscalation := false @@ -211,7 +228,9 @@ func makeDeploymentSpec(request types.FunctionDeployment, existingSecrets map[st Annotations: annotations, }, Spec: apiv1.PodSpec{ - NodeSelector: nodeSelector, + NodeSelector: nodeSelector, + TerminationGracePeriodSeconds: &terminationGracePeriodSeconds, + Containers: []apiv1.Container{ { Name: request.Service, diff --git a/pkg/handlers/deploy_test.go b/pkg/handlers/deploy_test.go index a72055577..a8c56183f 100644 --- a/pkg/handlers/deploy_test.go +++ b/pkg/handlers/deploy_test.go @@ -13,6 +13,46 @@ import ( apiv1 "k8s.io/api/core/v1" ) +func Test_GracePeriodFromWriteTimeout(t *testing.T) { + + scenarios := []struct { + name string + seconds int64 + envs map[string]string + }{ + {"grace period is the default", 30, map[string]string{}}, + {"grace period is set from write_timeout", 60, map[string]string{"write_timeout": "60s"}}, + } + + for _, s := range scenarios { + t.Run(s.name, func(t *testing.T) { + request := types.FunctionDeployment{Service: "testfunc", Image: "ghcr.io/openfaas/alpine:latest"} + factory := k8s.NewFunctionFactory(fake.NewSimpleClientset(), k8s.DeploymentConfig{ + LivenessProbe: &k8s.ProbeConfig{}, + ReadinessProbe: &k8s.ProbeConfig{}, + SetNonRootUser: false, + }, nil) + + request.EnvVars = s.envs + deployment, err := makeDeploymentSpec(request, map[string]*apiv1.Secret{}, factory) + if err != nil { + t.Errorf("unexpected makeDeploymentSpec error: %s", err.Error()) + } + want := s.seconds + got := deployment.Spec.Template.Spec.TerminationGracePeriodSeconds + + if got == nil { + t.Fatalf("want: %d, got: nil", want) + } + + if *got != want { + t.Errorf("TerminationGracePeriodSeconds want: %d, got: %d", want, *got) + } + + }) + } +} + func Test_buildAnnotations_Empty_In_CreateRequest(t *testing.T) { request := types.FunctionDeployment{} @@ -113,7 +153,6 @@ func Test_SetNonRootUser(t *testing.T) { } }) } - } func Test_buildEnvVars_NoSortedKeys(t *testing.T) { From eb6ac46d78a8a20349fa67b9f6362dad8178ee8b Mon Sep 17 00:00:00 2001 From: "Alex Ellis (OpenFaaS Ltd)" Date: Thu, 4 Nov 2021 11:11:12 +0000 Subject: [PATCH 2/3] Customise makefile Allow container registry to be overridden easily for local builds Signed-off-by: Alex Ellis (OpenFaaS Ltd) --- Makefile | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/Makefile b/Makefile index dad31c2e0..6f6fbe936 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,7 @@ .PHONY: build local push namespaces install charts start-kind stop-kind build-buildx render-charts TAG?=latest OWNER?=openfaas +SERVER?=ghcr.io export DOCKER_CLI_EXPERIMENTAL=enabled TOOLS_DIR := .tools @@ -24,16 +25,16 @@ local: build-docker: docker build \ - -t ghcr.io/$(OWNER)/faas-netes:$(TAG) . + -t $(SERVER)/$(OWNER)/faas-netes:$(TAG) . .PHONY: build-buildx build-buildx: - @echo ghcr.io/$(OWNER)/faas-netes:$(TAG) && \ + @echo $(SERVER)/$(OWNER)/faas-netes:$(TAG) && \ docker buildx create --use --name=multiarch --node=multiarch && \ docker buildx build \ --push \ --platform linux/amd64 \ - --tag ghcr.io/$(OWNER)/faas-netes:$(TAG) \ + --tag $(SERVER)/$(OWNER)/faas-netes:$(TAG) \ . .PHONY: build-buildx-all @@ -42,21 +43,21 @@ build-buildx-all: docker buildx build \ --platform linux/amd64,linux/arm/v7,linux/arm64 \ --output "type=image,push=false" \ - --tag ghcr.io/$(OWNER)/faas-netes:$(TAG) \ + --tag $(SERVER)/$(OWNER)/faas-netes:$(TAG) \ . .PHONY: publish-buildx-all publish-buildx-all: - @echo ghcr.io/$(OWNER)/faas-netes:$(TAG) && \ + @echo $(SERVER)/$(OWNER)/faas-netes:$(TAG) && \ docker buildx create --use --name=multiarch --node=multiarch && \ docker buildx build \ --platform linux/amd64,linux/arm/v7,linux/arm64 \ --push=true \ - --tag ghcr.io/$(OWNER)/faas-netes:$(TAG) \ + --tag $(SERVER)/$(OWNER)/faas-netes:$(TAG) \ . push: - docker push ghcr.io/$(OWNER)/faas-netes:$(TAG) + docker push $(SERVER)/$(OWNER)/faas-netes:$(TAG) charts: cd chart && helm package openfaas/ && helm package kafka-connector/ && helm package cron-connector/ && helm package nats-connector/ && helm package mqtt-connector/ && helm package pro-builder/ From 4e3efd48357beec829c0dc525388905d71fd174b Mon Sep 17 00:00:00 2001 From: "Alex Ellis (OpenFaaS Ltd)" Date: Thu, 4 Nov 2021 14:56:04 +0000 Subject: [PATCH 3/3] Add 2s jitter to termination grace period The additional 2s should prevent an issue where the grace period is exactly the same as the timeout, and kills the Pod before the remaining requests have completed. Signed-off-by: Alex Ellis (OpenFaaS Ltd) --- pkg/controller/deployment.go | 8 ++++++-- pkg/controller/deployment_test.go | 12 ++++++------ pkg/handlers/deploy.go | 7 +++++-- pkg/handlers/deploy_test.go | 12 ++++++------ 4 files changed, 23 insertions(+), 16 deletions(-) diff --git a/pkg/controller/deployment.go b/pkg/controller/deployment.go index 2ae9b99b2..4d829522f 100644 --- a/pkg/controller/deployment.go +++ b/pkg/controller/deployment.go @@ -57,7 +57,9 @@ func newDeployment( } } - terminationGracePeriod := time.Second * 30 + // add 2s jitter to avoid a race condition between write_timeout and grace period + jitter := time.Second * 2 + terminationGracePeriod := time.Second*30 + jitter if function.Spec.Environment != nil { e := *function.Spec.Environment @@ -67,9 +69,11 @@ func newDeployment( glog.Warningf("Function %s failed to parse write_timeout: %s", function.Spec.Name, err.Error()) } - terminationGracePeriod = period + + terminationGracePeriod = period + jitter } } + terminationGracePeriodSeconds := int64(terminationGracePeriod.Seconds()) allowPrivilegeEscalation := false diff --git a/pkg/controller/deployment_test.go b/pkg/controller/deployment_test.go index 35ba97fc2..12ca6f554 100644 --- a/pkg/controller/deployment_test.go +++ b/pkg/controller/deployment_test.go @@ -13,18 +13,18 @@ import ( func Test_GracePeriodFromWriteTimeout(t *testing.T) { scenarios := []struct { - name string - seconds int64 - envs map[string]string + name string + wantSeconds int64 + envs map[string]string }{ - {"grace period is the default", 30, map[string]string{}}, - {"grace period is set from write_timeout", 60, map[string]string{"write_timeout": "60s"}}, + {"grace period is the default", 32, map[string]string{}}, + {"grace period is set from write_timeout", 62, map[string]string{"write_timeout": "60s"}}, } for _, s := range scenarios { t.Run(s.name, func(t *testing.T) { - want := int64(s.seconds) + want := int64(s.wantSeconds) function := &faasv1.Function{ ObjectMeta: metav1.ObjectMeta{ Name: "alpine", diff --git a/pkg/handlers/deploy.go b/pkg/handlers/deploy.go index 4a2a906a6..62494844a 100644 --- a/pkg/handlers/deploy.go +++ b/pkg/handlers/deploy.go @@ -174,7 +174,9 @@ func makeDeploymentSpec(request types.FunctionDeployment, existingSecrets map[st return nil, err } - terminationGracePeriod := time.Second * 30 + // add 2s jitter to avoid a race condition between write_timeout and grace period + jitter := time.Second * 2 + terminationGracePeriod := time.Second*30 + jitter if request.EnvVars != nil { if v, ok := request.EnvVars["write_timeout"]; ok && len(v) > 0 { @@ -183,7 +185,8 @@ func makeDeploymentSpec(request types.FunctionDeployment, existingSecrets map[st glog.Warningf("Function %s failed to parse write_timeout: %s", request.Service, err.Error()) } - terminationGracePeriod = period + + terminationGracePeriod = period + jitter } } diff --git a/pkg/handlers/deploy_test.go b/pkg/handlers/deploy_test.go index a8c56183f..e8609b075 100644 --- a/pkg/handlers/deploy_test.go +++ b/pkg/handlers/deploy_test.go @@ -16,12 +16,12 @@ import ( func Test_GracePeriodFromWriteTimeout(t *testing.T) { scenarios := []struct { - name string - seconds int64 - envs map[string]string + name string + wantSeconds int64 + envs map[string]string }{ - {"grace period is the default", 30, map[string]string{}}, - {"grace period is set from write_timeout", 60, map[string]string{"write_timeout": "60s"}}, + {"grace period is the default", 32, map[string]string{}}, + {"grace period is set from write_timeout", 62, map[string]string{"write_timeout": "60s"}}, } for _, s := range scenarios { @@ -38,7 +38,7 @@ func Test_GracePeriodFromWriteTimeout(t *testing.T) { if err != nil { t.Errorf("unexpected makeDeploymentSpec error: %s", err.Error()) } - want := s.seconds + want := s.wantSeconds got := deployment.Spec.Template.Spec.TerminationGracePeriodSeconds if got == nil {