From 0561a78ac7a1ab181cecbee5cc475d6b089f6be5 Mon Sep 17 00:00:00 2001 From: David Eliahu Date: Wed, 12 Jan 2022 21:36:45 -0800 Subject: [PATCH 1/3] WIP - add queue timeout --- pkg/lib/k8s/virtual_service.go | 9 +++++ .../resources/realtimeapi/k8s_specs.go | 1 + .../resources/trafficsplitter/k8s_specs.go | 1 + pkg/proxy/breaker.go | 36 ++++++++++++++++++- pkg/proxy/handler.go | 2 +- 5 files changed, 47 insertions(+), 2 deletions(-) diff --git a/pkg/lib/k8s/virtual_service.go b/pkg/lib/k8s/virtual_service.go index 98ce122c47..98f39e053b 100644 --- a/pkg/lib/k8s/virtual_service.go +++ b/pkg/lib/k8s/virtual_service.go @@ -45,6 +45,7 @@ type VirtualServiceSpec struct { Labels map[string]string Annotations map[string]string Headers *istionetworking.Headers + Retries *int32 } type Destination struct { @@ -153,6 +154,14 @@ func VirtualService(spec *VirtualServiceSpec) *istioclientnetworking.VirtualServ httpRoutes = append(httpRoutes, exactMatch, prefixMatch) } + if spec.Retries != nil { + for i := range httpRoutes { + httpRoutes[i].Retries = &istionetworking.HTTPRetry{ + Attempts: *spec.Retries, + } + } + } + virtualService := &istioclientnetworking.VirtualService{ TypeMeta: _virtualServiceTypeMeta, ObjectMeta: kmeta.ObjectMeta{ diff --git a/pkg/operator/resources/realtimeapi/k8s_specs.go b/pkg/operator/resources/realtimeapi/k8s_specs.go index 7a6824d14d..339fa07b54 100644 --- a/pkg/operator/resources/realtimeapi/k8s_specs.go +++ b/pkg/operator/resources/realtimeapi/k8s_specs.go @@ -149,6 +149,7 @@ func virtualServiceSpec(api *spec.API) *istioclientnetworking.VirtualService { }, PrefixPath: api.Networking.Endpoint, Rewrite: pointer.String("/"), + Retries: pointer.Int32(0), Annotations: api.ToK8sAnnotations(), Labels: map[string]string{ "apiName": api.Name, diff --git a/pkg/operator/resources/trafficsplitter/k8s_specs.go b/pkg/operator/resources/trafficsplitter/k8s_specs.go index 5c6834885f..2c87cb53a5 100644 --- a/pkg/operator/resources/trafficsplitter/k8s_specs.go +++ b/pkg/operator/resources/trafficsplitter/k8s_specs.go @@ -32,6 +32,7 @@ func virtualServiceSpec(trafficSplitter *spec.API) *istioclientnetworking.Virtua Destinations: getTrafficSplitterDestinations(trafficSplitter), ExactPath: trafficSplitter.Networking.Endpoint, Rewrite: pointer.String("/"), + Retries: pointer.Int32(0), Annotations: trafficSplitter.ToK8sAnnotations(), Labels: map[string]string{ "apiName": trafficSplitter.Name, diff --git a/pkg/proxy/breaker.go b/pkg/proxy/breaker.go index ff00f35c38..02515188fa 100644 --- a/pkg/proxy/breaker.go +++ b/pkg/proxy/breaker.go @@ -22,6 +22,7 @@ import ( "context" "errors" "fmt" + "time" "go.uber.org/atomic" ) @@ -29,6 +30,7 @@ import ( var ( // ErrRequestQueueFull indicates the breaker queue depth was exceeded. ErrRequestQueueFull = errors.New("pending request queue full") + ErrQueueTimeout = errors.New("queue timeout") ) // BreakerParams defines the parameters of the breaker. @@ -139,7 +141,7 @@ func (b *Breaker) Maybe(ctx context.Context, thunk func()) error { defer b.releasePending() // Wait for capacity in the active queue. - if err := b.sem.acquire(ctx); err != nil { + if err := b.sem.acquireWithTimeout(ctx, 0); err != nil { return err } // Defer releasing capacity in the active. @@ -239,6 +241,38 @@ func (s *semaphore) acquire(ctx context.Context) error { } } +// acquireWithTimeout acquires capacity from the semaphore, with a timeout. +// if timeout <= 0, no timeout is used +func (s *semaphore) acquireWithTimeout(ctx context.Context, timeout time.Duration) error { + if timeout <= 0 { + return s.acquire(ctx) + } + + timer := time.NewTimer(timeout) + + for { + old := s.state.Load() + capacity, in := unpack(old) + + if in >= capacity { + select { + case <-timer.C: + return ErrQueueTimeout + case <-ctx.Done(): + return ctx.Err() + case <-s.queue: + } + // Force reload state. + continue + } + + in++ + if s.state.CAS(old, pack(capacity, in)) { + return nil + } + } +} + // release releases capacity in the semaphore. // If the semaphore capacity was reduced in between and as a result inFlight is greater // than capacity, we don't wake up goroutines as they'd not get any capacity anyway. diff --git a/pkg/proxy/handler.go b/pkg/proxy/handler.go index 06e349ab1f..0a0420bea1 100644 --- a/pkg/proxy/handler.go +++ b/pkg/proxy/handler.go @@ -35,7 +35,7 @@ func Handler(breaker *Breaker, next http.Handler) http.HandlerFunc { if err := breaker.Maybe(r.Context(), func() { next.ServeHTTP(w, r) }); err != nil { - if errors.Is(err, context.DeadlineExceeded) || errors.Is(err, ErrRequestQueueFull) { + if errors.Is(err, context.DeadlineExceeded) || errors.Is(err, ErrRequestQueueFull) || errors.Is(err, ErrQueueTimeout) { http.Error(w, err.Error(), http.StatusServiceUnavailable) } else { w.WriteHeader(http.StatusInternalServerError) From 2b48266358ad4fa2eaf08370bf5c99b56eb96b47 Mon Sep 17 00:00:00 2001 From: David Eliahu Date: Wed, 12 Jan 2022 22:32:42 -0800 Subject: [PATCH 2/3] WIP - async queue timeout --- cmd/dequeuer/main.go | 1 + docs/workloads/realtime/configuration.md | 1 + pkg/dequeuer/async_handler.go | 18 ++++++++++++++++-- pkg/dequeuer/dequeuer.go | 1 + pkg/proxy/breaker.go | 1 + 5 files changed, 20 insertions(+), 2 deletions(-) diff --git a/cmd/dequeuer/main.go b/cmd/dequeuer/main.go index c5402f0283..188202f2f7 100644 --- a/cmd/dequeuer/main.go +++ b/cmd/dequeuer/main.go @@ -57,6 +57,7 @@ func main() { flag.StringVar(&clusterUID, "cluster-uid", "", "cluster unique identifier") flag.StringVar(&probesPath, "probes-path", "", "path to the probes spec") flag.StringVar(&queueURL, "queue", "", "target queue URL from which the api messages will be dequeued") + // TODO add queue_timeout_seconds flag.StringVar(&apiKind, "api-kind", "", fmt.Sprintf("api kind (%s|%s)", userconfig.BatchAPIKind.String(), userconfig.AsyncAPIKind.String())) flag.StringVar(&apiName, "api-name", "", "api name") flag.StringVar(&jobID, "job-id", "", "job ID") diff --git a/docs/workloads/realtime/configuration.md b/docs/workloads/realtime/configuration.md index b918ceb130..6608f6de34 100644 --- a/docs/workloads/realtime/configuration.md +++ b/docs/workloads/realtime/configuration.md @@ -7,6 +7,7 @@ port: # port to which requests will be sent (default: 8080; exported as $CORTEX_PORT) max_concurrency: # maximum number of requests that will be concurrently sent into the container (default: 1) max_queue_length: # maximum number of requests per replica which will be queued (beyond max_concurrency) before requests are rejected with error code 503 (default: 100) + max_queued_seconds: containers: # configurations for the containers to run (at least one constainer must be provided) - name: # name of the container (required) image: # docker image to use for the container (required) diff --git a/pkg/dequeuer/async_handler.go b/pkg/dequeuer/async_handler.go index c2065cdb9b..17a943ba88 100644 --- a/pkg/dequeuer/async_handler.go +++ b/pkg/dequeuer/async_handler.go @@ -28,7 +28,10 @@ import ( "github.com/aws/aws-sdk-go/service/s3" "github.com/aws/aws-sdk-go/service/sqs" awslib "github.com/cortexlabs/cortex/pkg/lib/aws" + "github.com/cortexlabs/cortex/pkg/lib/debug" "github.com/cortexlabs/cortex/pkg/lib/errors" + "github.com/cortexlabs/cortex/pkg/lib/pointer" + s "github.com/cortexlabs/cortex/pkg/lib/strings" "github.com/cortexlabs/cortex/pkg/lib/telemetry" "github.com/cortexlabs/cortex/pkg/types/async" "go.uber.org/zap" @@ -75,15 +78,26 @@ func (h *AsyncMessageHandler) Handle(message *sqs.Message) error { return errors.ErrorUnexpected("got unexpected sqs message with empty or nil body") } + var msgSentTime *time.Time + if msgSentTimestamp, ok := message.Attributes[sqs.MessageSystemAttributeNameSentTimestamp]; ok { + if msgSentTimestamp != nil { + if parsed, ok := s.ParseInt64(*msgSentTimestamp); ok { + msgSentTime = pointer.Time(time.UnixMilli(parsed)) + } + } + } + requestID := *message.Body - err := h.handleMessage(requestID) + err := h.handleMessage(requestID, msgSentTime) if err != nil { return err } return nil } -func (h *AsyncMessageHandler) handleMessage(requestID string) error { +func (h *AsyncMessageHandler) handleMessage(requestID string, msgSentTime *time.Time) error { + debug.Ppg(msgSentTime) + h.log.Infow("processing workload", "id", requestID) err := h.updateStatus(requestID, async.StatusInProgress) diff --git a/pkg/dequeuer/dequeuer.go b/pkg/dequeuer/dequeuer.go index beb42d0f1e..7f375a14f9 100644 --- a/pkg/dequeuer/dequeuer.go +++ b/pkg/dequeuer/dequeuer.go @@ -84,6 +84,7 @@ func (d *SQSDequeuer) ReceiveMessage() (*sqs.Message, error) { MessageAttributeNames: aws.StringSlice(_messageAttributes), VisibilityTimeout: d.visibilityTimeout, WaitTimeSeconds: d.waitTimeSeconds, + AttributeNames: aws.StringSlice([]string{sqs.MessageSystemAttributeNameSentTimestamp}), }) if err != nil { diff --git a/pkg/proxy/breaker.go b/pkg/proxy/breaker.go index 02515188fa..59d0ddd445 100644 --- a/pkg/proxy/breaker.go +++ b/pkg/proxy/breaker.go @@ -141,6 +141,7 @@ func (b *Breaker) Maybe(ctx context.Context, thunk func()) error { defer b.releasePending() // Wait for capacity in the active queue. + // TODO use the actual timeout if err := b.sem.acquireWithTimeout(ctx, 0); err != nil { return err } From 89f9222646c8a43318245e9071b4e31759abae90 Mon Sep 17 00:00:00 2001 From: David Eliahu Date: Tue, 18 Jan 2022 14:44:49 -0800 Subject: [PATCH 3/3] Update config name --- docs/workloads/async/configuration.md | 1 + docs/workloads/realtime/configuration.md | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/workloads/async/configuration.md b/docs/workloads/async/configuration.md index 798a697871..0c468e2c78 100644 --- a/docs/workloads/async/configuration.md +++ b/docs/workloads/async/configuration.md @@ -6,6 +6,7 @@ pod: # pod configuration (required) port: # port to which requests will be sent (default: 8080; exported as $CORTEX_PORT) max_concurrency: # maximum number of requests that will be concurrently sent into the container (default: 1, max allowed: 100) + queue_timeout_seconds: # maximum amount of time a request can be queued before beginning to be processed containers: # configurations for the containers to run (at least one constainer must be provided) - name: # name of the container (required) image: # docker image to use for the container (required) diff --git a/docs/workloads/realtime/configuration.md b/docs/workloads/realtime/configuration.md index 6608f6de34..95f08a31cb 100644 --- a/docs/workloads/realtime/configuration.md +++ b/docs/workloads/realtime/configuration.md @@ -7,7 +7,7 @@ port: # port to which requests will be sent (default: 8080; exported as $CORTEX_PORT) max_concurrency: # maximum number of requests that will be concurrently sent into the container (default: 1) max_queue_length: # maximum number of requests per replica which will be queued (beyond max_concurrency) before requests are rejected with error code 503 (default: 100) - max_queued_seconds: + queue_timeout_seconds: # maximum amount of time a request can be queued before beginning to be processed containers: # configurations for the containers to run (at least one constainer must be provided) - name: # name of the container (required) image: # docker image to use for the container (required)