Skip to content

Commit

Permalink
Merge pull request #286 from buildkite/feat_add_max_idle_connections
Browse files Browse the repository at this point in the history
This change enables configuration of http connection pooling
  • Loading branch information
wolfeidau authored Jun 5, 2024
2 parents 861b62b + 0e08a5d commit 4a430c8
Show file tree
Hide file tree
Showing 5 changed files with 106 additions and 35 deletions.
50 changes: 30 additions & 20 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,12 @@ It requires a `provided.al2` environment and respects the following env vars:
`Key=Value,Other=Value` containing the Cloudwatch dimensions to index metrics
under.

To adjust timeouts and connection pooling in the HTTP client, use the following env vars:

- `BUILDKITE_AGENT_METRICS_TIMEOUT` : Timeout, in seconds, for HTTP requests to the Buildkite API; the same value is also used for the TLS handshake and idle-connection timeouts (default 15).
- `BUILDKITE_AGENT_METRICS_MAX_IDLE_CONNS` : Maximum number of idle (keep-alive) HTTP connections
for Buildkite Agent API. Zero means no limit, -1 disables pooling (default 100).

Additionally, one of the following groups of environment variables must be set
in order to define how the Lambda function should obtain the required Buildkite
Agent API token:
Expand Down Expand Up @@ -140,43 +146,47 @@ docker run --rm buildkite-agent-metrics -token abc123 -interval 30s -queue my-qu
$ buildkite-agent-metrics --help
Usage of buildkite-agent-metrics:
-backend string
Specify the backend to use: cloudwatch, statsd, prometheus, stackdriver (default "cloudwatch")
Specify the backend to use: cloudwatch, newrelic, prometheus, stackdriver, statsd (default "cloudwatch")
-cloudwatch-dimensions string
Cloudwatch dimensions to index metrics under, in the form of Key=Value, Other=Value
Cloudwatch dimensions to index metrics under, in the form of Key=Value, Other=Value
-cloudwatch-region string
AWS Region to connect to, defaults to $AWS_REGION or us-east-1
AWS Region to connect to, defaults to $AWS_REGION or us-east-1
-debug
Show debug output
Show debug output
-debug-http
Show full http traces
Show full http traces
-dry-run
Whether to only print metrics
Whether to only print metrics
-endpoint string
A custom Buildkite Agent API endpoint (default "https://agent.buildkite.com/v3")
A custom Buildkite Agent API endpoint (default "https://agent.buildkite.com/v3")
-interval duration
Update metrics every interval, rather than once
Update metrics every interval, rather than once
-max-idle-conns int
Maximum number of idle (keep-alive) HTTP connections for Buildkite Agent API. Zero means no limit, -1 disables connection reuse. (default 100)
-newrelic-app-name string
New Relic application name for metric events
New Relic application name for metric events
-newrelic-license-key string
New Relic license key for publishing events
New Relic license key for publishing events
-prometheus-addr string
Prometheus metrics transport bind address (default ":8080")
Prometheus metrics transport bind address (default ":8080")
-prometheus-path string
Prometheus metrics transport path (default "/metrics")
Prometheus metrics transport path (default "/metrics")
-queue value
Specific queues to process
Specific queues to process
-quiet
Only print errors
Only print errors
-stackdriver-projectid string
Specify Stackdriver Project ID
Specify Stackdriver Project ID
-statsd-host string
Specify the StatsD server (default "127.0.0.1:8125")
Specify the StatsD server (default "127.0.0.1:8125")
-statsd-tags
Whether your StatsD server supports tagging like Datadog
-token string
A Buildkite Agent Registration Token
Whether your StatsD server supports tagging like Datadog
-timeout int
Timeout, in seconds, for HTTP requests to Buildkite API (default 15)
-token value
Buildkite Agent registration tokens. At least one is required. Multiple cluster tokens can be used to gather metrics for multiple clusters.
-version
Show the version
Show the version
```

### Backends
Expand Down
9 changes: 8 additions & 1 deletion collector/collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -407,18 +407,25 @@ func traceHTTPRequest(req *http.Request) *http.Request {
return req
}

func NewHTTPClient(timeout int) *http.Client {
// NewHTTPClient builds the HTTP client used to talk to the Buildkite Agent API.
//
// timeout is a number of seconds and is applied uniformly: as the overall
// request timeout, the dial timeout and TCP keep-alive period, the TLS
// handshake timeout, the response-header timeout and the idle-connection
// timeout. maxIdleConns is passed straight through to
// http.Transport.MaxIdleConns, where zero means no limit.
//
// The returned client does not follow redirects: CheckRedirect returns
// http.ErrUseLastResponse, so the redirect response itself is handed back to
// the caller.
func NewHTTPClient(timeout, maxIdleConns int) *http.Client {
	connectionTimeout := time.Duration(timeout) * time.Second

	// http.Transport.Dial is deprecated; DialContext lets the transport
	// cancel an in-flight dial as soon as the request no longer needs it.
	dialer := &net.Dialer{
		Timeout:   connectionTimeout,
		KeepAlive: connectionTimeout,
	}

	return &http.Client{
		Timeout: connectionTimeout,
		Transport: &http.Transport{
			MaxIdleConns:          maxIdleConns,
			IdleConnTimeout:       connectionTimeout,
			ResponseHeaderTimeout: connectionTimeout,
			DisableKeepAlives:     false,
			DialContext:           dialer.DialContext,
			TLSHandshakeTimeout:   connectionTimeout,
		},
		CheckRedirect: func(req *http.Request, via []*http.Request) error {
			return http.ErrUseLastResponse
		},
	}
}
19 changes: 14 additions & 5 deletions lambda/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ func Handler(ctx context.Context, evt json.RawMessage) (string, error) {
quietString := os.Getenv("BUILDKITE_QUIET")
quiet := quietString == "1" || quietString == "true"
timeout := os.Getenv("BUILDKITE_AGENT_METRICS_TIMEOUT")
maxIdleConns := os.Getenv("BUILDKITE_AGENT_METRICS_MAX_IDLE_CONNS")

debugEnvVar := os.Getenv("BUILDKITE_AGENT_METRICS_DEBUG")
debug := debugEnvVar == "1" || debugEnvVar == "true"
Expand Down Expand Up @@ -94,17 +95,17 @@ func Handler(ctx context.Context, evt json.RawMessage) (string, error) {
queues = strings.Split(queue, ",")
}

if timeout == "" {
timeout = "15"
configuredTimeout, err := toIntWithDefault(timeout, 15)
if err != nil {
return "", err
}

configuredTimeout, err := strconv.Atoi(timeout)

configuredMaxIdleConns, err := toIntWithDefault(maxIdleConns, 100) // Default to 100 in line with http.DefaultTransport
if err != nil {
return "", err
}

httpClient := collector.NewHTTPClient(configuredTimeout)
httpClient := collector.NewHTTPClient(configuredTimeout, configuredMaxIdleConns)

userAgent := fmt.Sprintf("buildkite-agent-metrics/%s buildkite-agent-metrics-lambda", version.Version)

Expand Down Expand Up @@ -274,3 +275,11 @@ func checkMutuallyExclusiveEnvVars(varNames ...string) error {
return fmt.Errorf("the environment variables [%s] are mutually exclusive", strings.Join(foundVars, ","))
}
}

// toIntWithDefault converts val to an int using strconv.Atoi. An empty val
// yields defaultVal with a nil error; any other value is parsed and the
// result and error from strconv.Atoi are returned unchanged.
func toIntWithDefault(val string, defaultVal int) (int, error) {
	if len(val) > 0 {
		parsed, err := strconv.Atoi(val)
		return parsed, err
	}

	return defaultVal, nil
}
44 changes: 44 additions & 0 deletions lambda/main_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
package main

import "testing"

// Test_toIntWithDefault checks the three input classes of toIntWithDefault:
// an empty string (falls back to the default), a non-numeric string
// (reports an error) and a well-formed integer (is parsed).
func Test_toIntWithDefault(t *testing.T) {
	cases := []struct {
		name       string
		val        string
		defaultVal int
		want       int
		wantErr    bool
	}{
		{name: "empty", val: "", defaultVal: 10, want: 10},
		{name: "invalid", val: "invalid", defaultVal: 10, wantErr: true},
		{name: "valid", val: "20", defaultVal: 10, want: 20},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got, err := toIntWithDefault(tc.val, tc.defaultVal)

			// An error-expectation mismatch makes the value comparison
			// meaningless, so stop after reporting it.
			if gotErr := err != nil; gotErr != tc.wantErr {
				t.Errorf("toIntWithDefault(%q, %d) error = %v, wantErr %v", tc.val, tc.defaultVal, err, tc.wantErr)
				return
			}

			if got != tc.want {
				t.Errorf("toIntWithDefault(%q, %d) = %v, want %v", tc.val, tc.defaultVal, got, tc.want)
			}
		})
	}
}
19 changes: 10 additions & 9 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,15 @@ var metricsBackend backend.Backend

func main() {
var (
interval = flag.Duration("interval", 0, "Update metrics every interval, rather than once")
showVersion = flag.Bool("version", false, "Show the version")
quiet = flag.Bool("quiet", false, "Only print errors")
debug = flag.Bool("debug", false, "Show debug output")
debugHttp = flag.Bool("debug-http", false, "Show full http traces")
dryRun = flag.Bool("dry-run", false, "Whether to only print metrics")
endpoint = flag.String("endpoint", "https://agent.buildkite.com/v3", "A custom Buildkite Agent API endpoint")
timeout = flag.Int("timeout", 15, "Timeout, in seconds, for HTTP requests to Buildkite API")
interval = flag.Duration("interval", 0, "Update metrics every interval, rather than once")
showVersion = flag.Bool("version", false, "Show the version")
quiet = flag.Bool("quiet", false, "Only print errors")
debug = flag.Bool("debug", false, "Show debug output")
debugHttp = flag.Bool("debug-http", false, "Show full http traces")
dryRun = flag.Bool("dry-run", false, "Whether to only print metrics")
endpoint = flag.String("endpoint", "https://agent.buildkite.com/v3", "A custom Buildkite Agent API endpoint")
timeout = flag.Int("timeout", 15, "Timeout, in seconds, TLS handshake and idle connections, for HTTP requests, to Buildkite API")
maxIdleConns = flag.Int("max-idle-conns", 100, "Maximum number of idle (keep-alive) HTTP connections for Buildkite Agent API. Zero means no limit, -1 disables connection reuse.")

// backend config
backendOpt = flag.String("backend", "cloudwatch", "Specify the backend to use: cloudwatch, newrelic, prometheus, stackdriver, statsd")
Expand Down Expand Up @@ -141,7 +142,7 @@ func main() {
}
}

httpClient := collector.NewHTTPClient(*timeout)
httpClient := collector.NewHTTPClient(*timeout, *maxIdleConns)

collectors := make([]*collector.Collector, 0, len(tokens))
for _, token := range tokens {
Expand Down

0 comments on commit 4a430c8

Please sign in to comment.