Skip to content

Commit

Permalink
feat: add api http metrics
Browse files Browse the repository at this point in the history
- add api http metrics for requests and latency
- set `version="v2"` label on all api metrics
- update existing metric help strings for distinction (api vs. web)

Signed-off-by: Siavash Safi <[email protected]>
  • Loading branch information
siavashs committed Dec 17, 2024
1 parent 0d28327 commit e475c70
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 11 deletions.
65 changes: 56 additions & 9 deletions api/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,11 @@ import (
"log/slog"
"net/http"
"runtime"
"strings"
"time"

"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
"github.com/prometheus/common/model"
"github.com/prometheus/common/promslog"
"github.com/prometheus/common/route"
Expand All @@ -40,6 +42,8 @@ type API struct {
v2 *apiv2.API
deprecationRouter *V1DeprecationRouter

requests *prometheus.CounterVec
latency *prometheus.HistogramVec
requestsInFlight prometheus.Gauge
concurrencyLimitExceeded prometheus.Counter
timeout time.Duration
Expand Down Expand Up @@ -132,19 +136,39 @@ func New(opts Options) (*API, error) {
return nil, err
}

latency := prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Name: "alertmanager_api_http_request_duration_seconds",
Help: "Histogram of latencies for api HTTP requests.",
ConstLabels: prometheus.Labels{"version": "v2"},
Buckets: []float64{.05, 0.1, .25, .5, .75, 1, 2, 5, 10, 20},
},
[]string{"code", "handler", "method"},
)
receivedRequests := prometheus.NewCounterVec(prometheus.CounterOpts{
Name: "alertmanager_api_http_requests_received_total",
Help: "The total number of received api HTTP requests.",
ConstLabels: prometheus.Labels{"version": "v2"},
}, []string{"code", "handler", "method"})
// TODO(beorn7): For now, this hardcodes the method="get" label. Other
// methods should get the same instrumentation.
requestsInFlight := prometheus.NewGauge(prometheus.GaugeOpts{
Name: "alertmanager_http_requests_in_flight",
Help: "Current number of HTTP requests being processed.",
ConstLabels: prometheus.Labels{"method": "get"},
Help: "Current number of api HTTP requests being processed.",
ConstLabels: prometheus.Labels{"method": "get", "version": "v2"},
})
concurrencyLimitExceeded := prometheus.NewCounter(prometheus.CounterOpts{
Name: "alertmanager_http_concurrency_limit_exceeded_total",
Help: "Total number of times an HTTP request failed because the concurrency limit was reached.",
ConstLabels: prometheus.Labels{"method": "get"},
Help: "Total number of times an api HTTP request failed because the concurrency limit was reached.",
ConstLabels: prometheus.Labels{"method": "get", "version": "v2"},
})
if opts.Registry != nil {
if err := opts.Registry.Register(receivedRequests); err != nil {
return nil, err
}
if err := opts.Registry.Register(latency); err != nil {
return nil, err
}
if err := opts.Registry.Register(requestsInFlight); err != nil {
return nil, err
}
Expand All @@ -156,6 +180,8 @@ func New(opts Options) (*API, error) {
return &API{
deprecationRouter: NewV1DeprecationRouter(l.With("version", "v1")),
v2: v2,
requests: receivedRequests,
latency: latency,
requestsInFlight: requestsInFlight,
concurrencyLimitExceeded: concurrencyLimitExceeded,
timeout: opts.Timeout,
Expand All @@ -181,13 +207,17 @@ func (api *API) Register(r *route.Router, routePrefix string) *http.ServeMux {
if routePrefix != "/" {
apiPrefix = routePrefix
}
// TODO(beorn7): HTTP instrumentation is only in place for Router. Since
// /api/v2 works on the Handler level, it is currently not instrumented
// at all (with the exception of requestsInFlight, which is handled in
// limitHandler below).
mux.Handle(
apiPrefix+"/api/v2/",
api.limitHandler(http.StripPrefix(apiPrefix, api.v2.Handler)),
api.instrumentHandler(
apiPrefix,
api.limitHandler(
http.StripPrefix(
apiPrefix,
api.v2.Handler,
),
),
),
)

return mux
Expand Down Expand Up @@ -226,3 +256,20 @@ func (api *API) limitHandler(h http.Handler) http.Handler {
"Exceeded configured timeout of %v.\n", api.timeout,
))
}

// instrumentHandler returns a handler that records request count and latency
// metrics for h on every call.
//
// The "handler" label is derived from the request URL path with prefix
// stripped. Any path below /api/v2/silence/ is collapsed into one placeholder
// value so that individual silence IDs do not each become a label value. All
// other paths are used as the label value unchanged.
//
// NOTE(review): because other paths are used verbatim, requests for arbitrary
// paths may still create arbitrary label values; confirm whether the wrapped
// handler or mux pattern bounds this.
func (api *API) instrumentHandler(prefix string, h http.Handler) http.Handler {
	const (
		silencePathPrefix = "/api/v2/silence/"
		silenceHandler    = "/api/v2/silence/{silenceID}"
	)

	serve := func(w http.ResponseWriter, r *http.Request) {
		// TrimPrefix leaves the path untouched when prefix is absent.
		handlerName := strings.TrimPrefix(r.URL.Path, prefix)
		if strings.HasPrefix(handlerName, silencePathPrefix) {
			handlerName = silenceHandler
		}

		// Curry the latency vector first and the counter second, keeping the
		// evaluation order of the nested-call form.
		durations := api.latency.MustCurryWith(prometheus.Labels{"handler": handlerName})
		counts := api.requests.MustCurryWith(prometheus.Labels{"handler": handlerName})

		// The counter wraps h directly; the duration observer wraps the counter.
		counted := promhttp.InstrumentHandlerCounter(counts, h)
		timed := promhttp.InstrumentHandlerDuration(durations, counted)
		timed.ServeHTTP(w, r)
	}

	return http.HandlerFunc(serve)
}
4 changes: 2 additions & 2 deletions cmd/alertmanager/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ var (
requestDuration = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Name: "alertmanager_http_request_duration_seconds",
Help: "Histogram of latencies for HTTP requests.",
Help: "Histogram of latencies for web HTTP requests.",
Buckets: []float64{.05, 0.1, .25, .5, .75, 1, 2, 5, 20, 60},
NativeHistogramBucketFactor: 1.1,
NativeHistogramMaxBucketNumber: 100,
Expand All @@ -78,7 +78,7 @@ var (
responseSize = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Name: "alertmanager_http_response_size_bytes",
Help: "Histogram of response size for HTTP requests.",
Help: "Histogram of response size for web HTTP requests.",
Buckets: prometheus.ExponentialBuckets(100, 10, 7),
},
[]string{"handler", "method"},
Expand Down

0 comments on commit e475c70

Please sign in to comment.