From 8e9713320d62c107e301bd44501e09498ce7bde2 Mon Sep 17 00:00:00 2001 From: Achilleas Triantafyllou Date: Tue, 12 Apr 2022 20:34:55 +0300 Subject: [PATCH] Add support for LoadBalancing metrics (#60) * git: Add gitignore file * feat: Export LoadBalancer analytics metrics This commit enables support for exporting LoadBalancer analytics metrics. LoadBalancer's pool health is reported: * 1 -> healthy * 0 -> unhealthy as well as the number of requests each pool receives from each CFL network location[1] 1. https://developers.cloudflare.com/load-balancing/reference/load-balancing-analytics/#graphql-analytics * feat: Export browser map page views count Export browser_map_page_views_count requests metric --- .gitignore | 25 ++++++++++++++++++ README.md | 2 ++ cloudflare.go | 72 +++++++++++++++++++++++++++++++++++++++++++++++++++ main.go | 1 + prometheus.go | 56 +++++++++++++++++++++++++++++++++++++++ 5 files changed, 156 insertions(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..cf863e7 --- /dev/null +++ b/.gitignore @@ -0,0 +1,25 @@ +# Binaries for programs and plugins +*.exe +*.exe~ +*.dll +*.so +*.dylib + +# Test binary, built with `go test -c` +*.test + +# Output of the go coverage tool, specifically when used with LiteIDE +*.out + +# Local environment +.envrc + +# Bin directory +bin/ + +# Dependency directories (remove the comment below to include it) +vendor/ + +# editors +*.swp + diff --git a/README.md b/README.md index 2123f90..acea378 100644 --- a/README.md +++ b/README.md @@ -86,10 +86,12 @@ Note: `ZONE_` configuration is not supported as flag. 
# HELP cloudflare_zone_requests_ssl_encrypted Number of encrypted requests for zone # HELP cloudflare_zone_requests_status Number of request for zone per HTTP status # HELP cloudflare_zone_requests_status_country_host Count of requests for zone per edge HTTP status per country per host +# HELP cloudflare_zone_requests_browser_map_page_views_count Number of successful requests for HTML pages per zone # HELP cloudflare_zone_requests_total Number of requests for zone # HELP cloudflare_zone_threats_country Threats per zone per country # HELP cloudflare_zone_threats_total Threats per zone # HELP cloudflare_zone_uniques_total Uniques per zone +# HELP cloudflare_zone_pool_health_status Reports the health of a pool, 1 for healthy, 0 for unhealthy. ``` ## Helm chart repository diff --git a/cloudflare.go b/cloudflare.go index 41ecb41..de30b44 100644 --- a/cloudflare.go +++ b/cloudflare.go @@ -31,6 +31,12 @@ type cloudflareResponseColo struct { } `json:"viewer"` } +type cloudflareResponseLb struct { + Viewer struct { + Zones []lbResp `json:"zones"` + } `json:"viewer"` +} + type accountResp struct { WorkersInvocationsAdaptive []struct { Dimensions struct { @@ -172,6 +178,22 @@ type zoneResp struct { ZoneTag string `json:"zoneTag"` } +type lbResp struct { + LoadBalancingRequestsAdaptiveGroups []struct { + Count uint64 `json:"count"` + Dimensions struct { + ColoCode string `json:"coloCode"` + LbName string `json:"lbName"` + Region string `json:"region"` + SelectedOriginName string `json:"selectedOriginName"` + SelectedPoolHealthy int `json:"selectedPoolHealthy"` + SelectedPoolName string `json:"selectedPoolName"` + SteeringPolicy string `json:"steeringPolicy"` + } `json:"dimensions"` + } `json:"loadBalancingRequestsAdaptiveGroups"` + ZoneTag string `json:"zoneTag"` +} + func fetchZones() []cloudflare.Zone { var api *cloudflare.API var err error @@ -453,6 +475,56 @@ func fetchWorkerTotals(accountID string) (*cloudflareResponseAccts, error) { return &resp, nil } +func 
fetchLoadBalancerTotals(zoneIDs []string) (*cloudflareResponseLb, error) { + now := time.Now().Add(-time.Duration(cfgScrapeDelay) * time.Second).UTC() + s := 60 * time.Second + now = now.Truncate(s) + now1mAgo := now.Add(-60 * time.Second) + + request := graphql.NewRequest(` + query ($zoneIDs: [String!], $mintime: Time!, $maxtime: Time!, $limit: Int!) { + viewer { + zones(filter: { zoneTag_in: $zoneIDs }) { + zoneTag + loadBalancingRequestsAdaptiveGroups( + filter: { datetime_geq: $mintime, datetime_lt: $maxtime}, + limit: $limit) { + count + dimensions { + coloCode + region + lbName + selectedPoolName + selectedOriginName + selectedPoolHealthy + steeringPolicy + } + } + } + } + } +`) + if len(cfgCfAPIToken) > 0 { + request.Header.Set("Authorization", "Bearer "+cfgCfAPIToken) + } else { + request.Header.Set("X-AUTH-EMAIL", cfgCfAPIEmail) + request.Header.Set("X-AUTH-KEY", cfgCfAPIKey) + } + request.Var("limit", 9999) + request.Var("maxtime", now) + request.Var("mintime", now1mAgo) + request.Var("zoneIDs", zoneIDs) + + ctx := context.Background() + graphqlClient := graphql.NewClient(cfGraphQLEndpoint) + var resp cloudflareResponseLb + if err := graphqlClient.Run(ctx, request, &resp); err != nil { + log.Error(err) + return nil, err + } + return &resp, nil +} + func findZoneName(zones []cloudflare.Zone, ID string) string { for _, z := range zones { if z.ID == ID { diff --git a/main.go b/main.go index 565dbfd..ec92bc5 100644 --- a/main.go +++ b/main.go @@ -84,6 +84,7 @@ func fetchMetrics() { go fetchZoneAnalytics(targetZones, &wg) go fetchZoneColocationAnalytics(targetZones, &wg) + go fetchLoadBalancerAnalytics(targetZones, &wg) } wg.Wait() diff --git a/prometheus.go b/prometheus.go index 8dce3a8..5a337c2 100644 --- a/prometheus.go +++ b/prometheus.go @@ -48,6 +48,12 @@ var ( }, []string{"zone", "status"}, ) + zoneRequestBrowserMap = promauto.NewCounterVec(prometheus.CounterOpts{ + Name: "cloudflare_zone_requests_browser_map_page_views_count", + Help: "Number of 
successful requests for HTML pages per zone", + }, []string{"zone", "family"}, + ) + zoneRequestOriginStatusCountryHost = promauto.NewCounterVec(prometheus.CounterOpts{ Name: "cloudflare_zone_requests_origin_status_country_host", Help: "Count of not cached requests for zone per origin HTTP status per country per host", @@ -167,6 +173,14 @@ var ( Help: "Duration quantiles by script name (GB*s)", }, []string{"script_name", "quantile"}, ) + + poolHealthStatus = promauto.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "cloudflare_zone_pool_health_status", + Help: "Reports the health of a pool, 1 for healthy, 0 for unhealthy.", + }, + []string{"zone", "colo_code", "load_balancer_name", "origin_name", "steering_policy", "pool_name", "region"}, + ) ) func fetchWorkerAnalytics(account cloudflare.Account, wg *sync.WaitGroup) { @@ -274,6 +288,10 @@ func addHTTPGroups(z *zoneResp, name string) { zoneRequestHTTPStatus.With(prometheus.Labels{"zone": name, "status": strconv.Itoa(status.EdgeResponseStatus)}).Add(float64(status.Requests)) } + for _, browser := range zt.Sum.BrowserMap { + zoneRequestBrowserMap.With(prometheus.Labels{"zone": name, "family": browser.UaBrowserFamily}).Add(float64(browser.PageViews)) + } + zoneBandwidthTotal.With(prometheus.Labels{"zone": name}).Add(float64(zt.Sum.Bytes)) zoneBandwidthCached.With(prometheus.Labels{"zone": name}).Add(float64(zt.Sum.CachedBytes)) zoneBandwidthSSLEncrypted.With(prometheus.Labels{"zone": name}).Add(float64(zt.Sum.EncryptedBytes)) @@ -348,3 +366,41 @@ func addHTTPAdaptiveGroups(z *zoneResp, name string) { } } + +func fetchLoadBalancerAnalytics(zones []cloudflare.Zone, wg *sync.WaitGroup) { + wg.Add(1) + defer wg.Done() + + // None of the below referenced metrics are available in the free tier + if cfgFreeTier { + return + } + + zoneIDs := extractZoneIDs(zones) + + l, err := fetchLoadBalancerTotals(zoneIDs) + if err != nil { + return + } + for _, lb := range l.Viewer.Zones { + name := findZoneName(zones, lb.ZoneTag) + 
addLoadBalancingRequestsAdaptiveGroups(&lb, name) + } +} + +func addLoadBalancingRequestsAdaptiveGroups(z *lbResp, name string) { + + for _, g := range z.LoadBalancingRequestsAdaptiveGroups { + poolHealthStatus.With( + prometheus.Labels{ + "zone": name, + "colo_code": g.Dimensions.ColoCode, + "load_balancer_name": g.Dimensions.LbName, + "origin_name": g.Dimensions.SelectedOriginName, + "steering_policy": g.Dimensions.SteeringPolicy, + "pool_name": g.Dimensions.SelectedPoolName, + "region": g.Dimensions.Region, + }).Set(float64(g.Dimensions.SelectedPoolHealthy)) + } + +}