From 9c2694e2c5d74ad0139000d81f38d98ea5ae7213 Mon Sep 17 00:00:00 2001
From: Pedro Ladaria
Date: Wed, 4 Dec 2024 12:09:58 +0100
Subject: [PATCH] Clear gauges after scrape (#3)

* WEB-2117 wipe gauges after scrape

---------

Co-authored-by: Pedro Ladaria
---
 CONTRIBUTING.md           | 64 +++++++++++++++++++++++++++++++++++++++
 metrics/aggregate.go      | 17 ++++++++++-
 metrics/aggregate_test.go | 55 ++++++++++++++++++++++++++++++---
 3 files changed, 131 insertions(+), 5 deletions(-)
 create mode 100644 CONTRIBUTING.md

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 0000000..e4b0874
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,64 @@
+# Contributing
+
+## Install Go
+
+https://golang.org/doc/install
+
+## Install dependencies
+
+```sh
+go mod tidy
+```
+
+## Run
+
+```sh
+go run . --apiListen 127.0.0.1:8080
+```
+
+## Send some metrics
+
+```sh
+echo "some_metric 3.14" | curl --data-binary @- http://127.0.0.1:8080/metrics/job/some_job
+
+printf "#TYPE another_metric gauge\nanother_metric 42\n" | curl --data-binary @- http://127.0.0.1:8080/metrics/job/some_job
+```
+
+## See your metric
+
+Open http://127.0.0.1:8080/metrics in your browser or use `curl`:
+
+```sh
+curl http://127.0.0.1:8080/metrics
+```
+
+Expected result:
+
+```
+# TYPE another_metric gauge
+another_metric{job="some_job"} 42
+# TYPE some_metric untyped
+some_metric{job="some_job"} 3.14
+```
+
+## Simulate a scrape from Prometheus
+
+- Send "Prometheus/1.0" as the User-Agent
+- Gauges are cleared after the scrape
+
+```sh
+curl -H "User-Agent: Prometheus/1.0" http://127.0.0.1:8080/metrics
+```
+
+This returns the same output as above, but if it is executed again the `gauge` metrics have been cleared:
+
+```
+# TYPE some_metric untyped
+some_metric{job="some_job"} 3.14
+```
+
+## Run tests
+
+```sh
+go test ./...
+```
diff --git a/metrics/aggregate.go b/metrics/aggregate.go
index b96399c..c2bccf1 100644
--- a/metrics/aggregate.go
+++ b/metrics/aggregate.go
@@ -149,7 +149,22 @@ func (a *Aggregate) HandleRender(c *gin.Context) {
 	c.Header("Content-Type", string(contentType))
 	a.encodeAllMetrics(c.Writer, contentType)
 
-	// TODO reset gauges
+	// Gauges are cleared only when the User-Agent contains "Prometheus/", i.e. on a real scrape.
+	if strings.Contains(c.Request.UserAgent(), "Prometheus/") {
+		a.removeGaugeMetrics()
+	}
+}
+
+// removeGaugeMetrics deletes every gauge family from a.families under the write lock.
+// NOTE(review): a gauge pushed between encodeAllMetrics and this call is deleted unserved.
+func (a *Aggregate) removeGaugeMetrics() {
+	a.familiesLock.Lock()
+	defer a.familiesLock.Unlock()
+	for name, family := range a.families {
+		if family.GetType() == dto.MetricType_GAUGE {
+			delete(a.families, name)
+		}
+	}
 }
 
 func (a *Aggregate) encodeAllMetrics(writer io.Writer, contentType expfmt.Format) {
diff --git a/metrics/aggregate_test.go b/metrics/aggregate_test.go
index e0f25d1..3de0199 100644
--- a/metrics/aggregate_test.go
+++ b/metrics/aggregate_test.go
@@ -4,9 +4,12 @@ import (
 	"bytes"
 	"context"
 	"fmt"
+	"net/http"
+	"net/http/httptest"
 	"strings"
 	"testing"
 
+	"github.com/gin-gonic/gin"
 	"github.com/pmezard/go-difflib/difflib"
 	"github.com/prometheus/common/expfmt"
 	"github.com/stretchr/testify/require"
@@ -65,7 +68,7 @@ histogram_count 1
 counter{job="test"} 60
 # HELP gauge A gauge
 # TYPE gauge gauge
-gauge{job="test"} 99
+gauge{job="test"} 57
 # HELP histogram A histogram
 # TYPE histogram histogram
 histogram_bucket{job="test",le="1"} 0
@@ -118,9 +121,9 @@ ui_external_lib_loaded{name="mixpanel",loaded="true"} 1
 `
 	gaugeOutput = `# HELP ui_external_lib_loaded A gauge with entries in un-sorted order
 # TYPE ui_external_lib_loaded gauge
-ui_external_lib_loaded{job="test",loaded="true",name="Intercom"} 2
-ui_external_lib_loaded{job="test",loaded="true",name="ga"} 2
-ui_external_lib_loaded{job="test",loaded="true",name="mixpanel"} 2
+ui_external_lib_loaded{job="test",loaded="true",name="Intercom"} 1
+ui_external_lib_loaded{job="test",loaded="true",name="ga"} 1
+ui_external_lib_loaded{job="test",loaded="true",name="mixpanel"} 1
 `
 	duplicateLabels = `
 # HELP ui_external_lib_loaded Test with duplicate values
@@ -277,3 +280,47 @@ func BenchmarkConcurrentAggregate(b *testing.B) {
 		})
 	}
 }
+
+func TestRemoveGaugeMetrics(t *testing.T) {
+	// Test mode and a bare engine keep gin's debug banner and access log out of the output.
+	gin.SetMode(gin.TestMode)
+
+	a := NewAggregate()
+	router := gin.New()
+	router.GET("/metrics", a.HandleRender)
+
+	// Seed one gauge and one counter so both the cleared and the kept type are covered.
+	require.NoError(t, a.parseAndMerge(strings.NewReader(`# TYPE gauge_metric gauge
+gauge_metric{label="value"} 123
+# TYPE counter_metric counter
+counter_metric{label="value"} 456
+`), nil))
+
+	// scrape issues GET /metrics with the given User-Agent and returns the recorded response.
+	scrape := func(userAgent string) *httptest.ResponseRecorder {
+		req := httptest.NewRequest(http.MethodGet, "/metrics", nil)
+		req.Header.Set("User-Agent", userAgent)
+		rr := httptest.NewRecorder()
+		router.ServeHTTP(rr, req)
+		return rr
+	}
+
+	// A non-Prometheus client must not consume the gauge, so the real scrape still reports it.
+	require.Equal(t, http.StatusOK, scrape("curl/8.0").Code)
+	rr := scrape("Prometheus/2.0")
+	require.Equal(t, http.StatusOK, rr.Code)
+	require.Contains(t, rr.Body.String(), "gauge_metric")
+
+	// Once Prometheus has scraped, a follow-up request serves the counter but not the gauge.
+	rr = scrape("curl/8.0")
+	require.NotContains(t, rr.Body.String(), "gauge_metric")
+	require.Contains(t, rr.Body.String(), "counter_metric")
+
+	// The stored families agree with what was served.
+	a.familiesLock.RLock()
+	defer a.familiesLock.RUnlock()
+	_, gaugeKept := a.families["gauge_metric"]
+	require.False(t, gaugeKept, "gauge_metric was not removed")
+	_, counterKept := a.families["counter_metric"]
+	require.True(t, counterKept, "counter_metric was removed but it should not have been")
+}