From 86ae1ba9cae47c18fe238bb9c10d536c31aa0aff Mon Sep 17 00:00:00 2001 From: lewismiddleton <66401219+lewismiddleton@users.noreply.github.com> Date: Thu, 11 Jul 2024 15:41:00 +0100 Subject: [PATCH] api: add metrics endpoint (#630) Adds a metrics endpoint that computes updated metric values on each request. On each request we: - fetch runner groups from Redis - get all runners for each group - update a Gauge for the runners_count on each group - render the metrics in the standard Prometheus text exposition format Resolves #625 --- poetry.lock | 18 ++++++++- pyproject.toml | 1 + runner_manager/main.py | 10 ++++- runner_manager/routers/metrics.py | 22 +++++++++++ tests/api/test_metrics.py | 65 +++++++++++++++++++++++++++++++ 5 files changed, 113 insertions(+), 3 deletions(-) create mode 100644 runner_manager/routers/metrics.py create mode 100644 tests/api/test_metrics.py diff --git a/poetry.lock b/poetry.lock index d059fb19..371c6bc0 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. [[package]] name = "anyio" @@ -2039,6 +2039,20 @@ files = [ dev = ["pre-commit", "tox"] testing = ["pytest", "pytest-benchmark"] +[[package]] +name = "prometheus-client" +version = "0.20.0" +description = "Python client for the Prometheus monitoring system." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "prometheus_client-0.20.0-py3-none-any.whl", hash = "sha256:cde524a85bce83ca359cc837f28b8c0db5cac7aa653a588fd7e84ba061c329e7"}, + {file = "prometheus_client-0.20.0.tar.gz", hash = "sha256:287629d00b147a32dcb2be0b9df905da599b2d82f80377083ec8463309a4bb89"}, +] + +[package.extras] +twisted = ["twisted"] + [[package]] name = "proto-plus" version = "1.23.0" @@ -3454,4 +3468,4 @@ dev = ["doc8", "flake8", "flake8-import-order", "rstcheck[sphinx]", "sphinx"] [metadata] lock-version = "2.0" python-versions = "^3.11" -content-hash = "7501cd3e6328fc7532cfbfa041d47d561c1fcc911f8329f208e98e9a4ad9be98" +content-hash = "5325a4e0733da3dd5eaf698284abbb3665247697ecd8518cb570a5ffef951b85" diff --git a/pyproject.toml b/pyproject.toml index 6888928d..7e693f81 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,6 +27,7 @@ rq-scheduler = "^0.13.1" pyvmomi = "^8.0.2.0.1" vapi-runtime = { url = "https://raw.githubusercontent.com/vmware/vsphere-automation-sdk-python/v8.0.1.0/lib/vapi-runtime/vapi_runtime-2.40.0-py2.py3-none-any.whl" } vcenter-bindings = { url = "https://raw.githubusercontent.com/vmware/vsphere-automation-sdk-python/v8.0.1.0/lib/vcenter-bindings/vcenter_bindings-4.1.0-py2.py3-none-any.whl" } +prometheus-client = "^0.20.0" [tool.poetry.group.docs] diff --git a/runner_manager/main.py b/runner_manager/main.py index e4b15e59..808be460 100644 --- a/runner_manager/main.py +++ b/runner_manager/main.py @@ -8,7 +8,14 @@ from runner_manager import Runner, RunnerGroup, Settings, log from runner_manager.dependencies import get_queue, get_redis, get_settings from runner_manager.jobs.startup import startup -from runner_manager.routers import _health, private, public, runner_groups, webhook +from runner_manager.routers import ( + _health, + metrics, + private, + public, + runner_groups, + webhook, +) settings = get_settings() log.setLevel(settings.log_level) @@ -39,6 +46,7 @@ async def lifespan(app: FastAPI): 
app.include_router(private.router) app.include_router(public.router) app.include_router(runner_groups.router) +app.include_router(metrics.router) @app.get("/") diff --git a/runner_manager/routers/metrics.py b/runner_manager/routers/metrics.py new file mode 100644 index 00000000..78feb34b --- /dev/null +++ b/runner_manager/routers/metrics.py @@ -0,0 +1,22 @@ +from typing import List + +from fastapi import APIRouter +from fastapi.responses import PlainTextResponse +from prometheus_client import Gauge, generate_latest + +from runner_manager import RunnerGroup +from runner_manager.models.runner import Runner + +router = APIRouter(prefix="/metrics") + +runners_count = Gauge("runners_count", "Number of runners", ["runner_group"]) + + +@router.get("/", response_class=PlainTextResponse) +def compute_metrics() -> PlainTextResponse: + groups: List[RunnerGroup] = RunnerGroup.find().all() + for group in groups: + runners: List[Runner] = group.get_runners() + runners_count.labels(runner_group=group.name).set(len(runners)) + metrics = generate_latest().decode() + return PlainTextResponse(content=metrics) diff --git a/tests/api/test_metrics.py b/tests/api/test_metrics.py new file mode 100644 index 00000000..b3eb367b --- /dev/null +++ b/tests/api/test_metrics.py @@ -0,0 +1,65 @@ +from fastapi.testclient import TestClient + +from runner_manager import RunnerGroup +from runner_manager.clients.github import GitHub + + +def test_metrics_endpoint(client: TestClient, runner_group: RunnerGroup): + runner_group.save() + response = client.get("/metrics") + assert response.status_code == 200 + runner_lines = [ + line for line in response.text.splitlines() if line.startswith("runners_") + ] + assert f'runners_count{{runner_group="{runner_group.name}"}} 0.0' in runner_lines + + +def test_runner_count(client: TestClient, runner_group: RunnerGroup, github: GitHub): + runner_group.save() + want = len(runner_group.get_runners()) + + response = client.get("/metrics") + assert response.status_code 
== 200 + got = [line for line in response.text.splitlines() if line.startswith("runners_")] + print(want) + assert f'runners_count{{runner_group="{runner_group.name}"}} {want:.1f}' in got + + runner_group.max = 2 + runner_group.min = 1 + runner_group.save() + + want = 0.0 + response = client.get("/metrics") + assert response.status_code == 200 + before = [ + line for line in response.text.splitlines() if line.startswith("runners_") + ] + assert f'runners_count{{runner_group="{runner_group.name}"}} {want:.1f}' in before + + runner = runner_group.create_runner(github) + assert runner is not None + + want = 1.0 + response = client.get("/metrics") + assert response.status_code == 200 + after_create = [ + line for line in response.text.splitlines() if line.startswith("runners_") + ] + assert ( + f'runners_count{{runner_group="{runner_group.name}"}} {want:.1f}' + in after_create + ) + + runner = runner_group.delete_runner(runner, github) + assert runner is not None + + want = 0.0 + response = client.get("/metrics") + assert response.status_code == 200 + after_delete = [ + line for line in response.text.splitlines() if line.startswith("runners_") + ] + assert ( + f'runners_count{{runner_group="{runner_group.name}"}} {want:.1f}' + in after_delete + )