From 734bf3a42ac2dab7b39ed4023608cc1d8b52b642 Mon Sep 17 00:00:00 2001 From: Flavien David Date: Thu, 23 May 2024 18:52:47 +0200 Subject: [PATCH] Qdrant cluster monitor (#5252) * Monitor more metrics on Qdrant cluster * :sparkles: --- alerting/temporal/src/qdrant/index.ts | 49 ++++++++++++++++++++++++--- 1 file changed, 45 insertions(+), 4 deletions(-) diff --git a/alerting/temporal/src/qdrant/index.ts b/alerting/temporal/src/qdrant/index.ts index 784e1ae9f08a..899224d1b21e 100644 --- a/alerting/temporal/src/qdrant/index.ts +++ b/alerting/temporal/src/qdrant/index.ts @@ -2,12 +2,35 @@ import axios from "axios"; import { client, v2 } from "@datadog/datadog-api-client"; import assert from "assert"; +import { + COUNT, + GAUGE, +} from "@datadog/datadog-api-client/dist/packages/datadog-api-client-v2/models/MetricIntakeType"; const { QDRANT_CLUSTERS, QDRANT_MONITORING_API_KEY } = process.env; assert(QDRANT_CLUSTERS, "QDRANT_CLUSTERS is not set."); assert(QDRANT_MONITORING_API_KEY, "QDRANT_MONITORING_API_KEY is not set."); +const QDRANT_METRICS_TO_WATCH: Record< + "count_metrics" | "gauge_metrics", + ReadonlyArray +> = { + count_metrics: ["app_info"], + gauge_metrics: [ + "cluster_peers_total", + "collections_total", + "collections_vector_total", + "grpc_responses_avg_duration_seconds", + "grpc_responses_fail_total", + "grpc_responses_max_duration_seconds", + "grpc_responses_min_duration_seconds", + "rest_responses_avg_duration_seconds", + "rest_responses_max_duration_seconds", + "rest_responses_min_duration_seconds", + ], +}; + // This automatically pulls API keys from env vars DD_API_KEY. const configuration = client.createConfiguration(); @@ -18,6 +41,10 @@ configuration.setServerVariables({ const datadogMetricsApi = new v2.MetricsApi(configuration); const qdrantClusters = QDRANT_CLUSTERS.split(","); +function formatMetricName(rawMetricName: string) { + return `qdrant.${rawMetricName.replace("_", ".")}`; +} + async function fetchPrometheusMetrics( clusterName: string ): Promise { @@ -35,17 +62,31 @@ async function fetchPrometheusMetrics( metricLines.forEach((line) => { const [metricName, metricValue] = line.split(" "); - if (metricName === "collections_total") { + const timestamp = Math.floor(Date.now() / 1000); + + if (QDRANT_METRICS_TO_WATCH.gauge_metrics.includes(metricName)) { metrics.push({ - metric: `qdrant.${metricName.replace("_", ".")}`, + metric: formatMetricName(metricName), points: [ { - timestamp: Math.floor(Date.now() / 1000), + timestamp, value: parseFloat(metricValue), }, ], tags: ["resource:qdrant", `cluster:${clusterName}`], - type: 3, + type: GAUGE, + }); + } else if (QDRANT_METRICS_TO_WATCH.count_metrics.includes(metricName)) { + metrics.push({ + metric: formatMetricName(metricName), + points: [ + { + timestamp, + value: parseInt(metricValue), + }, + ], + tags: ["resource:qdrant", `cluster:${clusterName}`], + type: COUNT, }); } });