diff --git a/src/main/java/io/statnett/k3a/lagexporter/ClusterLagCollector.java b/src/main/java/io/statnett/k3a/lagexporter/ClusterLagCollector.java index fee5c61..63da790 100644 --- a/src/main/java/io/statnett/k3a/lagexporter/ClusterLagCollector.java +++ b/src/main/java/io/statnett/k3a/lagexporter/ClusterLagCollector.java @@ -41,16 +41,20 @@ public ClusterLagCollector(final String clusterName, } public ClusterData collectClusterData() { - final boolean clientConnected = client.isConnected(); + final boolean clientWasAlreadyConnected = client.isConnected(); final long startMs = System.currentTimeMillis(); final Set allConsumerGroupIds = client.consumerGroupIds(consumerGroupFilter); final Map> groupOffsets = findConsumerGroupOffsets(allConsumerGroupIds); final Map topicPartitionData = findTopicPartitionData(groupOffsets.keySet()); final Map> topicAndConsumerData = calculateLag(groupOffsets, topicPartitionData); final long pollTimeMs; - if (clientConnected) { + if (clientWasAlreadyConnected) { pollTimeMs = System.currentTimeMillis() - startMs; } else { + /* The first cluster data collection after connecting takes an order of magnitude longer + * than the runs that come after. If we publish this metric, graphs in Prometheus will + * be scaled to a range that makes it impossible to see variations in the actual + * collection times. Thus, clamp it down. */ pollTimeMs = -1; } final ClusterData clusterData = new ClusterData(clusterName, topicAndConsumerData, pollTimeMs);