diff --git a/docs/operations/metrics.md b/docs/operations/metrics.md index 95099406f282..c8918e35170f 100644 --- a/docs/operations/metrics.md +++ b/docs/operations/metrics.md @@ -338,7 +338,7 @@ These metrics are for the Druid Coordinator and are reset each time the Coordina |Metric|Description|Dimensions|Normal value| |------|-----------|----------|------------| -| `service/heartbeat` | Metric indicating the service is up. `ServiceStatusMonitor` must be enabled. |`leader` on the Overlord and Coordinator.|1| +| `service/heartbeat` | Metric indicating the service is up. `ServiceStatusMonitor` must be enabled. | `leader` on the Overlord and Coordinator.
`workerVersion`, `category`, `status` on the Middle Manager.
`taskId`, `groupId`, `taskType`, `dataSource` on the Peon |1| ### Historical diff --git a/extensions-contrib/statsd-emitter/src/main/resources/defaultMetricDimensions.json b/extensions-contrib/statsd-emitter/src/main/resources/defaultMetricDimensions.json index a7bdfbf3e585..9b134cc54218 100644 --- a/extensions-contrib/statsd-emitter/src/main/resources/defaultMetricDimensions.json +++ b/extensions-contrib/statsd-emitter/src/main/resources/defaultMetricDimensions.json @@ -175,5 +175,5 @@ "namespace/cache/numEntries" : { "dimensions" : [], "type" : "gauge" }, "namespace/cache/heapSizeInBytes" : { "dimensions" : [], "type" : "gauge" }, - "service/heartbeat" : { "dimensions" : ["leader"], "type" : "gauge" } + "service/heartbeat" : { "dimensions" : ["leader"], "type" : "count" } } diff --git a/processing/src/main/java/org/apache/druid/query/DruidMetrics.java b/processing/src/main/java/org/apache/druid/query/DruidMetrics.java index 3dc8685fc002..f85dd0016005 100644 --- a/processing/src/main/java/org/apache/druid/query/DruidMetrics.java +++ b/processing/src/main/java/org/apache/druid/query/DruidMetrics.java @@ -49,6 +49,9 @@ public class DruidMetrics public static final String TAGS = "tags"; + public static final String CATEGORY = "category"; + public static final String WORKER_VERSION = "workerVersion"; + public static int findNumComplexAggs(List aggs) { int retVal = 0; diff --git a/server/src/main/java/org/apache/druid/server/metrics/ServiceStatusMonitor.java b/server/src/main/java/org/apache/druid/server/metrics/ServiceStatusMonitor.java index d56bf76ec43c..ad87c1bcb638 100644 --- a/server/src/main/java/org/apache/druid/server/metrics/ServiceStatusMonitor.java +++ b/server/src/main/java/org/apache/druid/server/metrics/ServiceStatusMonitor.java @@ -33,8 +33,12 @@ */ public class ServiceStatusMonitor extends AbstractMonitor { + /** + * The named binding for tags that should be reported with the `service/heartbeat` metric. + */ + public static final String HEARTBEAT_TAGS_BINDING = "heartbeat"; - @Named("heartbeat") + @Named(HEARTBEAT_TAGS_BINDING) @Inject(optional = true) Supplier> heartbeatTagsSupplier = null; @@ -43,9 +47,7 @@ public boolean doMonitor(ServiceEmitter emitter) { final ServiceMetricEvent.Builder builder = new ServiceMetricEvent.Builder(); if (heartbeatTagsSupplier != null && heartbeatTagsSupplier.get() != null) { - heartbeatTagsSupplier.get().forEach((k, v) -> { - builder.setDimension(k, v); - }); + heartbeatTagsSupplier.get().forEach(builder::setDimension); } emitter.emit(builder.build("service/heartbeat", 1)); diff --git a/server/src/main/java/org/apache/druid/server/metrics/WorkerTaskCountStatsMonitor.java b/server/src/main/java/org/apache/druid/server/metrics/WorkerTaskCountStatsMonitor.java index 26d23bf9aca9..a464a81a81de 100644 --- a/server/src/main/java/org/apache/druid/server/metrics/WorkerTaskCountStatsMonitor.java +++ b/server/src/main/java/org/apache/druid/server/metrics/WorkerTaskCountStatsMonitor.java @@ -26,6 +26,7 @@ import org.apache.druid.java.util.emitter.service.ServiceEmitter; import org.apache.druid.java.util.emitter.service.ServiceMetricEvent; import org.apache.druid.java.util.metrics.AbstractMonitor; +import org.apache.druid.query.DruidMetrics; import java.util.Set; @@ -71,8 +72,8 @@ private void emit(ServiceEmitter emitter, String metricName, Long value) { if (value != null) { final ServiceMetricEvent.Builder builder = new ServiceMetricEvent.Builder(); - builder.setDimension("category", workerCategory); - builder.setDimension("workerVersion", workerVersion); + builder.setDimension(DruidMetrics.CATEGORY, workerCategory); + builder.setDimension(DruidMetrics.WORKER_VERSION, workerVersion); emitter.emit(builder.build(metricName, value)); } } diff --git a/services/src/main/java/org/apache/druid/cli/CliCoordinator.java b/services/src/main/java/org/apache/druid/cli/CliCoordinator.java index 6d9be301d184..dcc8c1a95ac0 100644 --- a/services/src/main/java/org/apache/druid/cli/CliCoordinator.java +++ b/services/src/main/java/org/apache/druid/cli/CliCoordinator.java @@ -114,6 +114,7 @@ import org.apache.druid.server.initialization.jetty.JettyServerInitializer; import org.apache.druid.server.lookup.cache.LookupCoordinatorManager; import org.apache.druid.server.lookup.cache.LookupCoordinatorManagerConfig; +import org.apache.druid.server.metrics.ServiceStatusMonitor; import org.apache.druid.server.router.TieredBrokerConfig; import org.eclipse.jetty.server.Server; import org.joda.time.Duration; @@ -332,7 +333,7 @@ public void configure(Binder binder) binder.bind(RowIngestionMetersFactory.class).toProvider(Providers.of(null)); // Bind HeartbeatSupplier only when the service operates independently of Overlord. binder.bind(new TypeLiteral>>() {}) - .annotatedWith(Names.named("heartbeat")) + .annotatedWith(Names.named(ServiceStatusMonitor.HEARTBEAT_TAGS_BINDING)) .toProvider(HeartbeatSupplier.class); } diff --git a/services/src/main/java/org/apache/druid/cli/CliMiddleManager.java b/services/src/main/java/org/apache/druid/cli/CliMiddleManager.java index d5d5cba5cfcc..70aa26b7c132 100644 --- a/services/src/main/java/org/apache/druid/cli/CliMiddleManager.java +++ b/services/src/main/java/org/apache/druid/cli/CliMiddleManager.java @@ -20,7 +20,9 @@ package org.apache.druid.cli; import com.github.rvesse.airline.annotations.Command; +import com.google.common.base.Supplier; import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import com.google.inject.Binder; import com.google.inject.Inject; @@ -28,6 +30,7 @@ import com.google.inject.Module; import com.google.inject.Provides; import com.google.inject.multibindings.MapBinder; +import com.google.inject.name.Named; import com.google.inject.name.Names; import com.google.inject.util.Providers; import org.apache.druid.curator.ZkEnablementConfig; @@ -67,6 +70,7 @@ import org.apache.druid.indexing.worker.shuffle.ShuffleModule; import org.apache.druid.java.util.common.logger.Logger; import org.apache.druid.metadata.input.InputSourceModule; +import org.apache.druid.query.DruidMetrics; import org.apache.druid.query.lookup.LookupSerdeModule; import org.apache.druid.segment.incremental.RowIngestionMetersFactory; import org.apache.druid.segment.realtime.appenderator.AppenderatorsManager; @@ -76,11 +80,13 @@ import org.apache.druid.server.DruidNode; import org.apache.druid.server.http.SelfDiscoveryResource; import org.apache.druid.server.initialization.jetty.JettyServerInitializer; +import org.apache.druid.server.metrics.ServiceStatusMonitor; import org.apache.druid.server.metrics.WorkerTaskCountStatsProvider; import org.apache.druid.timeline.PruneLastCompactionState; import org.eclipse.jetty.server.Server; import java.util.List; +import java.util.Map; import java.util.Properties; import java.util.Set; @@ -195,6 +201,18 @@ private void configureIntermediaryData(Binder binder) biddy.addBinding("deepstore").to(DeepStorageIntermediaryDataManager.class).in(LazySingleton.class); } + @Provides + @LazySingleton + @Named(ServiceStatusMonitor.HEARTBEAT_TAGS_BINDING) + public Supplier> heartbeatDimensions(WorkerConfig workerConfig, WorkerTaskManager workerTaskManager) + { + return () -> ImmutableMap.of( + DruidMetrics.WORKER_VERSION, workerConfig.getVersion(), + DruidMetrics.CATEGORY, workerConfig.getCategory(), + DruidMetrics.STATUS, workerTaskManager.isWorkerEnabled() ? "Enabled" : "Disabled" + ); + } + @Provides @LazySingleton public Worker getWorker(@Self DruidNode node, WorkerConfig config) diff --git a/services/src/main/java/org/apache/druid/cli/CliOverlord.java b/services/src/main/java/org/apache/druid/cli/CliOverlord.java index 1bbe1a0e9c88..d72d5eb24737 100644 --- a/services/src/main/java/org/apache/druid/cli/CliOverlord.java +++ b/services/src/main/java/org/apache/druid/cli/CliOverlord.java @@ -118,6 +118,7 @@ import org.apache.druid.server.initialization.ServerConfig; import org.apache.druid.server.initialization.jetty.JettyServerInitUtils; import org.apache.druid.server.initialization.jetty.JettyServerInitializer; +import org.apache.druid.server.metrics.ServiceStatusMonitor; import org.apache.druid.server.metrics.TaskCountStatsProvider; import org.apache.druid.server.metrics.TaskSlotCountStatsProvider; import org.apache.druid.server.security.AuthConfig; @@ -361,7 +362,7 @@ public TaskStorageDirTracker getTaskStorageDirTracker(WorkerConfig workerConfig, @Provides @LazySingleton - @Named("heartbeat") + @Named(ServiceStatusMonitor.HEARTBEAT_TAGS_BINDING) public Supplier> getHeartbeatSupplier(TaskMaster taskMaster) { return () -> { diff --git a/services/src/main/java/org/apache/druid/cli/CliPeon.java b/services/src/main/java/org/apache/druid/cli/CliPeon.java index bad007af6731..8b8e1a426e8f 100644 --- a/services/src/main/java/org/apache/druid/cli/CliPeon.java +++ b/services/src/main/java/org/apache/druid/cli/CliPeon.java @@ -24,8 +24,11 @@ import com.github.rvesse.airline.annotations.Command; import com.github.rvesse.airline.annotations.Option; import com.github.rvesse.airline.annotations.restrictions.Required; +import com.google.common.base.Supplier; +import com.google.common.base.Suppliers; import com.google.common.base.Throwables; import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import com.google.inject.Binder; import com.google.inject.Inject; @@ -96,6 +99,7 @@ import org.apache.druid.java.util.common.logger.Logger; import org.apache.druid.metadata.IndexerSQLMetadataStorageCoordinator; import org.apache.druid.metadata.input.InputSourceModule; +import org.apache.druid.query.DruidMetrics; import org.apache.druid.query.QuerySegmentWalker; import org.apache.druid.query.lookup.LookupModule; import org.apache.druid.segment.handoff.CoordinatorBasedSegmentHandoffNotifierConfig; @@ -124,12 +128,14 @@ import org.apache.druid.server.initialization.jetty.ChatHandlerServerModule; import org.apache.druid.server.initialization.jetty.JettyServerInitializer; import org.apache.druid.server.metrics.DataSourceTaskIdHolder; +import org.apache.druid.server.metrics.ServiceStatusMonitor; import org.eclipse.jetty.server.Server; import java.io.File; import java.io.IOException; import java.nio.file.Paths; import java.util.List; +import java.util.Map; import java.util.Properties; import java.util.Set; @@ -259,6 +265,21 @@ public void configure(Binder binder) } } + @Provides + @LazySingleton + @Named(ServiceStatusMonitor.HEARTBEAT_TAGS_BINDING) + public Supplier> heartbeatDimensions(Task task) + { + return Suppliers.ofInstance( + ImmutableMap.of( + DruidMetrics.TASK_ID, task.getId(), + DruidMetrics.DATASOURCE, task.getDataSource(), + DruidMetrics.TASK_TYPE, task.getType(), + DruidMetrics.GROUP_ID, task.getGroupId() + ) + ); + } + @Provides @LazySingleton public Task readTask(@Json ObjectMapper mapper, ExecutorLifecycleConfig config)