Skip to content

Commit

Permalink
Additional dimensions for service/heartbeat (apache#14743)
Browse files Browse the repository at this point in the history
* Additional dimensions for service/heartbeat

* docs

* review

* review
  • Loading branch information
suneet-s authored Aug 4, 2023
1 parent 590734b commit 62ddeaf
Show file tree
Hide file tree
Showing 9 changed files with 57 additions and 10 deletions.
2 changes: 1 addition & 1 deletion docs/operations/metrics.md
Original file line number Diff line number Diff line change
Expand Up @@ -338,7 +338,7 @@ These metrics are for the Druid Coordinator and are reset each time the Coordina

|Metric|Description|Dimensions|Normal value|
|------|-----------|----------|------------|
| `service/heartbeat` | Metric indicating the service is up. `ServiceStatusMonitor` must be enabled. |`leader` on the Overlord and Coordinator.|1|
| `service/heartbeat` | Metric indicating the service is up. `ServiceStatusMonitor` must be enabled. | `leader` on the Overlord and Coordinator.<br />`workerVersion`, `category`, `status` on the Middle Manager.<br />`taskId`, `groupId`, `taskType`, `dataSource` on the Peon |1|

### Historical

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -175,5 +175,5 @@
"namespace/cache/numEntries" : { "dimensions" : [], "type" : "gauge" },
"namespace/cache/heapSizeInBytes" : { "dimensions" : [], "type" : "gauge" },

"service/heartbeat" : { "dimensions" : ["leader"], "type" : "gauge" }
"service/heartbeat" : { "dimensions" : ["leader"], "type" : "count" }
}
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,9 @@ public class DruidMetrics

public static final String TAGS = "tags";

public static final String CATEGORY = "category";
public static final String WORKER_VERSION = "workerVersion";

public static int findNumComplexAggs(List<AggregatorFactory> aggs)
{
int retVal = 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,12 @@
*/
public class ServiceStatusMonitor extends AbstractMonitor
{
/**
* The named binding for tags that should be reported with the `service/heartbeat` metric.
*/
public static final String HEARTBEAT_TAGS_BINDING = "heartbeat";

@Named("heartbeat")
@Named(HEARTBEAT_TAGS_BINDING)
@Inject(optional = true)
Supplier<Map<String, Object>> heartbeatTagsSupplier = null;

Expand All @@ -43,9 +47,7 @@ public boolean doMonitor(ServiceEmitter emitter)
{
final ServiceMetricEvent.Builder builder = new ServiceMetricEvent.Builder();
if (heartbeatTagsSupplier != null && heartbeatTagsSupplier.get() != null) {
heartbeatTagsSupplier.get().forEach((k, v) -> {
builder.setDimension(k, v);
});
heartbeatTagsSupplier.get().forEach(builder::setDimension);
}

emitter.emit(builder.build("service/heartbeat", 1));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import org.apache.druid.java.util.emitter.service.ServiceEmitter;
import org.apache.druid.java.util.emitter.service.ServiceMetricEvent;
import org.apache.druid.java.util.metrics.AbstractMonitor;
import org.apache.druid.query.DruidMetrics;

import java.util.Set;

Expand Down Expand Up @@ -71,8 +72,8 @@ private void emit(ServiceEmitter emitter, String metricName, Long value)
{
if (value != null) {
final ServiceMetricEvent.Builder builder = new ServiceMetricEvent.Builder();
builder.setDimension("category", workerCategory);
builder.setDimension("workerVersion", workerVersion);
builder.setDimension(DruidMetrics.CATEGORY, workerCategory);
builder.setDimension(DruidMetrics.WORKER_VERSION, workerVersion);
emitter.emit(builder.build(metricName, value));
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@
import org.apache.druid.server.initialization.jetty.JettyServerInitializer;
import org.apache.druid.server.lookup.cache.LookupCoordinatorManager;
import org.apache.druid.server.lookup.cache.LookupCoordinatorManagerConfig;
import org.apache.druid.server.metrics.ServiceStatusMonitor;
import org.apache.druid.server.router.TieredBrokerConfig;
import org.eclipse.jetty.server.Server;
import org.joda.time.Duration;
Expand Down Expand Up @@ -332,7 +333,7 @@ public void configure(Binder binder)
binder.bind(RowIngestionMetersFactory.class).toProvider(Providers.of(null));
// Bind HeartbeatSupplier only when the service operates independently of Overlord.
binder.bind(new TypeLiteral<Supplier<Map<String, Object>>>() {})
.annotatedWith(Names.named("heartbeat"))
.annotatedWith(Names.named(ServiceStatusMonitor.HEARTBEAT_TAGS_BINDING))
.toProvider(HeartbeatSupplier.class);
}

Expand Down
18 changes: 18 additions & 0 deletions services/src/main/java/org/apache/druid/cli/CliMiddleManager.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,17 @@
package org.apache.druid.cli;

import com.github.rvesse.airline.annotations.Command;
import com.google.common.base.Supplier;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.inject.Binder;
import com.google.inject.Inject;
import com.google.inject.Key;
import com.google.inject.Module;
import com.google.inject.Provides;
import com.google.inject.multibindings.MapBinder;
import com.google.inject.name.Named;
import com.google.inject.name.Names;
import com.google.inject.util.Providers;
import org.apache.druid.curator.ZkEnablementConfig;
Expand Down Expand Up @@ -67,6 +70,7 @@
import org.apache.druid.indexing.worker.shuffle.ShuffleModule;
import org.apache.druid.java.util.common.logger.Logger;
import org.apache.druid.metadata.input.InputSourceModule;
import org.apache.druid.query.DruidMetrics;
import org.apache.druid.query.lookup.LookupSerdeModule;
import org.apache.druid.segment.incremental.RowIngestionMetersFactory;
import org.apache.druid.segment.realtime.appenderator.AppenderatorsManager;
Expand All @@ -76,11 +80,13 @@
import org.apache.druid.server.DruidNode;
import org.apache.druid.server.http.SelfDiscoveryResource;
import org.apache.druid.server.initialization.jetty.JettyServerInitializer;
import org.apache.druid.server.metrics.ServiceStatusMonitor;
import org.apache.druid.server.metrics.WorkerTaskCountStatsProvider;
import org.apache.druid.timeline.PruneLastCompactionState;
import org.eclipse.jetty.server.Server;

import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;

Expand Down Expand Up @@ -195,6 +201,18 @@ private void configureIntermediaryData(Binder binder)
biddy.addBinding("deepstore").to(DeepStorageIntermediaryDataManager.class).in(LazySingleton.class);
}

@Provides
@LazySingleton
@Named(ServiceStatusMonitor.HEARTBEAT_TAGS_BINDING)
public Supplier<Map<String, Object>> heartbeatDimensions(WorkerConfig workerConfig, WorkerTaskManager workerTaskManager)
{
return () -> ImmutableMap.of(
DruidMetrics.WORKER_VERSION, workerConfig.getVersion(),
DruidMetrics.CATEGORY, workerConfig.getCategory(),
DruidMetrics.STATUS, workerTaskManager.isWorkerEnabled() ? "Enabled" : "Disabled"
);
}

@Provides
@LazySingleton
public Worker getWorker(@Self DruidNode node, WorkerConfig config)
Expand Down
3 changes: 2 additions & 1 deletion services/src/main/java/org/apache/druid/cli/CliOverlord.java
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@
import org.apache.druid.server.initialization.ServerConfig;
import org.apache.druid.server.initialization.jetty.JettyServerInitUtils;
import org.apache.druid.server.initialization.jetty.JettyServerInitializer;
import org.apache.druid.server.metrics.ServiceStatusMonitor;
import org.apache.druid.server.metrics.TaskCountStatsProvider;
import org.apache.druid.server.metrics.TaskSlotCountStatsProvider;
import org.apache.druid.server.security.AuthConfig;
Expand Down Expand Up @@ -361,7 +362,7 @@ public TaskStorageDirTracker getTaskStorageDirTracker(WorkerConfig workerConfig,

@Provides
@LazySingleton
@Named("heartbeat")
@Named(ServiceStatusMonitor.HEARTBEAT_TAGS_BINDING)
public Supplier<Map<String, Object>> getHeartbeatSupplier(TaskMaster taskMaster)
{
return () -> {
Expand Down
21 changes: 21 additions & 0 deletions services/src/main/java/org/apache/druid/cli/CliPeon.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,11 @@
import com.github.rvesse.airline.annotations.Command;
import com.github.rvesse.airline.annotations.Option;
import com.github.rvesse.airline.annotations.restrictions.Required;
import com.google.common.base.Supplier;
import com.google.common.base.Suppliers;
import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.inject.Binder;
import com.google.inject.Inject;
Expand Down Expand Up @@ -96,6 +99,7 @@
import org.apache.druid.java.util.common.logger.Logger;
import org.apache.druid.metadata.IndexerSQLMetadataStorageCoordinator;
import org.apache.druid.metadata.input.InputSourceModule;
import org.apache.druid.query.DruidMetrics;
import org.apache.druid.query.QuerySegmentWalker;
import org.apache.druid.query.lookup.LookupModule;
import org.apache.druid.segment.handoff.CoordinatorBasedSegmentHandoffNotifierConfig;
Expand Down Expand Up @@ -124,12 +128,14 @@
import org.apache.druid.server.initialization.jetty.ChatHandlerServerModule;
import org.apache.druid.server.initialization.jetty.JettyServerInitializer;
import org.apache.druid.server.metrics.DataSourceTaskIdHolder;
import org.apache.druid.server.metrics.ServiceStatusMonitor;
import org.eclipse.jetty.server.Server;

import java.io.File;
import java.io.IOException;
import java.nio.file.Paths;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;

Expand Down Expand Up @@ -259,6 +265,21 @@ public void configure(Binder binder)
}
}

@Provides
@LazySingleton
@Named(ServiceStatusMonitor.HEARTBEAT_TAGS_BINDING)
public Supplier<Map<String, Object>> heartbeatDimensions(Task task)
{
return Suppliers.ofInstance(
ImmutableMap.of(
DruidMetrics.TASK_ID, task.getId(),
DruidMetrics.DATASOURCE, task.getDataSource(),
DruidMetrics.TASK_TYPE, task.getType(),
DruidMetrics.GROUP_ID, task.getGroupId()
)
);
}

@Provides
@LazySingleton
public Task readTask(@Json ObjectMapper mapper, ExecutorLifecycleConfig config)
Expand Down

0 comments on commit 62ddeaf

Please sign in to comment.