Skip to content

Commit

Permalink
metrics: introduce client config to include alloc metadata as part of…
Browse files Browse the repository at this point in the history
… the base labels
  • Loading branch information
mvegter committed Oct 2, 2024
1 parent 651d8d6 commit 825f5a2
Show file tree
Hide file tree
Showing 7 changed files with 88 additions and 11 deletions.
3 changes: 3 additions & 0 deletions .changelog/23964.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:improvement
metrics: introduce client config to include alloc metadata as part of the base labels
```
14 changes: 14 additions & 0 deletions client/allocrunner/taskrunner/task_runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -510,6 +510,20 @@ func (tr *TaskRunner) initLabels() {
},
}

if tr.clientConfig.IncludeAllocMetadataInMetrics {
combined := alloc.Job.CombinedTaskMeta(alloc.TaskGroup, tr.taskName)
for meta, metaValue := range combined {
if len(tr.clientConfig.AllowedMetadataKeysInMetrics) > 0 && !slices.Contains(tr.clientConfig.AllowedMetadataKeysInMetrics, meta) {
continue
}

tr.baseLabels = append(tr.baseLabels, metrics.Label{
Name: strings.ReplaceAll(meta, "-", "_"),
Value: metaValue,
})
}
}

if tr.alloc.Job.ParentID != "" {
tr.baseLabels = append(tr.baseLabels, metrics.Label{
Name: "parent_id",
Expand Down
34 changes: 34 additions & 0 deletions client/allocrunner/taskrunner/task_runner_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2866,6 +2866,40 @@ func TestTaskRunner_BaseLabels(t *testing.T) {
require.Equal(alloc.Namespace, labels["namespace"])
}

// TestTaskRunner_BaseLabels_IncludesAllocMetadata verifies that, when alloc
// metadata labels are enabled together with an allow list, only the allowed
// metadata keys appear in the task runner's base labels and that dashes in a
// key are replaced with underscores in the label name.
func TestTaskRunner_BaseLabels_IncludesAllocMetadata(t *testing.T) {
	ci.Parallel(t)

	alloc := mock.BatchAlloc()
	alloc.Namespace = "not-default"

	// "some_dynamic_value" is intentionally absent from the allow list set
	// further down, so it must not show up as a label.
	batchJob := alloc.Job
	batchJob.Meta = map[string]string{
		"owner":              "HashiCorp",
		"my-key":             "my-value",
		"some_dynamic_value": "now()",
	}

	firstTask := batchJob.TaskGroups[0].Tasks[0]
	firstTask.Driver = "raw_exec"
	firstTask.Config = map[string]interface{}{"command": "whoami"}

	runnerConfig, cleanup := testTaskRunnerConfig(t, alloc, firstTask.Name, nil)
	defer cleanup()

	runnerConfig.ClientConfig.IncludeAllocMetadataInMetrics = true
	runnerConfig.ClientConfig.AllowedMetadataKeysInMetrics = []string{"owner", "my-key"}

	runner, err := NewTaskRunner(runnerConfig)
	must.NoError(t, err)

	// Index the base labels by name so individual labels can be looked up.
	byName := make(map[string]string, len(runner.baseLabels))
	for _, label := range runner.baseLabels {
		byName[label.Name] = label.Value
	}

	must.Eq(t, "HashiCorp", byName["owner"])
	must.Eq(t, "my-value", byName["my_key"])
	must.MapNotContainsKey(t, byName, "some_dynamic_value")
}

// TestTaskRunner_IdentityHook_Enabled asserts that the identity hook exposes a
// workload identity to a task.
func TestTaskRunner_IdentityHook_Enabled(t *testing.T) {
Expand Down
8 changes: 8 additions & 0 deletions client/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,14 @@ type Config struct {
// allocation metrics to remote Telemetry sinks
PublishAllocationMetrics bool

// IncludeAllocMetadataInMetrics determines whether nomad should include the
// allocation metadata as labels in the metrics to remote Telemetry sinks
IncludeAllocMetadataInMetrics bool

// AllowedMetadataKeysInMetrics, when non-empty, restricts the metadata keys
// added as metric labels to the listed keys; when empty, no keys are filtered out
AllowedMetadataKeysInMetrics []string

// TLSConfig holds various TLS related configurations
TLSConfig *structsc.TLSConfig

Expand Down
2 changes: 2 additions & 0 deletions command/agent/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -861,6 +861,8 @@ func convertClientConfig(agentConfig *Config) (*clientconfig.Config, error) {
conf.StatsCollectionInterval = agentConfig.Telemetry.collectionInterval
conf.PublishNodeMetrics = agentConfig.Telemetry.PublishNodeMetrics
conf.PublishAllocationMetrics = agentConfig.Telemetry.PublishAllocationMetrics
conf.IncludeAllocMetadataInMetrics = agentConfig.Telemetry.IncludeAllocMetadataInMetrics
conf.AllowedMetadataKeysInMetrics = agentConfig.Telemetry.AllowedMetadataKeysInMetrics

// Set the TLS related configs
conf.TLSConfig = agentConfig.TLSConfig
Expand Down
30 changes: 19 additions & 11 deletions command/agent/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -969,17 +969,19 @@ type Telemetry struct {
InMemoryRetentionPeriod string `hcl:"in_memory_retention_period"`
inMemoryRetentionPeriod time.Duration `hcl:"-"`

StatsiteAddr string `hcl:"statsite_address"`
StatsdAddr string `hcl:"statsd_address"`
DataDogAddr string `hcl:"datadog_address"`
DataDogTags []string `hcl:"datadog_tags"`
PrometheusMetrics bool `hcl:"prometheus_metrics"`
DisableHostname bool `hcl:"disable_hostname"`
UseNodeName bool `hcl:"use_node_name"`
CollectionInterval string `hcl:"collection_interval"`
collectionInterval time.Duration `hcl:"-"`
PublishAllocationMetrics bool `hcl:"publish_allocation_metrics"`
PublishNodeMetrics bool `hcl:"publish_node_metrics"`
StatsiteAddr string `hcl:"statsite_address"`
StatsdAddr string `hcl:"statsd_address"`
DataDogAddr string `hcl:"datadog_address"`
DataDogTags []string `hcl:"datadog_tags"`
PrometheusMetrics bool `hcl:"prometheus_metrics"`
DisableHostname bool `hcl:"disable_hostname"`
UseNodeName bool `hcl:"use_node_name"`
CollectionInterval string `hcl:"collection_interval"`
collectionInterval time.Duration `hcl:"-"`
PublishAllocationMetrics bool `hcl:"publish_allocation_metrics"`
PublishNodeMetrics bool `hcl:"publish_node_metrics"`
IncludeAllocMetadataInMetrics bool `hcl:"include_alloc_metadata_in_metrics"`
AllowedMetadataKeysInMetrics []string `hcl:"allowed_metadata_keys_in_metrics"`

// PrefixFilter allows for filtering out metrics from being collected
PrefixFilter []string `hcl:"prefix_filter"`
Expand Down Expand Up @@ -1343,6 +1345,8 @@ func DevConfig(mode *devModeConfig) *Config {
conf.Telemetry.PrometheusMetrics = true
conf.Telemetry.PublishAllocationMetrics = true
conf.Telemetry.PublishNodeMetrics = true
conf.Telemetry.IncludeAllocMetadataInMetrics = true
conf.Telemetry.AllowedMetadataKeysInMetrics = []string{}

if mode.consulMode {
conf.Consuls[0].ServiceIdentity = &config.WorkloadIdentityConfig{
Expand Down Expand Up @@ -2524,6 +2528,10 @@ func (t *Telemetry) Merge(b *Telemetry) *Telemetry {
if b.PublishAllocationMetrics {
result.PublishAllocationMetrics = true
}
if b.IncludeAllocMetadataInMetrics {
result.IncludeAllocMetadataInMetrics = true
}
result.AllowedMetadataKeysInMetrics = append(result.AllowedMetadataKeysInMetrics, b.AllowedMetadataKeysInMetrics...)
if b.CirconusAPIToken != "" {
result.CirconusAPIToken = b.CirconusAPIToken
}
Expand Down
8 changes: 8 additions & 0 deletions website/content/docs/configuration/telemetry.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,14 @@ The following options are available on all telemetry configurations.
- `publish_allocation_metrics` `(bool: false)` - Specifies if Nomad should
publish runtime metrics of allocations.

- `include_alloc_metadata_in_metrics` `(bool: false)` - Specifies if Nomad should
include allocation metadata in metric labels. Dashes (`-`) in metadata keys are
replaced with underscores (`_`) in the label names. Enabling this option may
result in high cardinality labels, so you should also configure
[allowed_metadata_keys_in_metrics](#allowed_metadata_keys_in_metrics).

- `allowed_metadata_keys_in_metrics` `(list: [])` - Specifies the metadata keys
that may be included as metric labels when `include_alloc_metadata_in_metrics`
is enabled. When the list is empty, which is the default, no keys are filtered
out and all metadata is included.

- `publish_node_metrics` `(bool: false)` - Specifies if Nomad should publish
runtime metrics of nodes.

Expand Down

0 comments on commit 825f5a2

Please sign in to comment.