diff --git a/receiver/splunkenterprisereceiver/documentation.md b/receiver/splunkenterprisereceiver/documentation.md index dfc2559816a4..e5948f0a5699 100644 --- a/receiver/splunkenterprisereceiver/documentation.md +++ b/receiver/splunkenterprisereceiver/documentation.md @@ -41,6 +41,21 @@ Gauge tracking the number of buckets and their searchable status. *Note:** Searc | splunk.host | The name of the splunk host | Any Str | | splunk.indexer.searchable | The searchability status reported for a specific object | Any Str | +### splunk.health + +The status (color) of the Splunk server. + +| Unit | Metric Type | Value Type | +| ---- | ----------- | ---------- | +| {status} | Gauge | Int | + +#### Attributes + +| Name | Description | Values | +| ---- | ----------- | ------ | +| splunk.feature | The Feature name from the Splunk Health Introspection Endpoint | Any Str | +| splunk.feature.health | The Health (in color form) of a Splunk Feature from the Splunk Health Introspection Endpoint | Any Str | + ### splunk.indexer.avg.rate Gauge tracking the average rate of indexed data. **Note:** Search is best run against a Cluster Manager. 
diff --git a/receiver/splunkenterprisereceiver/internal/metadata/generated_config.go b/receiver/splunkenterprisereceiver/internal/metadata/generated_config.go index 4ea3c87bfe5c..f3b59169f00c 100644 --- a/receiver/splunkenterprisereceiver/internal/metadata/generated_config.go +++ b/receiver/splunkenterprisereceiver/internal/metadata/generated_config.go @@ -36,6 +36,7 @@ type MetricsConfig struct { SplunkDataIndexesExtendedEventCount MetricConfig `mapstructure:"splunk.data.indexes.extended.event.count"` SplunkDataIndexesExtendedRawSize MetricConfig `mapstructure:"splunk.data.indexes.extended.raw.size"` SplunkDataIndexesExtendedTotalSize MetricConfig `mapstructure:"splunk.data.indexes.extended.total.size"` + SplunkHealth MetricConfig `mapstructure:"splunk.health"` SplunkIndexerAvgRate MetricConfig `mapstructure:"splunk.indexer.avg.rate"` SplunkIndexerCPUTime MetricConfig `mapstructure:"splunk.indexer.cpu.time"` SplunkIndexerQueueRatio MetricConfig `mapstructure:"splunk.indexer.queue.ratio"` @@ -98,6 +99,9 @@ func DefaultMetricsConfig() MetricsConfig { SplunkDataIndexesExtendedTotalSize: MetricConfig{ Enabled: false, }, + SplunkHealth: MetricConfig{ + Enabled: true, + }, SplunkIndexerAvgRate: MetricConfig{ Enabled: true, }, diff --git a/receiver/splunkenterprisereceiver/internal/metadata/generated_config_test.go b/receiver/splunkenterprisereceiver/internal/metadata/generated_config_test.go index 6dead898ea26..5376660051a3 100644 --- a/receiver/splunkenterprisereceiver/internal/metadata/generated_config_test.go +++ b/receiver/splunkenterprisereceiver/internal/metadata/generated_config_test.go @@ -34,6 +34,7 @@ func TestMetricsBuilderConfig(t *testing.T) { SplunkDataIndexesExtendedEventCount: MetricConfig{Enabled: true}, SplunkDataIndexesExtendedRawSize: MetricConfig{Enabled: true}, SplunkDataIndexesExtendedTotalSize: MetricConfig{Enabled: true}, + SplunkHealth: MetricConfig{Enabled: true}, SplunkIndexerAvgRate: MetricConfig{Enabled: true}, SplunkIndexerCPUTime: 
MetricConfig{Enabled: true}, SplunkIndexerQueueRatio: MetricConfig{Enabled: true}, @@ -81,6 +82,7 @@ func TestMetricsBuilderConfig(t *testing.T) { SplunkDataIndexesExtendedEventCount: MetricConfig{Enabled: false}, SplunkDataIndexesExtendedRawSize: MetricConfig{Enabled: false}, SplunkDataIndexesExtendedTotalSize: MetricConfig{Enabled: false}, + SplunkHealth: MetricConfig{Enabled: false}, SplunkIndexerAvgRate: MetricConfig{Enabled: false}, SplunkIndexerCPUTime: MetricConfig{Enabled: false}, SplunkIndexerQueueRatio: MetricConfig{Enabled: false}, diff --git a/receiver/splunkenterprisereceiver/internal/metadata/generated_metrics.go b/receiver/splunkenterprisereceiver/internal/metadata/generated_metrics.go index c6d6e18c8d5b..272673d43700 100644 --- a/receiver/splunkenterprisereceiver/internal/metadata/generated_metrics.go +++ b/receiver/splunkenterprisereceiver/internal/metadata/generated_metrics.go @@ -474,6 +474,58 @@ func newMetricSplunkDataIndexesExtendedTotalSize(cfg MetricConfig) metricSplunkD return m } +type metricSplunkHealth struct { + data pmetric.Metric // data buffer for generated metric. + config MetricConfig // metric config provided by user. + capacity int // max observed number of data points added to the metric. +} + +// init fills splunk.health metric with initial data. 
+func (m *metricSplunkHealth) init() { + m.data.SetName("splunk.health") + m.data.SetDescription("The status (color) of the Splunk server.") + m.data.SetUnit("{status}") + m.data.SetEmptyGauge() + m.data.Gauge().DataPoints().EnsureCapacity(m.capacity) +} + +func (m *metricSplunkHealth) recordDataPoint(start pcommon.Timestamp, ts pcommon.Timestamp, val int64, splunkFeatureAttributeValue string, splunkFeatureHealthAttributeValue string) { + if !m.config.Enabled { + return + } + dp := m.data.Gauge().DataPoints().AppendEmpty() + dp.SetStartTimestamp(start) + dp.SetTimestamp(ts) + dp.SetIntValue(val) + dp.Attributes().PutStr("splunk.feature", splunkFeatureAttributeValue) + dp.Attributes().PutStr("splunk.feature.health", splunkFeatureHealthAttributeValue) +} + +// updateCapacity saves max length of data point slices that will be used for the slice capacity. +func (m *metricSplunkHealth) updateCapacity() { + if m.data.Gauge().DataPoints().Len() > m.capacity { + m.capacity = m.data.Gauge().DataPoints().Len() + } +} + +// emit appends recorded metric data to a metrics slice and prepares it for recording another set of data points. +func (m *metricSplunkHealth) emit(metrics pmetric.MetricSlice) { + if m.config.Enabled && m.data.Gauge().DataPoints().Len() > 0 { + m.updateCapacity() + m.data.MoveTo(metrics.AppendEmpty()) + m.init() + } +} + +func newMetricSplunkHealth(cfg MetricConfig) metricSplunkHealth { + m := metricSplunkHealth{config: cfg} + if cfg.Enabled { + m.data = pmetric.NewMetric() + m.init() + } + return m +} + type metricSplunkIndexerAvgRate struct { data pmetric.Metric // data buffer for generated metric. config MetricConfig // metric config provided by user. 
@@ -2075,6 +2127,7 @@ type MetricsBuilder struct { metricSplunkDataIndexesExtendedEventCount metricSplunkDataIndexesExtendedEventCount metricSplunkDataIndexesExtendedRawSize metricSplunkDataIndexesExtendedRawSize metricSplunkDataIndexesExtendedTotalSize metricSplunkDataIndexesExtendedTotalSize + metricSplunkHealth metricSplunkHealth metricSplunkIndexerAvgRate metricSplunkIndexerAvgRate metricSplunkIndexerCPUTime metricSplunkIndexerCPUTime metricSplunkIndexerQueueRatio metricSplunkIndexerQueueRatio @@ -2141,6 +2194,7 @@ func NewMetricsBuilder(mbc MetricsBuilderConfig, settings receiver.Settings, opt metricSplunkDataIndexesExtendedEventCount: newMetricSplunkDataIndexesExtendedEventCount(mbc.Metrics.SplunkDataIndexesExtendedEventCount), metricSplunkDataIndexesExtendedRawSize: newMetricSplunkDataIndexesExtendedRawSize(mbc.Metrics.SplunkDataIndexesExtendedRawSize), metricSplunkDataIndexesExtendedTotalSize: newMetricSplunkDataIndexesExtendedTotalSize(mbc.Metrics.SplunkDataIndexesExtendedTotalSize), + metricSplunkHealth: newMetricSplunkHealth(mbc.Metrics.SplunkHealth), metricSplunkIndexerAvgRate: newMetricSplunkIndexerAvgRate(mbc.Metrics.SplunkIndexerAvgRate), metricSplunkIndexerCPUTime: newMetricSplunkIndexerCPUTime(mbc.Metrics.SplunkIndexerCPUTime), metricSplunkIndexerQueueRatio: newMetricSplunkIndexerQueueRatio(mbc.Metrics.SplunkIndexerQueueRatio), @@ -2246,6 +2300,7 @@ func (mb *MetricsBuilder) EmitForResource(options ...ResourceMetricsOption) { mb.metricSplunkDataIndexesExtendedEventCount.emit(ils.Metrics()) mb.metricSplunkDataIndexesExtendedRawSize.emit(ils.Metrics()) mb.metricSplunkDataIndexesExtendedTotalSize.emit(ils.Metrics()) + mb.metricSplunkHealth.emit(ils.Metrics()) mb.metricSplunkIndexerAvgRate.emit(ils.Metrics()) mb.metricSplunkIndexerCPUTime.emit(ils.Metrics()) mb.metricSplunkIndexerQueueRatio.emit(ils.Metrics()) @@ -2343,6 +2398,11 @@ func (mb *MetricsBuilder) RecordSplunkDataIndexesExtendedTotalSizeDataPoint(ts p 
mb.metricSplunkDataIndexesExtendedTotalSize.recordDataPoint(mb.startTime, ts, val, splunkIndexNameAttributeValue) } +// RecordSplunkHealthDataPoint adds a data point to splunk.health metric. +func (mb *MetricsBuilder) RecordSplunkHealthDataPoint(ts pcommon.Timestamp, val int64, splunkFeatureAttributeValue string, splunkFeatureHealthAttributeValue string) { + mb.metricSplunkHealth.recordDataPoint(mb.startTime, ts, val, splunkFeatureAttributeValue, splunkFeatureHealthAttributeValue) +} + // RecordSplunkIndexerAvgRateDataPoint adds a data point to splunk.indexer.avg.rate metric. func (mb *MetricsBuilder) RecordSplunkIndexerAvgRateDataPoint(ts pcommon.Timestamp, val float64, splunkHostAttributeValue string) { mb.metricSplunkIndexerAvgRate.recordDataPoint(mb.startTime, ts, val, splunkHostAttributeValue) diff --git a/receiver/splunkenterprisereceiver/internal/metadata/generated_metrics_test.go b/receiver/splunkenterprisereceiver/internal/metadata/generated_metrics_test.go index 76406e0fdbb8..7687d538966d 100644 --- a/receiver/splunkenterprisereceiver/internal/metadata/generated_metrics_test.go +++ b/receiver/splunkenterprisereceiver/internal/metadata/generated_metrics_test.go @@ -88,6 +88,10 @@ func TestMetricsBuilder(t *testing.T) { allMetricsCount++ mb.RecordSplunkDataIndexesExtendedTotalSizeDataPoint(ts, 1, "splunk.index.name-val") + defaultMetricsCount++ + allMetricsCount++ + mb.RecordSplunkHealthDataPoint(ts, 1, "splunk.feature-val", "splunk.feature.health-val") + defaultMetricsCount++ allMetricsCount++ mb.RecordSplunkIndexerAvgRateDataPoint(ts, 1, "splunk.host-val") @@ -367,6 +371,24 @@ func TestMetricsBuilder(t *testing.T) { attrVal, ok := dp.Attributes().Get("splunk.index.name") assert.True(t, ok) assert.EqualValues(t, "splunk.index.name-val", attrVal.Str()) + case "splunk.health": + assert.False(t, validatedMetrics["splunk.health"], "Found a duplicate in the metrics slice: splunk.health") + validatedMetrics["splunk.health"] = true + assert.Equal(t, 
pmetric.MetricTypeGauge, ms.At(i).Type()) + assert.Equal(t, 1, ms.At(i).Gauge().DataPoints().Len()) + assert.Equal(t, "The status (color) of the Splunk server.", ms.At(i).Description()) + assert.Equal(t, "{status}", ms.At(i).Unit()) + dp := ms.At(i).Gauge().DataPoints().At(0) + assert.Equal(t, start, dp.StartTimestamp()) + assert.Equal(t, ts, dp.Timestamp()) + assert.Equal(t, pmetric.NumberDataPointValueTypeInt, dp.ValueType()) + assert.Equal(t, int64(1), dp.IntValue()) + attrVal, ok := dp.Attributes().Get("splunk.feature") + assert.True(t, ok) + assert.EqualValues(t, "splunk.feature-val", attrVal.Str()) + attrVal, ok = dp.Attributes().Get("splunk.feature.health") + assert.True(t, ok) + assert.EqualValues(t, "splunk.feature.health-val", attrVal.Str()) case "splunk.indexer.avg.rate": assert.False(t, validatedMetrics["splunk.indexer.avg.rate"], "Found a duplicate in the metrics slice: splunk.indexer.avg.rate") validatedMetrics["splunk.indexer.avg.rate"] = true diff --git a/receiver/splunkenterprisereceiver/internal/metadata/testdata/config.yaml b/receiver/splunkenterprisereceiver/internal/metadata/testdata/config.yaml index 90380c4d00e2..aeb4be01c4b3 100644 --- a/receiver/splunkenterprisereceiver/internal/metadata/testdata/config.yaml +++ b/receiver/splunkenterprisereceiver/internal/metadata/testdata/config.yaml @@ -19,6 +19,8 @@ all_set: enabled: true splunk.data.indexes.extended.total.size: enabled: true + splunk.health: + enabled: true splunk.indexer.avg.rate: enabled: true splunk.indexer.cpu.time: @@ -101,6 +103,8 @@ none_set: enabled: false splunk.data.indexes.extended.total.size: enabled: false + splunk.health: + enabled: false splunk.indexer.avg.rate: enabled: false splunk.indexer.cpu.time: diff --git a/receiver/splunkenterprisereceiver/metadata.yaml b/receiver/splunkenterprisereceiver/metadata.yaml index 6ead19fbe79b..d330a112c6a4 100644 --- a/receiver/splunkenterprisereceiver/metadata.yaml +++ b/receiver/splunkenterprisereceiver/metadata.yaml @@ -39,6 +39,12 
@@ attributes: splunk.searchartifacts.cache.type: description: The search artifacts cache type type: string + splunk.feature: + description: The Feature name from the Splunk Health Introspection Endpoint + type: string + splunk.feature.health: + description: The Health (in color form) of a Splunk Feature from the Splunk Health Introspection Endpoint + type: string metrics: splunk.license.index.usage: @@ -345,6 +351,14 @@ metrics: aggregation_temporality: cumulative value_type: int attributes: [splunk.host] + #`services/server/health/splunkd/details` + splunk.health: + enabled: True + description: The status (color) of the Splunk server. + unit: "{status}" + gauge: + value_type: int + attributes: [splunk.feature, splunk.feature.health] tests: config: diff --git a/receiver/splunkenterprisereceiver/scraper.go b/receiver/splunkenterprisereceiver/scraper.go index d517a2da07af..aa79be6e86d5 100644 --- a/receiver/splunkenterprisereceiver/scraper.go +++ b/receiver/splunkenterprisereceiver/scraper.go @@ -101,6 +101,7 @@ func (s *splunkScraper) scrape(ctx context.Context) (pmetric.Metrics, error) { s.scrapeIndexerAvgRate, s.scrapeKVStoreStatus, s.scrapeSearchArtifacts, + s.scrapeHealth, } errChan := make(chan error, len(metricScrapes)) @@ -1075,12 +1076,12 @@ func unmarshallSearchReq(res *http.Response, sr *searchResponse) error { body, err := io.ReadAll(res.Body) if err != nil { - return fmt.Errorf("Failed to read response: %w", err) + return fmt.Errorf("failed to read response: %w", err) } err = xml.Unmarshal(body, &sr) if err != nil { - return fmt.Errorf("Failed to unmarshall response: %w", err) + return fmt.Errorf("failed to unmarshall response: %w", err) } return nil @@ -1733,3 +1734,50 @@ func (s *splunkScraper) scrapeSearchArtifacts(ctx context.Context, now pcommon.T } } } + +// scrapeHealth queries the Splunk Health Introspection Endpoint and records a splunk.health data point for every feature in the response, including nested features. +func (s *splunkScraper) scrapeHealth(ctx context.Context, now pcommon.Timestamp, errs chan error) { + if 
!s.conf.MetricsBuilderConfig.Metrics.SplunkHealth.Enabled { + return + } + + ctx = context.WithValue(ctx, endpointType("type"), typeCm) + + ept := apiDict[`SplunkHealth`] + var hd HealthDetails + + req, err := s.splunkClient.createAPIRequest(ctx, ept) + if err != nil { + errs <- err + return + } + + res, err := s.splunkClient.makeRequest(req) + if err != nil { + errs <- err + return + } + defer res.Body.Close() + + if err := json.NewDecoder(res.Body).Decode(&hd); err != nil { + errs <- err + return + } + + s.traverseHealthDetailFeatures(hd.Features, now) +} + +func (s *splunkScraper) traverseHealthDetailFeatures(features map[string]HealthDetails, now pcommon.Timestamp) { + if features == nil { + return + } + + for k, feature := range features { + if feature.Health != "red" { + s.mb.RecordSplunkHealthDataPoint(now, 1, k, feature.Health) + } else { + s.mb.RecordSplunkHealthDataPoint(now, 0, k, feature.Health) + } + s.traverseHealthDetailFeatures(feature.Features, now) + } +} diff --git a/receiver/splunkenterprisereceiver/search_result.go b/receiver/splunkenterprisereceiver/search_result.go index bd6c4318b016..6bdc2c00ee32 100644 --- a/receiver/splunkenterprisereceiver/search_result.go +++ b/receiver/splunkenterprisereceiver/search_result.go @@ -25,6 +25,7 @@ var apiDict = map[string]string{ `SplunkIntrospectionQueues`: `/services/server/introspection/queues?output_mode=json&count=-1`, `SplunkKVStoreStatus`: `/services/kvstore/status?output_mode=json`, `SplunkDispatchArtifacts`: `/services/server/status/dispatch-artifacts?output_mode=json&count=-1`, + `SplunkHealth`: `/services/server/health/splunkd/details?output_mode=json`, } type searchResponse struct { @@ -156,3 +157,9 @@ type DispatchArtifactContent struct { StatusCacheSize string `json:"cached_job_status_status_csv_size_mb"` CacheTotalEntries string `json:"cached_job_status_total_entries"` } + +// HealthDetails is one node of the '/services/server/health/splunkd/details' response: a health status string plus optional nested features keyed by feature name. +type HealthDetails struct { + Health string `json:"health"` + Features 
map[string]HealthDetails `json:"features,omitempty"` +}