From 8021d401e0fb4ea1a4f7070b568ca17b82104eff Mon Sep 17 00:00:00 2001 From: Matthew Kocher Date: Fri, 18 Aug 2023 16:16:41 -0700 Subject: [PATCH] add otel collector service telemetry metrics (#364) * add otel collector service telemetry metrics Configure a Prometheus endpoint that serves metrics about the otel-collector itself. Also add a prom_scraper_config so that the prom scraper scrapes those metrics. * add otel service telemetry metrics to windows jobs * fix: selectively enable prom scraping of otel collectors When OTel Collector jobs are disabled, don't output any prom scraper configuration so that the prom scraper skips over these jobs. This avoids continual error messages logged by the prom scraper when it can't scrape a destination. * fix(otel-collector-windows): align default port with otel-collector * refactor: remove service prefix from telemetry properties Since we already dropped exact parity with OTel Collector settings with `ingress.grpc.tls.*`, we think it's preferable to remove the `service.` prefix from telemetry properties in the OTel Collector jobs. 
--------- Signed-off-by: Rebecca Roberts Signed-off-by: Andrew Crump Signed-off-by: Carson Long --- jobs/otel-collector-windows/spec | 9 ++++++++- jobs/otel-collector-windows/templates/config.yml.erb | 7 ++++++- .../templates/prom_scraper_config.yml.erb | 9 +++++++++ jobs/otel-collector/spec | 7 +++++++ jobs/otel-collector/templates/config.yml.erb | 7 ++++++- .../otel-collector/templates/prom_scraper_config.yml.erb | 9 +++++++++ 6 files changed, 45 insertions(+), 3 deletions(-) create mode 100644 jobs/otel-collector-windows/templates/prom_scraper_config.yml.erb create mode 100644 jobs/otel-collector/templates/prom_scraper_config.yml.erb diff --git a/jobs/otel-collector-windows/spec b/jobs/otel-collector-windows/spec index 341ebbfb2..72b1af3a4 100644 --- a/jobs/otel-collector-windows/spec +++ b/jobs/otel-collector-windows/spec @@ -7,6 +7,7 @@ templates: otel-collector.crt.erb: config/certs/otel-collector.crt otel-collector.key.erb: config/certs/otel-collector.key otel-collector-ca.crt.erb: config/certs/otel-collector-ca.crt + prom_scraper_config.yml.erb: config/prom_scraper_config.yml packages: - otel-collector-windows @@ -17,13 +18,19 @@ properties: default: true ingress.grpc.port: description: "Port the collector is listening on to receive OTLP over gRPC" - default: 3462 + default: 9100 ingress.grpc.tls.ca_cert: description: "CA root required for key/cert verification in gRPC ingress" ingress.grpc.tls.cert: description: "TLS server certificate for gRPC ingress" ingress.grpc.tls.key: description: "TLS server key for gRPC ingress" + telemetry.metrics.level: + description: "Level of metrics the collector exposes about itself" + default: "basic" + telemetry.metrics.port: + description: "Port to serve the collector's internal metrics" + default: 14830 metric_exporters: description: "Exporter configuration for aggregate metric egress" default: {} diff --git a/jobs/otel-collector-windows/templates/config.yml.erb b/jobs/otel-collector-windows/templates/config.yml.erb 
index c5209f1d3..d32d2ffb0 100644 --- a/jobs/otel-collector-windows/templates/config.yml.erb +++ b/jobs/otel-collector-windows/templates/config.yml.erb @@ -25,7 +25,12 @@ config = { }, "exporters"=>metric_exporters, "service"=>{ - "telemetry"=>{"metrics"=>{"level"=>"none"}}, + "telemetry"=>{ + "metrics"=>{ + "level"=>p('telemetry.metrics.level'), + "address"=>"127.0.0.1:#{p('telemetry.metrics.port')}" + } + }, "pipelines"=>{"metrics"=>{"receivers"=>["otlp"], "exporters"=>metric_exporters.keys}} } } diff --git a/jobs/otel-collector-windows/templates/prom_scraper_config.yml.erb b/jobs/otel-collector-windows/templates/prom_scraper_config.yml.erb new file mode 100644 index 000000000..7df5d90d5 --- /dev/null +++ b/jobs/otel-collector-windows/templates/prom_scraper_config.yml.erb @@ -0,0 +1,9 @@ +<% if p('enabled') %> +--- +port: <%= p("telemetry.metrics.port") %> +source_id: "otel-collector" +instance_id: <%= spec.id || spec.index.to_s %> +scheme: http +labels: + origin: otel-collector +<% end %> diff --git a/jobs/otel-collector/spec b/jobs/otel-collector/spec index 3d78a24ed..3eb2b1bee 100644 --- a/jobs/otel-collector/spec +++ b/jobs/otel-collector/spec @@ -8,6 +8,7 @@ templates: otel-collector.crt.erb: config/certs/otel-collector.crt otel-collector.key.erb: config/certs/otel-collector.key otel-collector-ca.crt.erb: config/certs/otel-collector-ca.crt + prom_scraper_config.yml.erb: config/prom_scraper_config.yml packages: - otel-collector @@ -25,6 +26,12 @@ properties: description: "TLS server certificate for gRPC ingress" ingress.grpc.tls.key: description: "TLS server key for gRPC ingress" + telemetry.metrics.level: + description: "Level of metrics the collector exposes about itself" + default: "basic" + telemetry.metrics.port: + description: "Port to serve the collector's internal metrics" + default: 14830 metric_exporters: description: "Exporter configuration for aggregate metric egress" default: {} diff --git a/jobs/otel-collector/templates/config.yml.erb 
b/jobs/otel-collector/templates/config.yml.erb index b537d81e5..c17d201fd 100644 --- a/jobs/otel-collector/templates/config.yml.erb +++ b/jobs/otel-collector/templates/config.yml.erb @@ -25,7 +25,12 @@ config = { }, "exporters"=>metric_exporters, "service"=>{ - "telemetry"=>{"metrics"=>{"level"=>"none"}}, + "telemetry"=>{ + "metrics"=>{ + "level"=>p('telemetry.metrics.level'), + "address"=>"127.0.0.1:#{p('telemetry.metrics.port')}" + } + }, "pipelines"=>{"metrics"=>{"receivers"=>["otlp"], "exporters"=>metric_exporters.keys}} } } diff --git a/jobs/otel-collector/templates/prom_scraper_config.yml.erb b/jobs/otel-collector/templates/prom_scraper_config.yml.erb new file mode 100644 index 000000000..7df5d90d5 --- /dev/null +++ b/jobs/otel-collector/templates/prom_scraper_config.yml.erb @@ -0,0 +1,9 @@ +<% if p('enabled') %> +--- +port: <%= p("telemetry.metrics.port") %> +source_id: "otel-collector" +instance_id: <%= spec.id || spec.index.to_s %> +scheme: http +labels: + origin: otel-collector +<% end %>