From 4be6df018b029d59a2710dea5691070110585e17 Mon Sep 17 00:00:00 2001 From: Bastian Krol Date: Thu, 5 Sep 2024 11:04:52 +0200 Subject: [PATCH] feat(metrics): add collector deployment to collect cluster metrics --- Makefile | 2 - .../templates/operator/cluster-roles.yaml | 8 +- .../__snapshot__/cluster-roles_test.yaml.snap | 1 + images/collector/src/builder/config.yaml | 2 + .../otelcolresources/collector_config_maps.go | 27 ++- .../collector_config_maps_test.go | 159 +++++++++------ .../daemonset.config.yaml.template | 27 +++ .../deployment.config.yaml.template | 5 +- .../otelcolresources/desired_state.go | 191 +++++++++++++++--- .../otelcolresources/desired_state_test.go | 54 ++++- .../otelcol_resources_test.go | 8 +- .../dash0/selfmonitoring/self_monitoring.go | 40 +++- test-resources/bin/test-cleanup.sh | 4 +- test-resources/bin/util | 4 +- test/util/collector.go | 21 +- 15 files changed, 426 insertions(+), 127 deletions(-) diff --git a/Makefile b/Makefile index a73e7057..21ce344c 100644 --- a/Makefile +++ b/Makefile @@ -244,8 +244,6 @@ install: manifests kustomize ## Install CRDs into the K8s cluster specified in ~ .PHONY: uninstall uninstall: manifests kustomize ## Uninstall CRDs from the K8s cluster specified in ~/.kube/config. Call with ignore-not-found=true to ignore resource not found errors during deletion. $(KUSTOMIZE) build config/crd | $(KUBECTL) delete --ignore-not-found=$(ignore-not-found) --wait=false -f - - sleep 1 - $(KUSTOMIZE) build config/crd | $(KUBECTL) patch CustomResourceDefinition dash0monitorings.operator.dash0.com -p '{"metadata":{"finalizers":null}}' --type=merge .PHONY: deploy-via-helm deploy-via-helm: ## Deploy the controller via helm to the K8s cluster specified in ~/.kube/config. 
diff --git a/helm-chart/dash0-operator/templates/operator/cluster-roles.yaml b/helm-chart/dash0-operator/templates/operator/cluster-roles.yaml index bf64412c..80e81da5 100644 --- a/helm-chart/dash0-operator/templates/operator/cluster-roles.yaml +++ b/helm-chart/dash0-operator/templates/operator/cluster-roles.yaml @@ -166,10 +166,12 @@ rules: - apiGroups: - apps resources: - # Note: apps.daemonsets are also listed further up together with the other workload types in the apps API group, with - # fewer permissions. The declaration here extends that list of permissions to make sure we can also create and delete - # the daemonsets for the OTel collector instances that the Dash0 operator manages. + # Note: apps.daemonsets and apps.deployments are also listed further up together with the other workload types in the + # apps API group, with fewer permissions. The declaration here extends that list of permissions to make sure we can + # also create and delete the daemonset and deployment for the OTel collector instance that the Dash0 operator + # manages. 
- daemonsets + - deployments verbs: - create - delete diff --git a/helm-chart/dash0-operator/tests/operator/__snapshot__/cluster-roles_test.yaml.snap b/helm-chart/dash0-operator/tests/operator/__snapshot__/cluster-roles_test.yaml.snap index dd5c6d7c..937fe797 100644 --- a/helm-chart/dash0-operator/tests/operator/__snapshot__/cluster-roles_test.yaml.snap +++ b/helm-chart/dash0-operator/tests/operator/__snapshot__/cluster-roles_test.yaml.snap @@ -147,6 +147,7 @@ cluster roles should match snapshot: - apps resources: - daemonsets + - deployments verbs: - create - delete diff --git a/images/collector/src/builder/config.yaml b/images/collector/src/builder/config.yaml index 69e6e0ce..852f535e 100644 --- a/images/collector/src/builder/config.yaml +++ b/images/collector/src/builder/config.yaml @@ -21,6 +21,7 @@ exporters: receivers: - gomod: "go.opentelemetry.io/collector/receiver/otlpreceiver v0.106.1" - gomod: "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/filelogreceiver v0.106.1" + - gomod: "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/k8sclusterreceiver v0.106.1" - gomod: "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/kubeletstatsreceiver v0.106.1" processors: @@ -30,4 +31,5 @@ processors: - gomod: "github.com/open-telemetry/opentelemetry-collector-contrib/processor/filterprocessor v0.106.1" - gomod: "github.com/open-telemetry/opentelemetry-collector-contrib/processor/k8sattributesprocessor v0.106.1" - gomod: "github.com/open-telemetry/opentelemetry-collector-contrib/processor/resourceprocessor v0.106.1" + - gomod: "github.com/open-telemetry/opentelemetry-collector-contrib/processor/resourcedetectionprocessor v0.106.1" - gomod: "github.com/open-telemetry/opentelemetry-collector-contrib/processor/transformprocessor v0.106.1" diff --git a/internal/backendconnection/otelcolresources/collector_config_maps.go b/internal/backendconnection/otelcolresources/collector_config_maps.go index 971b259f..15deaa16 100644 
--- a/internal/backendconnection/otelcolresources/collector_config_maps.go +++ b/internal/backendconnection/otelcolresources/collector_config_maps.go @@ -29,25 +29,34 @@ var ( daemonSetCollectorConfigurationTemplate = template.Must( template.New("daemonset-collector-configuration").Parse(daemonSetCollectorConfigurationTemplateSource)) - ////go:embed deployment.config.yaml.template - //deploymentCollectorConfigurationTemplateSource string - //deploymentCollectorConfigurationTemplate = template.Must( - // template.New("deployment-collector-configuration").Parse(deploymentCollectorConfigurationTemplateSource)) + //go:embed deployment.config.yaml.template + deploymentCollectorConfigurationTemplateSource string + deploymentCollectorConfigurationTemplate = template.Must( + template.New("deployment-collector-configuration").Parse(deploymentCollectorConfigurationTemplateSource)) authHeaderValue = fmt.Sprintf("Bearer ${env:%s}", authTokenEnvVarName) ) func assembleDaemonSetCollectorConfigMap(config *oTelColConfig) (*corev1.ConfigMap, error) { - return assembleCollectorConfigMap(config, daemonSetCollectorConfigurationTemplate) + return assembleCollectorConfigMap( + config, + daemonSetCollectorConfigurationTemplate, + daemonSetCollectorConfigConfigMapName(config.NamePrefix), + ) } -//func assembleDeploymentCollectorConfigMap(config *oTelColConfig) (*corev1.ConfigMap, error) { -// return assembleCollectorConfigMap(config, deploymentCollectorConfigurationTemplate) -//} +func assembleDeploymentCollectorConfigMap(config *oTelColConfig) (*corev1.ConfigMap, error) { + return assembleCollectorConfigMap( + config, + deploymentCollectorConfigurationTemplate, + deploymentCollectorConfigConfigMapName(config.NamePrefix), + ) +} func assembleCollectorConfigMap( config *oTelColConfig, template *template.Template, + configMapName string, ) (*corev1.ConfigMap, error) { exporters, err := ConvertExportSettingsToExporterList(config.Export) if err != nil { @@ -79,7 +88,7 @@ func 
assembleCollectorConfigMap( APIVersion: "v1", }, ObjectMeta: metav1.ObjectMeta{ - Name: collectorConfigConfigMapName(config.NamePrefix), + Name: configMapName, Namespace: config.Namespace, Labels: labels(false), }, diff --git a/internal/backendconnection/otelcolresources/collector_config_maps_test.go b/internal/backendconnection/otelcolresources/collector_config_maps_test.go index 21657fda..a460f207 100644 --- a/internal/backendconnection/otelcolresources/collector_config_maps_test.go +++ b/internal/backendconnection/otelcolresources/collector_config_maps_test.go @@ -18,6 +18,11 @@ import ( . "github.com/dash0hq/dash0-operator/test/util" ) +type testConfig struct { + assembleConfigMapFunction func(config *oTelColConfig) (*corev1.ConfigMap, error) + pipelineNames []string +} + const ( GrpcEndpointTest = "example.com:4317" HttpEndpointTest = "https://example.com:4318" @@ -29,17 +34,38 @@ var ( var _ = Describe("The OpenTelemetry Collector ConfigMap conent", func() { - It("should fail if no exporter is configured", func() { - _, err := assembleDaemonSetCollectorConfigMap(&oTelColConfig{ + testConfigs := []TableEntry{ + Entry( + "for the DaemonSet", + testConfig{ + assembleConfigMapFunction: assembleDaemonSetCollectorConfigMap, + pipelineNames: []string{ + "traces/downstream", + "metrics/downstream", + "logs/downstream", + }, + }), + Entry( + "for the Deployment", + testConfig{ + assembleConfigMapFunction: assembleDeploymentCollectorConfigMap, + pipelineNames: []string{ + "metrics/downstream", + }, + }), + } + + DescribeTable("should fail if no exporter is configured", func(testConfig testConfig) { + _, err := testConfig.assembleConfigMapFunction(&oTelColConfig{ Namespace: namespace, NamePrefix: namePrefix, Export: dash0v1alpha1.Export{}, }) Expect(err).To(HaveOccurred()) - }) + }, testConfigs) - It("should fail to render the Dash0 exporter when no endpoint is provided", func() { - _, err := assembleDaemonSetCollectorConfigMap(&oTelColConfig{ + DescribeTable("should 
fail to render the Dash0 exporter when no endpoint is provided", func(testConfig testConfig) { + _, err := testConfig.assembleConfigMapFunction(&oTelColConfig{ Namespace: namespace, NamePrefix: namePrefix, Export: dash0v1alpha1.Export{ @@ -55,10 +81,10 @@ var _ = Describe("The OpenTelemetry Collector ConfigMap conent", func() { ContainSubstring( "no endpoint provided for the Dash0 exporter, unable to create the OpenTelemetry collector"))) - }) + }, testConfigs) - It("should render the Dash0 exporter", func() { - configMap, err := assembleDaemonSetCollectorConfigMap(&oTelColConfig{ + DescribeTable("should render the Dash0 exporter", func(testConfig testConfig) { + configMap, err := testConfig.assembleConfigMapFunction(&oTelColConfig{ Namespace: namespace, NamePrefix: namePrefix, Export: dash0v1alpha1.Export{ @@ -91,11 +117,11 @@ var _ = Describe("The OpenTelemetry Collector ConfigMap conent", func() { Expect(headers[util.Dash0DatasetHeaderName]).To(BeNil()) Expect(dash0OtlpExporter["encoding"]).To(BeNil()) - verifyDownstreamExportersInPipelines(collectorConfig, "otlp/dash0") - }) + verifyDownstreamExportersInPipelines(collectorConfig, testConfig, "otlp/dash0") + }, testConfigs) - It("should render the Dash0 exporter with custom dataset", func() { - configMap, err := assembleDaemonSetCollectorConfigMap(&oTelColConfig{ + DescribeTable("should render the Dash0 exporter with custom dataset", func(testConfig testConfig) { + configMap, err := testConfig.assembleConfigMapFunction(&oTelColConfig{ Namespace: namespace, NamePrefix: namePrefix, Export: dash0v1alpha1.Export{ @@ -129,11 +155,11 @@ var _ = Describe("The OpenTelemetry Collector ConfigMap conent", func() { Expect(headers[util.Dash0DatasetHeaderName]).To(Equal("custom-dataset")) Expect(dash0OtlpExporter["encoding"]).To(BeNil()) - verifyDownstreamExportersInPipelines(collectorConfig, "otlp/dash0") - }) + verifyDownstreamExportersInPipelines(collectorConfig, testConfig, "otlp/dash0") + }, testConfigs) - It("should 
render a verbose debug exporter in development mode", func() { - configMap, err := assembleDaemonSetCollectorConfigMap(&oTelColConfig{ + DescribeTable("should render a verbose debug exporter in development mode", func(testConfig testConfig) { + configMap, err := testConfig.assembleConfigMapFunction(&oTelColConfig{ Namespace: namespace, NamePrefix: namePrefix, Export: dash0v1alpha1.Export{ @@ -172,11 +198,11 @@ var _ = Describe("The OpenTelemetry Collector ConfigMap conent", func() { Expect(headers[util.Dash0DatasetHeaderName]).To(BeNil()) Expect(dash0OtlpExporter["encoding"]).To(BeNil()) - verifyDownstreamExportersInPipelines(collectorConfig, "debug", "otlp/dash0") - }) + verifyDownstreamExportersInPipelines(collectorConfig, testConfig, "debug", "otlp/dash0") + }, testConfigs) - It("should fail to render a gRPC exporter when no endpoint is provided", func() { - _, err := assembleDaemonSetCollectorConfigMap(&oTelColConfig{ + DescribeTable("should fail to render a gRPC exporter when no endpoint is provided", func(testConfig testConfig) { + _, err := testConfig.assembleConfigMapFunction(&oTelColConfig{ Namespace: namespace, NamePrefix: namePrefix, Export: dash0v1alpha1.Export{ @@ -193,10 +219,10 @@ var _ = Describe("The OpenTelemetry Collector ConfigMap conent", func() { ContainSubstring( "no endpoint provided for the gRPC exporter, unable to create the OpenTelemetry collector"))) - }) + }, testConfigs) - It("should render an arbitrary gRPC exporter", func() { - configMap, err := assembleDaemonSetCollectorConfigMap(&oTelColConfig{ + DescribeTable("should render an arbitrary gRPC exporter", func(testConfig testConfig) { + configMap, err := testConfig.assembleConfigMapFunction(&oTelColConfig{ Namespace: namespace, NamePrefix: namePrefix, Export: dash0v1alpha1.Export{ @@ -236,11 +262,11 @@ var _ = Describe("The OpenTelemetry Collector ConfigMap conent", func() { Expect(headers["Key2"]).To(Equal("Value2")) Expect(otlpGrpcExporter["encoding"]).To(BeNil()) - 
verifyDownstreamExportersInPipelines(collectorConfig, "otlp/grpc") - }) + verifyDownstreamExportersInPipelines(collectorConfig, testConfig, "otlp/grpc") + }, testConfigs) - It("should fail to render an HTTP exporter when no endpoint is provided", func() { - _, err := assembleDaemonSetCollectorConfigMap(&oTelColConfig{ + DescribeTable("should fail to render an HTTP exporter when no endpoint is provided", func(testConfig testConfig) { + _, err := testConfig.assembleConfigMapFunction(&oTelColConfig{ Namespace: namespace, NamePrefix: namePrefix, Export: dash0v1alpha1.Export{ @@ -257,10 +283,10 @@ var _ = Describe("The OpenTelemetry Collector ConfigMap conent", func() { MatchError( ContainSubstring( "no endpoint provided for the HTTP exporter, unable to create the OpenTelemetry collector"))) - }) + }, testConfigs) - It("should fail to render an HTTP exporter when no encoding is provided", func() { - _, err := assembleDaemonSetCollectorConfigMap(&oTelColConfig{ + DescribeTable("should fail to render an HTTP exporter when no encoding is provided", func(testConfig testConfig) { + _, err := testConfig.assembleConfigMapFunction(&oTelColConfig{ Namespace: namespace, NamePrefix: namePrefix, Export: dash0v1alpha1.Export{ @@ -278,10 +304,10 @@ var _ = Describe("The OpenTelemetry Collector ConfigMap conent", func() { ContainSubstring( "no encoding provided for the HTTP exporter, unable to create the OpenTelemetry collector"))) - }) + }, testConfigs) - It("should render an arbitrary HTTP exporter", func() { - configMap, err := assembleDaemonSetCollectorConfigMap(&oTelColConfig{ + DescribeTable("should render an arbitrary HTTP exporter", func(testConfig testConfig) { + configMap, err := testConfig.assembleConfigMapFunction(&oTelColConfig{ Namespace: namespace, NamePrefix: namePrefix, Export: dash0v1alpha1.Export{ @@ -322,11 +348,11 @@ var _ = Describe("The OpenTelemetry Collector ConfigMap conent", func() { Expect(headers["Key2"]).To(Equal("Value2")) 
Expect(otlpHttpExporter["encoding"]).To(Equal("json")) - verifyDownstreamExportersInPipelines(collectorConfig, "otlphttp/json") - }) + verifyDownstreamExportersInPipelines(collectorConfig, testConfig, "otlphttp/json") + }, testConfigs) - It("should render the Dash0 exporter together with a gRPC exporter", func() { - configMap, err := assembleDaemonSetCollectorConfigMap(&oTelColConfig{ + DescribeTable("should render the Dash0 exporter together with a gRPC exporter", func(testConfig testConfig) { + configMap, err := testConfig.assembleConfigMapFunction(&oTelColConfig{ Namespace: namespace, NamePrefix: namePrefix, Export: dash0v1alpha1.Export{ @@ -376,11 +402,11 @@ var _ = Describe("The OpenTelemetry Collector ConfigMap conent", func() { Expect(headers["Key1"]).To(Equal("Value1")) Expect(httpExporter["encoding"]).To(BeNil()) - verifyDownstreamExportersInPipelines(collectorConfig, "otlp/dash0", "otlp/grpc") - }) + verifyDownstreamExportersInPipelines(collectorConfig, testConfig, "otlp/dash0", "otlp/grpc") + }, testConfigs) - It("should render the Dash0 exporter together with an HTTP exporter", func() { - configMap, err := assembleDaemonSetCollectorConfigMap(&oTelColConfig{ + DescribeTable("should render the Dash0 exporter together with an HTTP exporter", func(testConfig testConfig) { + configMap, err := testConfig.assembleConfigMapFunction(&oTelColConfig{ Namespace: namespace, NamePrefix: namePrefix, Export: dash0v1alpha1.Export{ @@ -431,11 +457,11 @@ var _ = Describe("The OpenTelemetry Collector ConfigMap conent", func() { Expect(headers["Key1"]).To(Equal("Value1")) Expect(httpExporter["encoding"]).To(Equal("proto")) - verifyDownstreamExportersInPipelines(collectorConfig, "otlp/dash0", "otlphttp/proto") - }) + verifyDownstreamExportersInPipelines(collectorConfig, testConfig, "otlp/dash0", "otlphttp/proto") + }, testConfigs) - It("should render a gRPC exporter together with an HTTP exporter", func() { - configMap, err := 
assembleDaemonSetCollectorConfigMap(&oTelColConfig{ + DescribeTable("should render a gRPC exporter together with an HTTP exporter", func(testConfig testConfig) { + configMap, err := testConfig.assembleConfigMapFunction(&oTelColConfig{ Namespace: namespace, NamePrefix: namePrefix, Export: dash0v1alpha1.Export{ @@ -488,11 +514,11 @@ var _ = Describe("The OpenTelemetry Collector ConfigMap conent", func() { Expect(headers["Key2"]).To(Equal("Value2")) Expect(httpExporter["encoding"]).To(Equal("proto")) - verifyDownstreamExportersInPipelines(collectorConfig, "otlp/grpc", "otlphttp/proto") - }) + verifyDownstreamExportersInPipelines(collectorConfig, testConfig, "otlp/grpc", "otlphttp/proto") + }, testConfigs) - It("should render a combination of all three exporter types", func() { - configMap, err := assembleDaemonSetCollectorConfigMap(&oTelColConfig{ + DescribeTable("should render a combination of all three exporter types", func(testConfig testConfig) { + configMap, err := testConfig.assembleConfigMapFunction(&oTelColConfig{ Namespace: namespace, NamePrefix: namePrefix, Export: dash0v1alpha1.Export{ @@ -566,8 +592,15 @@ var _ = Describe("The OpenTelemetry Collector ConfigMap conent", func() { Expect(headers["Key2"]).To(Equal("Value2")) Expect(httpExporter["encoding"]).To(Equal("json")) - verifyDownstreamExportersInPipelines(collectorConfig, "debug", "otlp/dash0", "otlp/grpc", "otlphttp/json") - }) + verifyDownstreamExportersInPipelines( + collectorConfig, + testConfig, + "debug", + "otlp/dash0", + "otlp/grpc", + "otlphttp/json", + ) + }, testConfigs) }) func parseConfigMapContent(configMap *corev1.ConfigMap) map[string]interface{} { @@ -578,19 +611,17 @@ func parseConfigMapContent(configMap *corev1.ConfigMap) map[string]interface{} { return *configMapParsed } -func verifyDownstreamExportersInPipelines(collectorConfig map[string]interface{}, expectedExporters ...string) { +func verifyDownstreamExportersInPipelines( + collectorConfig map[string]interface{}, + testConfig 
testConfig, + expectedExporters ...string, +) { pipelines := ((collectorConfig["service"]).(map[string]interface{})["pipelines"]).(map[string]interface{}) Expect(pipelines).ToNot(BeNil()) - tracesPipeline := (pipelines["traces/downstream"]).(map[string]interface{}) - tracesExporters := (tracesPipeline["exporters"]).([]interface{}) - Expect(tracesExporters).To(HaveLen(len(expectedExporters))) - Expect(tracesExporters).To(ContainElements(expectedExporters)) - metricsPipeline := (pipelines["metrics/downstream"]).(map[string]interface{}) - metricsExporters := (metricsPipeline["exporters"]).([]interface{}) - Expect(metricsExporters).To(HaveLen(len(expectedExporters))) - Expect(metricsExporters).To(ContainElements(expectedExporters)) - logsPipeline := (pipelines["logs/downstream"]).(map[string]interface{}) - logsExporters := (logsPipeline["exporters"]).([]interface{}) - Expect(logsExporters).To(ContainElements(expectedExporters)) - Expect(logsExporters).To(HaveLen(len(expectedExporters))) + for _, pipelineName := range testConfig.pipelineNames { + pipeline := (pipelines[pipelineName]).(map[string]interface{}) + exporters := (pipeline["exporters"]).([]interface{}) + Expect(exporters).To(HaveLen(len(expectedExporters))) + Expect(exporters).To(ContainElements(expectedExporters)) + } } diff --git a/internal/backendconnection/otelcolresources/daemonset.config.yaml.template b/internal/backendconnection/otelcolresources/daemonset.config.yaml.template index 0eb81522..b4da1ac7 100644 --- a/internal/backendconnection/otelcolresources/daemonset.config.yaml.template +++ b/internal/backendconnection/otelcolresources/daemonset.config.yaml.template @@ -29,6 +29,19 @@ extensions: processors: batch: {} + + resourcedetection: + detectors: + - env + - system + - eks + - ecs + - ec2 + - gcp + - aks + - azure + - k8snode + filter/only_dash0_monitored_resources: error_mode: ignore traces: @@ -88,6 +101,17 @@ receivers: collection_interval: 20s endpoint: ${env:K8S_NODE_NAME}:10250 {{- if 
.DevelopmentMode }} +{{- /* +On Docker Desktop, Kind, etc. the kubelet uses a self-signed cert. Scraping will not work without +insecure_skip_verify=true in these environments: + +kubeletstatsreceiver@v0.106.1/scraper.go:104 call to /stats/summary endpoint failed +{"kind": "receiver", "name": "kubeletstats", "data_type": "metrics", "error": "Get +\"https://docker-desktop:10250/stats/summary\": tls: failed to verify certificate: x509: certificate signed by unknown +authority"} + +Thus we add this parameter when the helm chart is installed with --set operator.developmentMode=true for local tests and +e2e tests. */}} insecure_skip_verify: true {{- end }} @@ -183,6 +207,7 @@ service: - otlp processors: - k8sattributes + - resourcedetection - memory_limiter - batch exporters: @@ -199,6 +224,7 @@ service: - kubeletstats processors: - k8sattributes + - resourcedetection - memory_limiter - batch exporters: @@ -230,6 +256,7 @@ service: receivers: - forward/logs processors: + - resourcedetection - memory_limiter - batch exporters: diff --git a/internal/backendconnection/otelcolresources/deployment.config.yaml.template b/internal/backendconnection/otelcolresources/deployment.config.yaml.template index db04b49b..73bb0902 100644 --- a/internal/backendconnection/otelcolresources/deployment.config.yaml.template +++ b/internal/backendconnection/otelcolresources/deployment.config.yaml.template @@ -2,8 +2,6 @@ exporters: {{- if .DevelopmentMode }} debug: verbosity: detailed -{{- else }} - debug: {} {{- end }} {{- range $i, $exporter := .Exporters }} {{ $exporter.Name }}: @@ -60,6 +58,9 @@ service: - resourcedetection - batch exporters: + {{- if .DevelopmentMode }} + - debug + {{- end }} {{- range $i, $exporter := .Exporters }} - {{ $exporter.Name }} {{- end }} diff --git a/internal/backendconnection/otelcolresources/desired_state.go b/internal/backendconnection/otelcolresources/desired_state.go index bf7d174b..a414f593 100644 --- 
a/internal/backendconnection/otelcolresources/desired_state.go +++ b/internal/backendconnection/otelcolresources/desired_state.go @@ -49,11 +49,14 @@ const ( probesHttpPort = 13133 - rbacApiVersion = "rbac.authorization.k8s.io/v1" - serviceComponent = "agent-collector" + rbacApiVersion = "rbac.authorization.k8s.io/v1" - openTelemetryCollector = "opentelemetry-collector" - openTelemetryCollectorAgent = "opentelemetry-collector-agent" + openTelemetryCollector = "opentelemetry-collector" + openTelemetryCollectorAgent = "opentelemetry-collector-agent" + openTelemetryCollectorDeploymentNameSuffix = "cluster-metrics-collector" + + daemonSetServiceComponent = "agent-collector" + deploymentServiceComponent = openTelemetryCollectorDeploymentNameSuffix configReloader = "configuration-reloader" @@ -83,7 +86,12 @@ var ( daemonSetMatchLabels = map[string]string{ appKubernetesIoNameKey: appKubernetesIoNameValue, appKubernetesIoInstanceKey: appKubernetesIoInstanceValue, - appKubernetesIoComponentLabelKey: serviceComponent, + appKubernetesIoComponentLabelKey: daemonSetServiceComponent, + } + deploymentMatchLabels = map[string]string{ + appKubernetesIoNameKey: appKubernetesIoNameValue, + appKubernetesIoInstanceKey: appKubernetesIoInstanceValue, + appKubernetesIoComponentLabelKey: deploymentServiceComponent, } nodeNameFieldSpec = corev1.ObjectFieldSelector{ @@ -134,6 +142,8 @@ var ( }, }, } + + deploymentReplicas int32 = 1 ) func assembleDesiredState(config *oTelColConfig) ([]client.Object, error) { @@ -156,16 +166,16 @@ func assembleDesiredState(config *oTelColConfig) ([]client.Object, error) { } desiredState = append(desiredState, collectorDaemonSet) - //deploymentCollectorConfigMap, err := assembleDeploymentCollectorConfigMap(config) - //if err != nil { - // return desiredState, err - //} - //desiredState = append(desiredState, deploymentCollectorConfigMap) - //collectorDeployment, err := assembleCollectorDeployment(config) - //if err != nil { - // return desiredState, err - //} 
- // desiredState = append(desiredState, collectorDeployment) + deploymentCollectorConfigMap, err := assembleDeploymentCollectorConfigMap(config) + if err != nil { + return desiredState, err + } + desiredState = append(desiredState, deploymentCollectorConfigMap) + collectorDeployment, err := assembleCollectorDeployment(config) + if err != nil { + return desiredState, err + } + desiredState = append(desiredState, collectorDeployment) return desiredState, nil } @@ -275,6 +285,13 @@ func assembleClusterRole(config *oTelColConfig) *rbacv1.ClusterRole { Resources: []string{"replicasets"}, Verbs: []string{"get", "watch", "list"}, }, + { + // Required for the EKS resource detector, to read the config map aws-auth in the namespace kube-system. + APIGroups: []string{""}, + Resources: []string{"configmaps"}, + Verbs: []string{"get"}, + ResourceNames: []string{"kube-system/aws-auth"}, + }, }, } } @@ -334,7 +351,7 @@ func assembleService(config *oTelColConfig) *corev1.Service { Selector: map[string]string{ appKubernetesIoNameKey: appKubernetesIoNameValue, appKubernetesIoInstanceKey: appKubernetesIoInstanceValue, - appKubernetesIoComponentLabelKey: serviceComponent, + appKubernetesIoComponentLabelKey: daemonSetServiceComponent, }, InternalTrafficPolicy: ptr.To(corev1.ServiceInternalTrafficPolicyLocal), }, @@ -342,7 +359,7 @@ func assembleService(config *oTelColConfig) *corev1.Service { } func assembleCollectorDaemonSet(config *oTelColConfig) (*appsv1.DaemonSet, error) { - collectorContainer, err := assembleCollectorContainer(config) + collectorContainer, err := assembleDaemonSetCollectorContainer(config) if err != nil { return nil, err } @@ -478,7 +495,7 @@ func assembleCollectorDaemonSetVolumes( VolumeSource: corev1.VolumeSource{ ConfigMap: &corev1.ConfigMapVolumeSource{ LocalObjectReference: corev1.LocalObjectReference{ - Name: collectorConfigConfigMapName(config.NamePrefix), + Name: daemonSetCollectorConfigConfigMapName(config.NamePrefix), }, Items: configMapItems, }, @@ 
-515,7 +532,7 @@ func assembleCollectorDaemonSetVolumeMounts() []corev1.VolumeMount { } } -func assembleCollectorDaemonSetEnvVars(config *oTelColConfig) ([]corev1.EnvVar, error) { +func assembleCollectorEnvVars(config *oTelColConfig) ([]corev1.EnvVar, error) { collectorEnv := []corev1.EnvVar{ { Name: "MY_POD_IP", @@ -551,11 +568,11 @@ func assembleCollectorDaemonSetEnvVars(config *oTelColConfig) ([]corev1.EnvVar, return collectorEnv, nil } -func assembleCollectorContainer( +func assembleDaemonSetCollectorContainer( config *oTelColConfig, ) (corev1.Container, error) { collectorVolumeMounts := assembleCollectorDaemonSetVolumeMounts() - collectorEnv, err := assembleCollectorDaemonSetEnvVars(config) + collectorEnv, err := assembleCollectorEnvVars(config) if err != nil { return corev1.Container{}, err } @@ -667,10 +684,126 @@ func assembleFileLogOffsetSynchInitContainer(config *oTelColConfig) corev1.Conta return initFilelogOffsetSynchContainer } -// func assembleCollectorDeployment(config *oTelColConfig) (*appsv1.DaemonSet, error) { -// // TODO implement the deployment assemblage -// return nil, nil -// } +func assembleCollectorDeployment(config *oTelColConfig) (*appsv1.Deployment, error) { + collectorContainer, err := assembleDeploymentCollectorContainer(config) + if err != nil { + return nil, err + } + + collectorDeployment := &appsv1.Deployment{ + TypeMeta: metav1.TypeMeta{ + Kind: "Deployment", + APIVersion: "apps/v1", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: name(config.NamePrefix, openTelemetryCollectorDeploymentNameSuffix), + Namespace: config.Namespace, + Labels: labels(true), + }, + Spec: appsv1.DeploymentSpec{ + Replicas: &deploymentReplicas, + Selector: &metav1.LabelSelector{ + MatchLabels: deploymentMatchLabels, + }, + Strategy: appsv1.DeploymentStrategy{ + Type: appsv1.RollingUpdateDeploymentStrategyType, + }, + Template: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: deploymentMatchLabels, + }, + Spec: corev1.PodSpec{ + 
ServiceAccountName: serviceAccountName(config.NamePrefix), + SecurityContext: &corev1.PodSecurityContext{}, + // This setting is required to enable the configuration reloader process to send Unix signals to the + // collector process. + ShareProcessNamespace: &util.True, + Containers: []corev1.Container{ + collectorContainer, + assembleConfigurationReloaderContainer(config), + }, + Volumes: assembleCollectorDeploymentVolumes(config, configMapItems), + HostNetwork: false, + }, + }, + }, + } + + if config.SelfMonitoringConfiguration.Enabled { + err = selfmonitoring.EnableSelfMonitoringInCollectorDeployment( + collectorDeployment, + config.SelfMonitoringConfiguration, + config.Images.GetOperatorVersion(), + config.DevelopmentMode, + ) + if err != nil { + return nil, err + } + } + + return collectorDeployment, nil +} + +func assembleCollectorDeploymentVolumes( + config *oTelColConfig, + configMapItems []corev1.KeyToPath, +) []corev1.Volume { + pidFileVolumeSizeLimit := resource.MustParse("1M") + return []corev1.Volume{ + { + Name: "opentelemetry-collector-configmap", + VolumeSource: corev1.VolumeSource{ + ConfigMap: &corev1.ConfigMapVolumeSource{ + LocalObjectReference: corev1.LocalObjectReference{ + Name: deploymentCollectorConfigConfigMapName(config.NamePrefix), + }, + Items: configMapItems, + }, + }, + }, + { + Name: pidFileVolumeName, + VolumeSource: corev1.VolumeSource{ + EmptyDir: &corev1.EmptyDirVolumeSource{ + SizeLimit: &pidFileVolumeSizeLimit, + }, + }, + }, + } +} + +func assembleDeploymentCollectorContainer( + config *oTelColConfig, +) (corev1.Container, error) { + collectorVolumeMounts := []corev1.VolumeMount{ + collectorConfigVolume, + collectorPidFileMountRW, + } + collectorEnv, err := assembleCollectorEnvVars(config) + if err != nil { + return corev1.Container{}, err + } + + collectorContainer := corev1.Container{ + Name: openTelemetryCollector, + Args: []string{"--config=file:" + collectorConfigurationFilePath}, + SecurityContext: 
&corev1.SecurityContext{}, + Image: config.Images.CollectorImage, + Env: collectorEnv, + LivenessProbe: &collectorProbe, + ReadinessProbe: &collectorProbe, + Resources: corev1.ResourceRequirements{ + Limits: corev1.ResourceList{ + corev1.ResourceMemory: resource.MustParse("300Mi"), + }, + }, + VolumeMounts: collectorVolumeMounts, + } + if config.Images.CollectorImagePullPolicy != "" { + collectorContainer.ImagePullPolicy = config.Images.CollectorImagePullPolicy + } + return collectorContainer, nil +} func serviceAccountName(namePrefix string) string { return name(namePrefix, openTelemetryCollector) @@ -680,10 +813,14 @@ func filelogReceiverOffsetsConfigMapName(namePrefix string) string { return name(namePrefix, "filelogoffsets") } -func collectorConfigConfigMapName(namePrefix string) string { +func daemonSetCollectorConfigConfigMapName(namePrefix string) string { return name(namePrefix, openTelemetryCollectorAgent) } +func deploymentCollectorConfigConfigMapName(namePrefix string) string { + return name(namePrefix, openTelemetryCollectorDeploymentNameSuffix) +} + func clusterRoleName(namePrefix string) string { return name(namePrefix, openTelemetryCollector) } @@ -694,7 +831,7 @@ func roleName(namePrefix string) string { func serviceLabels() map[string]string { lbls := labels(false) - lbls[appKubernetesIoComponentLabelKey] = serviceComponent + lbls[appKubernetesIoComponentLabelKey] = daemonSetServiceComponent return lbls } diff --git a/internal/backendconnection/otelcolresources/desired_state_test.go b/internal/backendconnection/otelcolresources/desired_state_test.go index 55cb42f6..504b14c0 100644 --- a/internal/backendconnection/otelcolresources/desired_state_test.go +++ b/internal/backendconnection/otelcolresources/desired_state_test.go @@ -53,7 +53,7 @@ var _ = Describe("The desired state of the OpenTelemetry Collector resources", f }) Expect(err).ToNot(HaveOccurred()) - Expect(desiredState).To(HaveLen(9)) + Expect(desiredState).To(HaveLen(11)) 
collectorConfigConfigMapContent := getCollectorConfigConfigMapContent(desiredState) Expect(collectorConfigConfigMapContent).To(ContainSubstring(fmt.Sprintf("endpoint: %s", EndpointDash0TestQuoted))) Expect(collectorConfigConfigMapContent).NotTo(ContainSubstring("file/traces")) @@ -116,6 +116,49 @@ var _ = Describe("The desired state of the OpenTelemetry Collector resources", f ContainElement(MatchVolumeMount("opentelemetry-collector-configmap", "/etc/otelcol/conf"))) Expect(configReloaderContainer.VolumeMounts).To( ContainElement(MatchVolumeMount("opentelemetry-collector-pidfile", "/etc/otelcol/run"))) + + deployment := getDeployment(desiredState) + Expect(deployment).NotTo(BeNil()) + Expect(deployment.Labels["dash0.com/enable"]).To(Equal("false")) + podSpec = deployment.Spec.Template.Spec + + Expect(podSpec.Volumes).To(HaveLen(2)) + configMapVolume = findVolumeByName(podSpec.Volumes, "opentelemetry-collector-configmap") + Expect(configMapVolume).NotTo(BeNil()) + Expect(configMapVolume.VolumeSource.ConfigMap.LocalObjectReference.Name). 
+ To(Equal("unit-test-cluster-metrics-collector")) + Expect(findVolumeMountByName(findContainerByName(podSpec.Containers, "opentelemetry-collector").VolumeMounts, "opentelemetry-collector-configmap")).NotTo(BeNil()) + Expect(findVolumeMountByName(findContainerByName(podSpec.Containers, "configuration-reloader").VolumeMounts, "opentelemetry-collector-configmap")).NotTo(BeNil()) + + Expect(podSpec.Containers).To(HaveLen(2)) + + collectorContainer = podSpec.Containers[0] + Expect(collectorContainer).NotTo(BeNil()) + Expect(collectorContainer.Image).To(Equal(CollectorImageTest)) + Expect(collectorContainer.ImagePullPolicy).To(Equal(corev1.PullAlways)) + collectorContainerArgs = collectorContainer.Args + Expect(collectorContainerArgs).To(HaveLen(1)) + Expect(collectorContainerArgs[0]).To(Equal("--config=file:/etc/otelcol/conf/config.yaml")) + Expect(collectorContainer.VolumeMounts).To(HaveLen(2)) + Expect(collectorContainer.VolumeMounts).To( + ContainElement(MatchVolumeMount("opentelemetry-collector-configmap", "/etc/otelcol/conf"))) + Expect(collectorContainer.VolumeMounts).To( + ContainElement(MatchVolumeMount("opentelemetry-collector-pidfile", "/etc/otelcol/run"))) + + configReloaderContainer = podSpec.Containers[1] + Expect(configReloaderContainer).NotTo(BeNil()) + Expect(configReloaderContainer.Image).To(Equal(ConfigurationReloaderImageTest)) + Expect(configReloaderContainer.ImagePullPolicy).To(Equal(corev1.PullAlways)) + configReloaderContainerArgs = configReloaderContainer.Args + Expect(configReloaderContainerArgs).To(HaveLen(2)) + Expect(configReloaderContainerArgs[0]).To(Equal("--pidfile=/etc/otelcol/run/pid.file")) + Expect(configReloaderContainerArgs[1]).To(Equal("/etc/otelcol/conf/config.yaml")) + Expect(configReloaderContainer.VolumeMounts).To(HaveLen(2)) + Expect(configReloaderContainer.VolumeMounts).To( + ContainElement(MatchVolumeMount("opentelemetry-collector-configmap", "/etc/otelcol/conf"))) + Expect(configReloaderContainer.VolumeMounts).To( + 
ContainElement(MatchVolumeMount("opentelemetry-collector-pidfile", "/etc/otelcol/run"))) + }) It("should use the authorization token directly if provided", func() { @@ -254,6 +297,15 @@ func getDaemonSet(desiredState []client.Object) *appsv1.DaemonSet { return nil } +func getDeployment(desiredState []client.Object) *appsv1.Deployment { + for _, object := range desiredState { + if d, ok := object.(*appsv1.Deployment); ok { + return d + } + } + return nil +} + func findContainerByName(objects []corev1.Container, name string) *corev1.Container { for _, object := range objects { if object.Name == name { diff --git a/internal/backendconnection/otelcolresources/otelcol_resources_test.go b/internal/backendconnection/otelcolresources/otelcol_resources_test.go index 8746a9cb..dcf0315d 100644 --- a/internal/backendconnection/otelcolresources/otelcol_resources_test.go +++ b/internal/backendconnection/otelcolresources/otelcol_resources_test.go @@ -130,7 +130,10 @@ var _ = Describe("The OpenTelemetry Collector resource manager", Ordered, func() Describe("when updating all OpenTelemetry collector resources", func() { It("should update the resources", func() { - for _, configMapName := range []string{expectedCollectorConfigConfigMapName, expectedFileOffsetsConfigMapName} { + for _, configMapName := range []string{ + expectedCollectorConfigConfigMapName, + expectedFileOffsetsConfigMapName, + } { Expect(k8sClient.Create(ctx, &corev1.ConfigMap{ ObjectMeta: metav1.ObjectMeta{ Name: configMapName, @@ -148,7 +151,7 @@ var _ = Describe("The OpenTelemetry Collector resource manager", Ordered, func() })).To(Succeed()) } - resourcesHaveBeenCreated, resourcesHaveBeenUpdated, err := + _, resourcesHaveBeenUpdated, err := oTelColResourceManager.CreateOrUpdateOpenTelemetryCollectorResources( ctx, Dash0OperatorNamespace, @@ -158,7 +161,6 @@ var _ = Describe("The OpenTelemetry Collector resource manager", Ordered, func() &logger, ) Expect(err).ToNot(HaveOccurred()) - 
Expect(resourcesHaveBeenCreated).To(BeFalse()) Expect(resourcesHaveBeenUpdated).To(BeTrue()) VerifyCollectorResourcesExist(ctx, k8sClient, Dash0OperatorNamespace) diff --git a/internal/dash0/selfmonitoring/self_monitoring.go b/internal/dash0/selfmonitoring/self_monitoring.go index 6808703f..2768bc25 100644 --- a/internal/dash0/selfmonitoring/self_monitoring.go +++ b/internal/dash0/selfmonitoring/self_monitoring.go @@ -177,18 +177,36 @@ func (c *cannotFindContainerByNameError) Error() string { return fmt.Sprintf("cannot find the container named '%v' in the %v %v/%v", c.ContainerName, c.WorkloadGKV.Kind, c.WorkloadNamespace, c.WorkloadName) } -func DisableSelfMonitoringInCollectorDaemonSet(collectorDemonSet *appsv1.DaemonSet) { - for _, container := range collectorDemonSet.Spec.Template.Spec.InitContainers { - disableSelfMonitoringInContainer(&container) - } +func EnableSelfMonitoringInCollectorDaemonSet( + collectorDaemonSet *appsv1.DaemonSet, + selfMonitoringConfiguration SelfMonitoringConfiguration, + operatorVersion string, + developmentMode bool, +) error { + return enableSelfMonitoringInCollector( + collectorDaemonSet.Spec.Template.Spec.Containers, + selfMonitoringConfiguration, + operatorVersion, + developmentMode, + ) +} - for _, container := range collectorDemonSet.Spec.Template.Spec.Containers { - disableSelfMonitoringInContainer(&container) - } +func EnableSelfMonitoringInCollectorDeployment( + collectorDeployment *appsv1.Deployment, + selfMonitoringConfiguration SelfMonitoringConfiguration, + operatorVersion string, + developmentMode bool, +) error { + return enableSelfMonitoringInCollector( + collectorDeployment.Spec.Template.Spec.Containers, + selfMonitoringConfiguration, + operatorVersion, + developmentMode, + ) } -func EnableSelfMonitoringInCollectorDaemonSet( - collectorDaemonSet *appsv1.DaemonSet, +func enableSelfMonitoringInCollector( + collectorContainers []corev1.Container, selfMonitoringConfiguration SelfMonitoringConfiguration, operatorVersion 
string,
 	developmentMode bool,
@@ -221,7 +239,7 @@ func EnableSelfMonitoringInCollectorDaemonSet(
 	// 	collectorDaemonSet.Spec.Template.Spec.InitContainers[i] = container
 	// }
 
-	for i, container := range collectorDaemonSet.Spec.Template.Spec.Containers {
+	for i, container := range collectorContainers {
 		enableSelfMonitoringInContainer(
 			&container,
 			selfMonitoringExport,
@@ -229,7 +247,7 @@
 			operatorVersion,
 			developmentMode,
 		)
-		collectorDaemonSet.Spec.Template.Spec.Containers[i] = container
+		collectorContainers[i] = container
 	}
 
 	return nil
diff --git a/test-resources/bin/test-cleanup.sh b/test-resources/bin/test-cleanup.sh
index b94c4056..4f52aa30 100755
--- a/test-resources/bin/test-cleanup.sh
+++ b/test-resources/bin/test-cleanup.sh
@@ -14,7 +14,9 @@ source test-resources/bin/util
 load_env_file
 verify_kubectx
 
-kubectl delete -n ${target_namespace} -f test-resources/customresources/dash0monitoring/dash0monitoring.secret.yaml || true
+kubectl delete -n ${target_namespace} -f test-resources/customresources/dash0monitoring/dash0monitoring.secret.yaml --wait=false || true
+sleep 1
+kubectl patch -n ${target_namespace} -f test-resources/customresources/dash0monitoring/dash0monitoring.secret.yaml -p '{"metadata":{"finalizers":null}}' --type=merge || true
 kubectl delete -f test-resources/customresources/dash0operatorconfiguration/dash0operatorconfiguration.token.yaml || true
 
 make undeploy-via-helm || true
diff --git a/test-resources/bin/util b/test-resources/bin/util
index aab612cf..d617076f 100644
--- a/test-resources/bin/util
+++ b/test-resources/bin/util
@@ -30,8 +30,8 @@ build_all_images() {
 }
 
 deploy_via_helm() {
-  helm_install_command="helm install --namespace dash0-system --set operator.developmentMode=true"
-
+  helm_install_command="helm install --namespace dash0-system"
+  helm_install_command+=" --set operator.developmentMode=true"
 if !
has_been_set_to_empty_string "CONTROLLER_IMG_REPOSITORY"; then
   helm_install_command+=" --set operator.image.repository=${CONTROLLER_IMG_REPOSITORY:-operator-controller}"
 fi
diff --git a/test/util/collector.go b/test/util/collector.go
index 2ab99e79..af6a27ef 100644
--- a/test/util/collector.go
+++ b/test/util/collector.go
@@ -16,8 +16,9 @@ import (
 )
 
 const (
-	ExpectedConfigMapName = "unit-test-opentelemetry-collector-agent"
-	ExpectedDaemonSetName = "unit-test-opentelemetry-collector-agent"
+	ExpectedConfigMapName  = "unit-test-opentelemetry-collector-agent"
+	ExpectedDaemonSetName  = "unit-test-opentelemetry-collector-agent"
+	ExpectedDeploymentName = "unit-test-cluster-metrics-collector"
 )
 
 func VerifyCollectorResourcesExist(
@@ -96,6 +97,7 @@ func VerifyCollectorResourcesDoNotExist(
 ) {
 	verifyCollectorConfigMapDoesNotExist(ctx, k8sClient, operatorNamespace)
 	verifyCollectorDaemonSetDoesNotExist(ctx, k8sClient, operatorNamespace)
+	verifyCollectorDeploymentDoesNotExist(ctx, k8sClient, operatorNamespace)
 }
 
 func verifyCollectorConfigMapDoesNotExist(
@@ -128,6 +130,21 @@ func verifyCollectorDaemonSetDoesNotExist(
 	)
 }
 
+func verifyCollectorDeploymentDoesNotExist(
+	ctx context.Context,
+	k8sClient client.Client,
+	operatorNamespace string,
+) {
+	verifyResourceDoesNotExist(
+		ctx,
+		k8sClient,
+		operatorNamespace,
+		ExpectedDeploymentName,
+		&appsv1.Deployment{},
+		"deployment",
+	)
+}
+
 func verifyResourceDoesNotExist(
 	ctx context.Context,
 	k8sClient client.Client,