From f947c0d42851f17a6617929917f6d09b33100c0a Mon Sep 17 00:00:00 2001 From: Florian Bacher Date: Mon, 18 Nov 2024 20:22:41 +0100 Subject: [PATCH] [receiver/k8scluster] add support for observing resources for a specific namespace (#35727) #### Description This PR extends the k8scluster receiver with an option to limit the observed resources to a specific namespace. #### Link to tracking issue Fixes #9401 #### Testing added unit and e2e tests #### Documentation Added section about how to make use of Roles and RoleBindings instead of ClusterRoles and ClusterRoleBindings --------- Signed-off-by: Florian Bacher Co-authored-by: Tyler Helmuth <12352919+TylerHelmuth@users.noreply.github.com> Co-authored-by: Evan Bradley <11745660+evan-bradley@users.noreply.github.com> --- .chloggen/k8sclusterreceiver-namespaced.yaml | 27 + receiver/k8sclusterreceiver/README.md | 92 +++ receiver/k8sclusterreceiver/config.go | 6 + receiver/k8sclusterreceiver/e2e_test.go | 194 ++++-- receiver/k8sclusterreceiver/receiver_test.go | 64 +- .../collector/clusterrole.yaml | 0 .../collector/clusterrolebinding.yaml | 0 .../collector/configmap.yaml | 0 .../collector/deployment.yaml | 0 .../collector/service.yaml | 0 .../collector/serviceaccount.yaml | 0 .../e2e/{ => cluster-scoped}/expected.yaml | 0 .../testobjects/cronjob.yaml | 2 +- .../{ => cluster-scoped}/testobjects/hpa.yaml | 0 .../{ => cluster-scoped}/testobjects/job.yaml | 0 .../testobjects/statefulset.yaml | 0 .../namespace-scoped/collector/configmap.yaml | 35 + .../collector/deployment.yaml | 59 ++ .../e2e/namespace-scoped/collector/role.yaml | 60 ++ .../collector/rolebinding.yaml | 13 + .../namespace-scoped/collector/service.yaml | 16 + .../collector/serviceaccount.yaml | 5 + .../e2e/namespace-scoped/expected.yaml | 628 ++++++++++++++++++ .../testobjects/1_namespace.yaml | 4 + .../namespace-scoped/testobjects/cronjob.yaml | 21 + .../e2e/namespace-scoped/testobjects/hpa.yaml | 13 + .../e2e/namespace-scoped/testobjects/job.yaml | 17 + .../testobjects/statefulset.yaml | 29 + receiver/k8sclusterreceiver/watcher.go | 30 +- receiver/k8sclusterreceiver/watcher_test.go | 4 +- 30 files changed, 1256 insertions(+), 63 deletions(-) create mode 100644 .chloggen/k8sclusterreceiver-namespaced.yaml rename receiver/k8sclusterreceiver/testdata/e2e/{ => cluster-scoped}/collector/clusterrole.yaml (100%) rename receiver/k8sclusterreceiver/testdata/e2e/{ => cluster-scoped}/collector/clusterrolebinding.yaml (100%) rename receiver/k8sclusterreceiver/testdata/e2e/{ => cluster-scoped}/collector/configmap.yaml (100%) rename receiver/k8sclusterreceiver/testdata/e2e/{ => cluster-scoped}/collector/deployment.yaml (100%) rename receiver/k8sclusterreceiver/testdata/e2e/{ => cluster-scoped}/collector/service.yaml (100%) rename receiver/k8sclusterreceiver/testdata/e2e/{ => cluster-scoped}/collector/serviceaccount.yaml (100%) rename receiver/k8sclusterreceiver/testdata/e2e/{ => cluster-scoped}/expected.yaml (100%) rename receiver/k8sclusterreceiver/testdata/e2e/{ => cluster-scoped}/testobjects/cronjob.yaml (100%) rename receiver/k8sclusterreceiver/testdata/e2e/{ => cluster-scoped}/testobjects/hpa.yaml (100%) rename receiver/k8sclusterreceiver/testdata/e2e/{ => cluster-scoped}/testobjects/job.yaml (100%) rename receiver/k8sclusterreceiver/testdata/e2e/{ => cluster-scoped}/testobjects/statefulset.yaml (100%) create mode 100644 receiver/k8sclusterreceiver/testdata/e2e/namespace-scoped/collector/configmap.yaml create mode 100644 
receiver/k8sclusterreceiver/testdata/e2e/namespace-scoped/collector/deployment.yaml create mode 100644 receiver/k8sclusterreceiver/testdata/e2e/namespace-scoped/collector/role.yaml create mode 100644 receiver/k8sclusterreceiver/testdata/e2e/namespace-scoped/collector/rolebinding.yaml create mode 100644 receiver/k8sclusterreceiver/testdata/e2e/namespace-scoped/collector/service.yaml create mode 100644 receiver/k8sclusterreceiver/testdata/e2e/namespace-scoped/collector/serviceaccount.yaml create mode 100644 receiver/k8sclusterreceiver/testdata/e2e/namespace-scoped/expected.yaml create mode 100644 receiver/k8sclusterreceiver/testdata/e2e/namespace-scoped/testobjects/1_namespace.yaml create mode 100644 receiver/k8sclusterreceiver/testdata/e2e/namespace-scoped/testobjects/cronjob.yaml create mode 100644 receiver/k8sclusterreceiver/testdata/e2e/namespace-scoped/testobjects/hpa.yaml create mode 100644 receiver/k8sclusterreceiver/testdata/e2e/namespace-scoped/testobjects/job.yaml create mode 100644 receiver/k8sclusterreceiver/testdata/e2e/namespace-scoped/testobjects/statefulset.yaml diff --git a/.chloggen/k8sclusterreceiver-namespaced.yaml b/.chloggen/k8sclusterreceiver-namespaced.yaml new file mode 100644 index 000000000000..3eb2ec73fd7d --- /dev/null +++ b/.chloggen/k8sclusterreceiver-namespaced.yaml @@ -0,0 +1,27 @@ +# Use this changelog template to create an entry for release notes. + +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: enhancement + +# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver) +component: k8sclusterreceiver + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: Add support for limiting observed resources to a specific namespace. + +# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists. +issues: [9401] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. +subtext: This change allows using this receiver with `Roles`/`RoleBindings`, as opposed to granting the collector cluster-wide read access. + +# If your change doesn't affect end users or the exported elements of any package, +# you should instead start your pull request title with [chore] or use the "Skip Changelog" label. +# Optional: The change log or logs in which this entry should be included. +# e.g. '[user]' or '[user, api]' +# Include 'user' if the change is relevant to end users. +# Include 'api' if there is a change to a library API. +# Default: '[user]' +change_logs: [] diff --git a/receiver/k8sclusterreceiver/README.md b/receiver/k8sclusterreceiver/README.md index 2e84d8cc66e1..2ce895c5fca8 100644 --- a/receiver/k8sclusterreceiver/README.md +++ b/receiver/k8sclusterreceiver/README.md @@ -62,6 +62,7 @@ The following allocatable resource types are available. - storage - `metrics`: Allows to enable/disable metrics. - `resource_attributes`: Allows to enable/disable resource attributes. +- `namespace`: Allows to observe resources for a particular namespace only. If this option is set to a non-empty string, `Nodes`, `Namespaces` and `ClusterResourceQuotas` will not be observed.
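For illustration, a minimal collector configuration using the new `namespace` option could look like the sketch below. This is an editor's example, not part of the patch; the namespace value `my-namespace` is borrowed from the PR's e2e test objects, and `auth_type: serviceAccount` is the receiver's default authentication mode.

```yaml
receivers:
  k8s_cluster:
    auth_type: serviceAccount   # default; authenticate with the pod's service account token
    namespace: my-namespace     # only observe resources in this namespace
```

With `namespace` set, the watcher skips the cluster-scoped informers (`Nodes`, `Namespaces`, and the OpenShift `ClusterResourceQuotas` client), so the collector's service account only needs namespace-scoped read permissions.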
Example: @@ -273,6 +274,97 @@ subjects: EOF ``` +As an alternative to setting up a `ClusterRole`/`ClusterRoleBinding`, it is also possible to limit the observed resources to a +particular namespace by setting the `namespace` option of the receiver. This allows the collector to only rely on `Roles`/`RoleBindings`, +instead of granting the collector cluster-wide read access to resources. +Note however, that in this case the following resources will not be observed by the `k8sclusterreceiver`: + +- `Nodes` +- `Namespaces` +- `ClusterResourceQuotas` + +To use this approach, use the commands below to create the required `Role` and `RoleBinding`: + +```bash +< 0, in which case you can conclude there were restarts in the recent past, and not try and analyze the value beyond that. + gauge: + dataPoints: + - asInt: "0" + timeUnixNano: "1686772769034865545" + name: k8s.container.restarts + unit: "{restart}" + - description: Whether a container has passed its readiness probe (0 for no, 1 for yes) + gauge: + dataPoints: + - asInt: "1" + timeUnixNano: "1686772769034865545" + name: k8s.container.ready + unit: "" + - description: Resource requested for the container. See https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.23/#resourcerequirements-v1-core for details + gauge: + dataPoints: + - asInt: "268435456" + timeUnixNano: "1686772769034865545" + name: k8s.container.memory_request + unit: "By" + - description: Resource requested for the container. See https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.23/#resourcerequirements-v1-core for details + gauge: + dataPoints: + - asDouble: 0.128 + timeUnixNano: "1686772769034865545" + name: k8s.container.cpu_request + unit: "{cpu}" + - description: Maximum resource limit set for the container. See https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.23/#resourcerequirements-v1-core for details + gauge: + dataPoints: + - asDouble: 0.128 + timeUnixNano: "1686772769034865545" + name: k8s.container.cpu_limit + unit: "{cpu}" + - description: Maximum resource limit set for the container. 
See https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.23/#resourcerequirements-v1-core for details + gauge: + dataPoints: + - asInt: "268435456" + timeUnixNano: "1686772769034865545" + name: k8s.container.memory_limit + unit: "By" + scope: + name: github.com/open-telemetry/opentelemetry-collector-contrib/receiver/k8sclusterreceiver + version: latest + - resource: + attributes: + - key: k8s.cronjob.name + value: + stringValue: test-k8scluster-receiver-cronjob + - key: k8s.cronjob.uid + value: + stringValue: 6a3c3e99-5db1-481f-9d5d-782ae9de9f58 + - key: k8s.namespace.name + value: + stringValue: my-namespace + schemaUrl: https://opentelemetry.io/schemas/1.18.0 + scopeMetrics: + - metrics: + - description: The number of actively running jobs for a cronjob + gauge: + dataPoints: + - asInt: "2" + startTimeUnixNano: "1000000" + timeUnixNano: "2000000" + name: k8s.cronjob.active_jobs + unit: '{job}' + scope: + name: github.com/open-telemetry/opentelemetry-collector-contrib/receiver/k8sclusterreceiver + version: latest + - resource: + attributes: + - key: k8s.job.name + value: + stringValue: test-k8scluster-receiver-cronjob-28839770 + - key: k8s.job.uid + value: + stringValue: a38da134-af71-4bc1-a585-c9e0342f9aab + - key: k8s.namespace.name + value: + stringValue: my-namespace + schemaUrl: https://opentelemetry.io/schemas/1.18.0 + scopeMetrics: + - metrics: + - description: The number of actively running pods for a job + gauge: + dataPoints: + - asInt: "1" + startTimeUnixNano: "1000000" + timeUnixNano: "2000000" + name: k8s.job.active_pods + unit: '{pod}' + - description: The desired number of successfully finished pods the job should be run with + gauge: + dataPoints: + - asInt: "1" + startTimeUnixNano: "1000000" + timeUnixNano: "2000000" + name: k8s.job.desired_successful_pods + unit: '{pod}' + - description: The number of pods which reached phase Failed for a job + gauge: + dataPoints: + - asInt: "0" + startTimeUnixNano: "1000000" + timeUnixNano: "2000000" + name: k8s.job.failed_pods + unit: '{pod}' + - description: The max desired number of pods the job should run at any given time + gauge: + dataPoints: + - asInt: "1" + startTimeUnixNano: "1000000" + timeUnixNano: "2000000" + name: k8s.job.max_parallel_pods + unit: '{pod}' + - description: The number of pods which reached phase Succeeded for a job + gauge: + dataPoints: + - asInt: "0" + startTimeUnixNano: "1000000" + timeUnixNano: "2000000" + name: k8s.job.successful_pods + unit: '{pod}' + scope: + name: github.com/open-telemetry/opentelemetry-collector-contrib/receiver/k8sclusterreceiver + version: latest + - resource: + attributes: + - key: k8s.namespace.name + value: + stringValue: my-namespace + - key: k8s.node.name + value: + stringValue: kind-control-plane + - key: k8s.pod.name + value: + stringValue: test-k8scluster-receiver-cronjob-28839770-9pp7g + - key: k8s.pod.uid + value: + stringValue: e388cfa8-06c3-47b6-a7a6-113d7cdda849 + schemaUrl: https://opentelemetry.io/schemas/1.18.0 + scopeMetrics: + - metrics: + - description: Current phase of the pod (1 - Pending, 2 - Running, 3 - Succeeded, 4 - Failed, 5 - Unknown) + gauge: + dataPoints: + - asInt: "2" + startTimeUnixNano: "1000000" + timeUnixNano: "2000000" + name: k8s.pod.phase + scope: + name: github.com/open-telemetry/opentelemetry-collector-contrib/receiver/k8sclusterreceiver + version: latest + - resource: + attributes: + - key: container.id + value: + stringValue: 10c9bec31ac94fc58e65ce5ed809455727eee9daae8ea80668990e848a7e7da0 + - key: container.image.name + 
value: + stringValue: docker.io/library/alpine + - key: container.image.tag + value: + stringValue: latest + - key: k8s.container.name + value: + stringValue: alpine + - key: k8s.namespace.name + value: + stringValue: my-namespace + - key: k8s.node.name + value: + stringValue: kind-control-plane + - key: k8s.pod.name + value: + stringValue: test-k8scluster-receiver-cronjob-28839771-llccr + - key: k8s.pod.uid + value: + stringValue: 0c2351b3-842c-4632-95c2-e7b061128a98 + schemaUrl: https://opentelemetry.io/schemas/1.18.0 + scopeMetrics: + - metrics: + - description: Whether a container has passed its readiness probe (0 for no, 1 for yes) + gauge: + dataPoints: + - asInt: "1" + startTimeUnixNano: "1000000" + timeUnixNano: "2000000" + name: k8s.container.ready + - description: How many times the container has restarted in the recent past. This value is pulled directly from the K8s API and the value can go indefinitely high and be reset to 0 at any time depending on how your kubelet is configured to prune dead containers. It is best to not depend too much on the exact value but rather look at it as either == 0, in which case you can conclude there were no restarts in the recent past, or > 0, in which case you can conclude there were restarts in the recent past, and not try and analyze the value beyond that. + gauge: + dataPoints: + - asInt: "0" + startTimeUnixNano: "1000000" + timeUnixNano: "2000000" + name: k8s.container.restarts + unit: '{restart}' + scope: + name: github.com/open-telemetry/opentelemetry-collector-contrib/receiver/k8sclusterreceiver + version: latest + - resource: + attributes: + - key: k8s.job.name + value: + stringValue: test-k8scluster-receiver-job + - key: k8s.job.uid + value: + stringValue: b7ecbf9e-8e1a-4d70-beda-aab183645382 + - key: k8s.namespace.name + value: + stringValue: my-namespace + schemaUrl: https://opentelemetry.io/schemas/1.18.0 + scopeMetrics: + - metrics: + - description: The number of actively running pods for a job + gauge: + dataPoints: + - asInt: "1" + startTimeUnixNano: "1000000" + timeUnixNano: "2000000" + name: k8s.job.active_pods + unit: '{pod}' + - description: The desired number of successfully finished pods the job should be run with + gauge: + dataPoints: + - asInt: "1" + startTimeUnixNano: "1000000" + timeUnixNano: "2000000" + name: k8s.job.desired_successful_pods + unit: '{pod}' + - description: The number of pods which reached phase Failed for a job + gauge: + dataPoints: + - asInt: "0" + startTimeUnixNano: "1000000" + timeUnixNano: "2000000" + name: k8s.job.failed_pods + unit: '{pod}' + - description: The max desired number of pods the job should run at any given time + gauge: + dataPoints: + - asInt: "1" + startTimeUnixNano: "1000000" + timeUnixNano: "2000000" + name: k8s.job.max_parallel_pods + unit: '{pod}' + - description: The number of pods which reached phase Succeeded for a job + gauge: + dataPoints: + - asInt: "0" + startTimeUnixNano: "1000000" + timeUnixNano: "2000000" + name: k8s.job.successful_pods + unit: '{pod}' + scope: + name: github.com/open-telemetry/opentelemetry-collector-contrib/receiver/k8sclusterreceiver + version: latest + + - resource: + attributes: + - key: k8s.namespace.name + value: + stringValue: my-namespace + - key: k8s.node.name + value: + stringValue: kind-control-plane + - key: k8s.pod.name + value: + stringValue: test-k8scluster-receiver-job-bzjrh + - key: k8s.pod.uid + value: + stringValue: 7e8bdace-4bce-4750-bd8c-d7359bb3e56b + schemaUrl: https://opentelemetry.io/schemas/1.18.0 + scopeMetrics: + - 
metrics: + - description: Current phase of the pod (1 - Pending, 2 - Running, 3 - Succeeded, 4 - Failed, 5 - Unknown) + gauge: + dataPoints: + - asInt: "2" + startTimeUnixNano: "1000000" + timeUnixNano: "2000000" + name: k8s.pod.phase + scope: + name: github.com/open-telemetry/opentelemetry-collector-contrib/receiver/k8sclusterreceiver + version: latest + + - resource: + attributes: + - key: container.id + value: + stringValue: 567cd0ad83d68987dfb4dbffd056732b25bd2fc89e912605c16a5d1a4cd2b54c + - key: container.image.name + value: + stringValue: docker.io/library/alpine + - key: container.image.tag + value: + stringValue: latest + - key: k8s.container.name + value: + stringValue: alpine + - key: k8s.namespace.name + value: + stringValue: my-namespace + - key: k8s.node.name + value: + stringValue: kind-control-plane + - key: k8s.pod.name + value: + stringValue: test-k8scluster-receiver-job-bzjrh + - key: k8s.pod.uid + value: + stringValue: 7e8bdace-4bce-4750-bd8c-d7359bb3e56b + schemaUrl: https://opentelemetry.io/schemas/1.18.0 + scopeMetrics: + - metrics: + - description: Whether a container has passed its readiness probe (0 for no, 1 for yes) + gauge: + dataPoints: + - asInt: "1" + startTimeUnixNano: "1000000" + timeUnixNano: "2000000" + name: k8s.container.ready + - description: How many times the container has restarted in the recent past. This value is pulled directly from the K8s API and the value can go indefinitely high and be reset to 0 at any time depending on how your kubelet is configured to prune dead containers. It is best to not depend too much on the exact value but rather look at it as either == 0, in which case you can conclude there were no restarts in the recent past, or > 0, in which case you can conclude there were restarts in the recent past, and not try and analyze the value beyond that. 
+ gauge: + dataPoints: + - asInt: "0" + startTimeUnixNano: "1000000" + timeUnixNano: "2000000" + name: k8s.container.restarts + unit: '{restart}' + scope: + name: github.com/open-telemetry/opentelemetry-collector-contrib/receiver/k8sclusterreceiver + version: latest + + - resource: + attributes: + - key: k8s.namespace.name + value: + stringValue: my-namespace + - key: k8s.statefulset.name + value: + stringValue: test-k8scluster-receiver-statefulset + - key: k8s.statefulset.uid + value: + stringValue: 5ceb9f10-fc64-4d70-b6f8-228b4a0cfd3c + schemaUrl: https://opentelemetry.io/schemas/1.18.0 + scopeMetrics: + - metrics: + - description: The number of pods created by the StatefulSet controller from the StatefulSet version + gauge: + dataPoints: + - asInt: "1" + startTimeUnixNano: "1000000" + timeUnixNano: "2000000" + name: k8s.statefulset.current_pods + unit: '{pod}' + - description: Number of desired pods in the stateful set (the `spec.replicas` field) + gauge: + dataPoints: + - asInt: "1" + startTimeUnixNano: "1000000" + timeUnixNano: "2000000" + name: k8s.statefulset.desired_pods + unit: '{pod}' + - description: Number of pods created by the stateful set that have the `Ready` condition + gauge: + dataPoints: + - asInt: "1" + startTimeUnixNano: "1000000" + timeUnixNano: "2000000" + name: k8s.statefulset.ready_pods + unit: '{pod}' + - description: Number of pods created by the StatefulSet controller from the StatefulSet version + gauge: + dataPoints: + - asInt: "1" + startTimeUnixNano: "1000000" + timeUnixNano: "2000000" + name: k8s.statefulset.updated_pods + unit: '{pod}' + scope: + name: github.com/open-telemetry/opentelemetry-collector-contrib/receiver/k8sclusterreceiver + version: latest + + - resource: + attributes: + - key: k8s.namespace.name + value: + stringValue: my-namespace + - key: k8s.node.name + value: + stringValue: kind-control-plane + - key: k8s.pod.name + value: + stringValue: test-k8scluster-receiver-statefulset-0 + - key: k8s.pod.uid + value: + stringValue: f1ea5486-77b7-41c6-a3be-d03650011801 + schemaUrl: https://opentelemetry.io/schemas/1.18.0 + scopeMetrics: + - metrics: + - description: Current phase of the pod (1 - Pending, 2 - Running, 3 - Succeeded, 4 - Failed, 5 - Unknown) + gauge: + dataPoints: + - asInt: "2" + startTimeUnixNano: "1000000" + timeUnixNano: "2000000" + name: k8s.pod.phase + scope: + name: github.com/open-telemetry/opentelemetry-collector-contrib/receiver/k8sclusterreceiver + version: latest + - resource: + attributes: + - key: container.id + value: + stringValue: 2cb1cb272a301a00f50020c3e4751bfa9a281496a6dc35f02a5546451e894e93 + - key: container.image.name + value: + stringValue: docker.io/library/nginx + - key: container.image.tag + value: + stringValue: latest + - key: k8s.container.name + value: + stringValue: nginx + - key: k8s.namespace.name + value: + stringValue: my-namespace + - key: k8s.node.name + value: + stringValue: kind-control-plane + - key: k8s.pod.name + value: + stringValue: test-k8scluster-receiver-statefulset-0 + - key: k8s.pod.uid + value: + stringValue: f1ea5486-77b7-41c6-a3be-d03650011801 + schemaUrl: https://opentelemetry.io/schemas/1.18.0 + scopeMetrics: + - metrics: + - description: Whether a container has passed its readiness probe (0 for no, 1 for yes) + gauge: + dataPoints: + - asInt: "1" + startTimeUnixNano: "1000000" + timeUnixNano: "2000000" + name: k8s.container.ready + - description: How many times the container has restarted in the recent past. 
This value is pulled directly from the K8s API and the value can go indefinitely high and be reset to 0 at any time depending on how your kubelet is configured to prune dead containers. It is best to not depend too much on the exact value but rather look at it as either == 0, in which case you can conclude there were no restarts in the recent past, or > 0, in which case you can conclude there were restarts in the recent past, and not try and analyze the value beyond that. + gauge: + dataPoints: + - asInt: "0" + startTimeUnixNano: "1000000" + timeUnixNano: "2000000" + name: k8s.container.restarts + unit: '{restart}' + scope: + name: github.com/open-telemetry/opentelemetry-collector-contrib/receiver/k8sclusterreceiver + version: latest + + - resource: + attributes: + - key: k8s.hpa.name + value: + stringValue: test-k8scluster-receiver-hpa + - key: k8s.hpa.uid + value: + stringValue: 963572dc-4663-4fb2-930a-e143320a03c3 + - key: k8s.namespace.name + value: + stringValue: my-namespace + schemaUrl: https://opentelemetry.io/schemas/1.18.0 + scopeMetrics: + - metrics: + - description: Current number of pod replicas managed by this autoscaler. + gauge: + dataPoints: + - asInt: "1" + startTimeUnixNano: "1000000" + timeUnixNano: "2000000" + name: k8s.hpa.current_replicas + unit: '{pod}' + - description: Desired number of pod replicas managed by this autoscaler. + gauge: + dataPoints: + - asInt: "0" + startTimeUnixNano: "1000000" + timeUnixNano: "2000000" + name: k8s.hpa.desired_replicas + unit: '{pod}' + - description: Maximum number of replicas to which the autoscaler can scale up. + gauge: + dataPoints: + - asInt: "1" + startTimeUnixNano: "1000000" + timeUnixNano: "2000000" + name: k8s.hpa.max_replicas + unit: '{pod}' + - description: Minimum number of replicas to which the autoscaler can scale up. 
+ gauge: + dataPoints: + - asInt: "1" + startTimeUnixNano: "1000000" + timeUnixNano: "2000000" + name: k8s.hpa.min_replicas + unit: '{pod}' + scope: + name: github.com/open-telemetry/opentelemetry-collector-contrib/receiver/k8sclusterreceiver + version: latest \ No newline at end of file diff --git a/receiver/k8sclusterreceiver/testdata/e2e/namespace-scoped/testobjects/1_namespace.yaml b/receiver/k8sclusterreceiver/testdata/e2e/namespace-scoped/testobjects/1_namespace.yaml new file mode 100644 index 000000000000..4cb279baf51a --- /dev/null +++ b/receiver/k8sclusterreceiver/testdata/e2e/namespace-scoped/testobjects/1_namespace.yaml @@ -0,0 +1,4 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: my-namespace diff --git a/receiver/k8sclusterreceiver/testdata/e2e/namespace-scoped/testobjects/cronjob.yaml b/receiver/k8sclusterreceiver/testdata/e2e/namespace-scoped/testobjects/cronjob.yaml new file mode 100644 index 000000000000..d91d6a3acc8e --- /dev/null +++ b/receiver/k8sclusterreceiver/testdata/e2e/namespace-scoped/testobjects/cronjob.yaml @@ -0,0 +1,21 @@ +kind: CronJob +apiVersion: batch/v1 +metadata: + name: test-k8scluster-receiver-cronjob + namespace: my-namespace +spec: + schedule: "*/1 * * * *" + # ensure that only one job/pod is active for the lifetime of the test + concurrencyPolicy: Forbid + jobTemplate: + spec: + template: + spec: + containers: + - name: alpine + image: alpine + args: + - /bin/sh + - -c + - "echo Running; sleep 600" + restartPolicy: OnFailure diff --git a/receiver/k8sclusterreceiver/testdata/e2e/namespace-scoped/testobjects/hpa.yaml b/receiver/k8sclusterreceiver/testdata/e2e/namespace-scoped/testobjects/hpa.yaml new file mode 100644 index 000000000000..641832c5f954 --- /dev/null +++ b/receiver/k8sclusterreceiver/testdata/e2e/namespace-scoped/testobjects/hpa.yaml @@ -0,0 +1,13 @@ +apiVersion: autoscaling/v1 +kind: HorizontalPodAutoscaler +metadata: + name: test-k8scluster-receiver-hpa + namespace: my-namespace +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: StatefulSet + name: test-k8scluster-receiver-statefulset + minReplicas: 1 + maxReplicas: 1 + targetCPUUtilizationPercentage: 50 diff --git a/receiver/k8sclusterreceiver/testdata/e2e/namespace-scoped/testobjects/job.yaml b/receiver/k8sclusterreceiver/testdata/e2e/namespace-scoped/testobjects/job.yaml new file mode 100644 index 000000000000..d449fef61582 --- /dev/null +++ b/receiver/k8sclusterreceiver/testdata/e2e/namespace-scoped/testobjects/job.yaml @@ -0,0 +1,17 @@ +apiVersion: batch/v1 +kind: Job +metadata: + name: test-k8scluster-receiver-job + namespace: my-namespace +spec: + template: + spec: + containers: + - name: alpine + image: alpine + args: + - /bin/sh + - -c + - "echo Hello from Job; sleep 600" + restartPolicy: Never + backoffLimit: 3 diff --git a/receiver/k8sclusterreceiver/testdata/e2e/namespace-scoped/testobjects/statefulset.yaml b/receiver/k8sclusterreceiver/testdata/e2e/namespace-scoped/testobjects/statefulset.yaml new file mode 100644 index 000000000000..50eb3f352361 --- /dev/null +++ b/receiver/k8sclusterreceiver/testdata/e2e/namespace-scoped/testobjects/statefulset.yaml @@ -0,0 +1,29 @@ +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: test-k8scluster-receiver-statefulset + namespace: my-namespace +spec: + serviceName: "test-k8scluster-receiver-statefulset-service" + replicas: 1 + selector: + matchLabels: + app: test-k8scluster-receiver-statefulset + template: + metadata: + labels: + app: test-k8scluster-receiver-statefulset + spec: + containers: + - name: nginx + image: 
nginx + ports: + - containerPort: 80 + volumeClaimTemplates: + - metadata: + name: test-k8scluster-receiver-statefulset-pvc + spec: + accessModes: ["ReadWriteOnce"] + resources: + requests: + storage: 100Mi diff --git a/receiver/k8sclusterreceiver/watcher.go b/receiver/k8sclusterreceiver/watcher.go index 63bc5ac09e9e..240c4f9417b5 100644 --- a/receiver/k8sclusterreceiver/watcher.go +++ b/receiver/k8sclusterreceiver/watcher.go @@ -90,7 +90,7 @@ func (rw *resourceWatcher) initialize() error { } rw.client = client - if rw.config.Distribution == distributionOpenShift { + if rw.config.Distribution == distributionOpenShift && rw.config.Namespace == "" { rw.osQuotaClient, err = rw.makeOpenShiftQuotaClient(rw.config.APIConfig) if err != nil { return fmt.Errorf("Failed to create OpenShift quota API client: %w", err) @@ -106,7 +106,7 @@ func (rw *resourceWatcher) initialize() error { } func (rw *resourceWatcher) prepareSharedInformerFactory() error { - factory := informers.NewSharedInformerFactoryWithOptions(rw.client, rw.config.MetadataCollectionInterval) + factory := rw.getInformerFactory() // Map of supported group version kinds by name of a kind. // If none of the group versions are supported by k8s server for a specific kind, @@ -156,6 +156,24 @@ func (rw *resourceWatcher) prepareSharedInformerFactory() error { return nil } +func (rw *resourceWatcher) getInformerFactory() informers.SharedInformerFactory { + var factory informers.SharedInformerFactory + if rw.config.Namespace != "" { + rw.logger.Info("Namespace filter has been enabled. Nodes and namespaces will not be observed.", zap.String("namespace", rw.config.Namespace)) + factory = informers.NewSharedInformerFactoryWithOptions( + rw.client, + rw.config.MetadataCollectionInterval, + informers.WithNamespace(rw.config.Namespace), + ) + } else { + factory = informers.NewSharedInformerFactoryWithOptions( + rw.client, + rw.config.MetadataCollectionInterval, + ) + } + return factory +} + func (rw *resourceWatcher) isKindSupported(gvk schema.GroupVersionKind) (bool, error) { resources, err := rw.client.Discovery().ServerResourcesForGroupVersion(gvk.GroupVersion().String()) if err != nil { @@ -179,9 +197,13 @@ func (rw *resourceWatcher) setupInformerForKind(kind schema.GroupVersionKind, fa case gvk.Pod: rw.setupInformer(kind, factory.Core().V1().Pods().Informer()) case gvk.Node: - rw.setupInformer(kind, factory.Core().V1().Nodes().Informer()) + if rw.config.Namespace == "" { + rw.setupInformer(kind, factory.Core().V1().Nodes().Informer()) + } case gvk.Namespace: - rw.setupInformer(kind, factory.Core().V1().Namespaces().Informer()) + if rw.config.Namespace == "" { + rw.setupInformer(kind, factory.Core().V1().Namespaces().Informer()) + } case gvk.ReplicationController: rw.setupInformer(kind, factory.Core().V1().ReplicationControllers().Informer()) case gvk.ResourceQuota: diff --git a/receiver/k8sclusterreceiver/watcher_test.go b/receiver/k8sclusterreceiver/watcher_test.go index c9b104e6d906..806a6ab5520c 100644 --- a/receiver/k8sclusterreceiver/watcher_test.go +++ b/receiver/k8sclusterreceiver/watcher_test.go @@ -110,7 +110,7 @@ func TestIsKindSupported(t *testing.T) { }{ { name: "nothing_supported", - client: fake.NewSimpleClientset(), + client: fake.NewClientset(), gvk: gvk.Pod, expected: false, }, @@ -146,7 +146,7 @@ func TestPrepareSharedInformerFactory(t *testing.T) { { name: "old_server_version", // With no batch/v1.CronJob support. 
client: func() *fake.Clientset { - client := fake.NewSimpleClientset() + client := fake.NewClientset() client.Resources = []*metav1.APIResourceList{ { GroupVersion: "v1",
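The README changes in this patch describe replacing the `ClusterRole`/`ClusterRoleBinding` with a namespace-scoped `Role`/`RoleBinding` (the PR also adds `role.yaml`/`rolebinding.yaml` under the namespace-scoped e2e testdata). As a rough sketch of what such RBAC objects could look like — the object names, the service account name `otelcontribcol`, the namespaces, and the exact resource list are assumptions for illustration, not the manifests shipped in this PR:

```yaml
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: otel-k8scluster-reader      # hypothetical name
  namespace: my-namespace           # namespace set in the receiver's `namespace` option
rules:
  - apiGroups: [""]
    resources: ["pods", "replicationcontrollers", "resourcequotas", "services"]
    verbs: ["get", "list", "watch"]
  - apiGroups: ["apps"]
    resources: ["daemonsets", "deployments", "replicasets", "statefulsets"]
    verbs: ["get", "list", "watch"]
  - apiGroups: ["batch"]
    resources: ["jobs", "cronjobs"]
    verbs: ["get", "list", "watch"]
  - apiGroups: ["autoscaling"]
    resources: ["horizontalpodautoscalers"]
    verbs: ["get", "list", "watch"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: otel-k8scluster-reader      # hypothetical name
  namespace: my-namespace
subjects:
  - kind: ServiceAccount
    name: otelcontribcol            # assumed collector service account
    namespace: default              # namespace the collector is deployed in (assumption)
roleRef:
  kind: Role
  name: otel-k8scluster-reader
  apiGroup: rbac.authorization.k8s.io
```

Because no `ClusterRole` is involved, the collector can only read objects in `my-namespace`, which matches the receiver's behavior of not observing `Nodes`, `Namespaces`, or `ClusterResourceQuotas` when the `namespace` option is set.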