diff --git a/.chloggen/add_k8scluster_attr.yaml b/.chloggen/add_k8scluster_attr.yaml new file mode 100644 index 000000000000..45587300b52a --- /dev/null +++ b/.chloggen/add_k8scluster_attr.yaml @@ -0,0 +1,31 @@ +# Use this changelog template to create an entry for release notes. + +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: enhancement + +# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver) +component: k8sclusterreceiver + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: Add additional attributes to node and pod entities + +# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists. +issues: [35879] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. +subtext: | + Adds the following attributes to node and pod metadata/entities: + - `k8s.pod.phase`: The phase of a pod, indicating where the pod is in its lifecycle, e.g. 'Pending', 'Running'. Reported as 'Unknown' when the phase is not set. + - `k8s.pod.status_reason`: A brief message indicating details about why the pod is in this state, e.g. 'Evicted'. Only added when the pod status has a reason. + - `k8s.node.condition_*`: The condition of a node, e.g. `k8s.node.condition_ready`. Only the `Ready`, `MemoryPressure`, `DiskPressure`, `PIDPressure` and `NetworkUnavailable` conditions are reported. The value can be `true`, `false` or `unknown`. + +# If your change doesn't affect end users or the exported elements of any package, +# you should instead start your pull request title with [chore] or use the "Skip Changelog" label. +# Optional: The change log or logs in which this entry should be included. +# e.g. '[user]' or '[user, api]' +# Include 'user' if the change is relevant to end users. +# Include 'api' if there is a change to a library API. 
+# Default: '[user]' +change_logs: [user] diff --git a/receiver/k8sclusterreceiver/internal/node/nodes.go b/receiver/k8sclusterreceiver/internal/node/nodes.go index ad94b9ef9008..e8b5a8db5532 100644 --- a/receiver/k8sclusterreceiver/internal/node/nodes.go +++ b/receiver/k8sclusterreceiver/internal/node/nodes.go @@ -23,8 +23,9 @@ import ( ) const ( - // Keys for node metadata. - nodeCreationTime = "node.creation_timestamp" + // Keys for node metadata and entity attributes. These are NOT used by resource attributes. + nodeCreationTime = "node.creation_timestamp" + k8sNodeConditionPrefix = "k8s.node.condition" ) // Transform transforms the node to remove the fields that we don't use to reduce RAM utilization. @@ -151,6 +152,24 @@ func GetMetadata(node *corev1.Node) map[experimentalmetricmetadata.ResourceID]*m meta[conventions.AttributeK8SNodeName] = node.Name meta[nodeCreationTime] = node.GetCreationTimestamp().Format(time.RFC3339) + // A node can have many additional conditions (GKE has 18 on v1.29). Bad thresholds/implementations + // of custom conditions can cause their values to oscillate between true/false frequently. So, to avoid noise, only the + // well-known built-in conditions listed below (readiness, resource pressure and network availability) are sent. 
+ // https://pkg.go.dev/k8s.io/api/core/v1#NodeConditionType + kubeletConditions := map[corev1.NodeConditionType]struct{}{ + corev1.NodeReady: {}, + corev1.NodeMemoryPressure: {}, + corev1.NodeDiskPressure: {}, + corev1.NodePIDPressure: {}, + corev1.NodeNetworkUnavailable: {}, + } + + for _, c := range node.Status.Conditions { + if _, ok := kubeletConditions[c.Type]; ok { + meta[fmt.Sprintf("%s_%s", k8sNodeConditionPrefix, strcase.ToSnake(string(c.Type)))] = strings.ToLower(string(c.Status)) + } + } + nodeID := experimentalmetricmetadata.ResourceID(node.UID) return map[experimentalmetricmetadata.ResourceID]*metadata.KubernetesMetadata{ nodeID: { diff --git a/receiver/k8sclusterreceiver/internal/node/nodes_test.go b/receiver/k8sclusterreceiver/internal/node/nodes_test.go index d047d33f4e36..fcfb6ab1a223 100644 --- a/receiver/k8sclusterreceiver/internal/node/nodes_test.go +++ b/receiver/k8sclusterreceiver/internal/node/nodes_test.go @@ -13,10 +13,12 @@ import ( "go.opentelemetry.io/collector/pdata/pcommon" "go.opentelemetry.io/collector/pdata/pmetric" "go.opentelemetry.io/collector/receiver/receivertest" + conventions "go.opentelemetry.io/collector/semconv/v1.18.0" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/experimentalmetricmetadata" "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/golden" "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/pdatatest/pmetrictest" "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/k8sclusterreceiver/internal/metadata" @@ -263,3 +265,111 @@ func TestTransform(t *testing.T) { } assert.Equal(t, wantNode, Transform(originalNode)) } + +func TestNodeMetadata(t *testing.T) { + creationTimestamp := time.Now() + node := &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-node", + UID: "test-node-uid", + Labels: map[string]string{"env": "production"}, + 
CreationTimestamp: metav1.Time{Time: creationTimestamp}, + }, + Status: corev1.NodeStatus{ + Conditions: []corev1.NodeCondition{ + { + Type: "FrequentUnregisterNetDevice", + Status: "False", + LastHeartbeatTime: metav1.Time{ + Time: time.Now(), + }, + LastTransitionTime: metav1.Time{ + Time: time.Now(), + }, + Message: "node is functioning properly", + Reason: "NoFrequentUnregisterNetDevice", + }, + { + Type: "MemoryPressure", + Status: "False", + LastHeartbeatTime: metav1.Time{ + Time: time.Now(), + }, + LastTransitionTime: metav1.Time{ + Time: time.Now(), + }, + Reason: "KubeletHasSufficientMemory", + Message: "kubelet has sufficient memory available", + }, + { + Type: "DiskPressure", + Status: "False", + LastHeartbeatTime: metav1.Time{ + Time: time.Now(), + }, + LastTransitionTime: metav1.Time{ + Time: time.Now(), + }, + Reason: "KubeletHasNoDiskPressure", + Message: "kubelet has no disk pressure", + }, + { + Type: "PIDPressure", + Status: "False", + LastHeartbeatTime: metav1.Time{ + Time: time.Now(), + }, + LastTransitionTime: metav1.Time{ + Time: time.Now(), + }, + Reason: "KubeletHasSufficientPID", + Message: "kubelet has sufficient PID available", + }, + { + Type: "Ready", + Status: "True", + LastHeartbeatTime: metav1.Time{ + Time: time.Now(), + }, + LastTransitionTime: metav1.Time{ + Time: time.Now(), + }, + Reason: "KubeletReady", + Message: "kubelet is posting ready status", + }, + }, + NodeInfo: corev1.NodeSystemInfo{ + MachineID: "70ebe86154de42bda73a4ffe181afa3d", + SystemUUID: "70ebe86154de42bda73a4ffe181afa3d", + BootID: "541c31d4-d1e2-4660-a3b2-484abbb1cbce", + KernelVersion: "6.10.4-linuxkit", + OSImage: "Debian GNU/Linux 12 (bookworm)", + ContainerRuntimeVersion: "containerd://1.7.15", + KubeletVersion: "v1.30.0", + OperatingSystem: "linux", + Architecture: "amd64", + }, + }, + } + + expectedMeta := map[experimentalmetricmetadata.ResourceID]*metadata.KubernetesMetadata{ + experimentalmetricmetadata.ResourceID("test-node-uid"): { + EntityType: 
"k8s.node", + ResourceIDKey: "k8s.node.uid", + ResourceID: experimentalmetricmetadata.ResourceID("test-node-uid"), + Metadata: map[string]string{ + "env": "production", + conventions.AttributeK8SNodeName: "test-node", + "k8s.node.condition_ready": "true", + "k8s.node.condition_memory_pressure": "false", + "k8s.node.condition_disk_pressure": "false", + "k8s.node.condition_pid_pressure": "false", + "node.creation_timestamp": creationTimestamp.Format(time.RFC3339), + }, + }, + } + + actualMeta := GetMetadata(node) + require.NotNil(t, actualMeta) + require.Equal(t, expectedMeta, actualMeta) +} diff --git a/receiver/k8sclusterreceiver/internal/pod/pods.go b/receiver/k8sclusterreceiver/internal/pod/pods.go index 81cf2a206789..263e4c3ac5f0 100644 --- a/receiver/k8sclusterreceiver/internal/pod/pods.go +++ b/receiver/k8sclusterreceiver/internal/pod/pods.go @@ -28,8 +28,10 @@ import ( ) const ( - // Keys for pod metadata. + // Keys for pod metadata and entity attributes. These are NOT used by resource attributes. podCreationTime = "pod.creation_timestamp" + podPhase = "k8s.pod.phase" + podStatusReason = "k8s.pod.status_reason" ) // Transform transforms the pod to remove the fields that we don't use to reduce RAM utilization. 
@@ -43,6 +45,7 @@ func Transform(pod *corev1.Pod) *corev1.Pod { Status: corev1.PodStatus{ Phase: pod.Status.Phase, QOSClass: pod.Status.QOSClass, + Reason: pod.Status.Reason, }, } for _, cs := range pod.Status.ContainerStatuses { @@ -126,6 +129,15 @@ func GetMetadata(pod *corev1.Pod, mc *metadata.Store, logger *zap.Logger) map[ex meta := maps.MergeStringMaps(map[string]string{}, pod.Labels) meta[podCreationTime] = pod.CreationTimestamp.Format(time.RFC3339) + phase := pod.Status.Phase + if phase == "" { + phase = corev1.PodUnknown + } + meta[podPhase] = string(phase) + reason := pod.Status.Reason + if reason != "" { + meta[podStatusReason] = reason + } for _, or := range pod.OwnerReferences { kind := strings.ToLower(or.Kind) diff --git a/receiver/k8sclusterreceiver/internal/pod/pods_test.go b/receiver/k8sclusterreceiver/internal/pod/pods_test.go index 122f7bd9df33..805eb66d325e 100644 --- a/receiver/k8sclusterreceiver/internal/pod/pods_test.go +++ b/receiver/k8sclusterreceiver/internal/pod/pods_test.go @@ -247,8 +247,9 @@ func expectedKubernetesMetadata(to testCaseOptions) map[experimentalmetricmetada ResourceIDKey: "k8s.pod.uid", ResourceID: experimentalmetricmetadata.ResourceID(podUIDLabel), Metadata: map[string]string{ - kindNameLabel: kindObjName, - kindUIDLabel: kindObjUID, + kindNameLabel: kindObjName, + kindUIDLabel: kindObjUID, + "k8s.pod.phase": "Unknown", // Default value when phase is not set. 
}, }, } @@ -415,6 +416,7 @@ func TestTransform(t *testing.T) { }, Status: corev1.PodStatus{ Phase: corev1.PodRunning, + Reason: "Evicted", HostIP: "192.168.1.100", PodIP: "10.244.0.5", StartTime: &v1.Time{Time: v1.Now().Add(-5 * time.Minute)}, @@ -463,7 +465,8 @@ func TestTransform(t *testing.T) { }, }, Status: corev1.PodStatus{ - Phase: corev1.PodRunning, + Phase: corev1.PodRunning, + Reason: "Evicted", ContainerStatuses: []corev1.ContainerStatus{ { Name: "my-container", @@ -478,3 +481,70 @@ func TestTransform(t *testing.T) { } assert.Equal(t, wantPod, Transform(originalPod)) } + +func TestPodMetadata(t *testing.T) { + tests := []struct { + name string + statusPhase corev1.PodPhase + statusReason string + expectedMetadata map[string]string + }{ + { + name: "Pod with status reason", + statusPhase: corev1.PodFailed, + statusReason: "Evicted", + expectedMetadata: map[string]string{ + "k8s.pod.phase": "Failed", + "k8s.pod.status_reason": "Evicted", + "k8s.workload.kind": "Deployment", + "k8s.workload.name": "test-deployment-0", + "k8s.replicaset.name": "test-replicaset-0", + "k8s.replicaset.uid": "test-replicaset-0-uid", + "k8s.deployment.name": "test-deployment-0", + "k8s.deployment.uid": "test-deployment-0-uid", + }, + }, + { + name: "Pod without status reason", + statusPhase: corev1.PodRunning, + statusReason: "", + expectedMetadata: map[string]string{ + "k8s.pod.phase": "Running", + "k8s.workload.kind": "Deployment", + "k8s.workload.name": "test-deployment-0", + "k8s.replicaset.name": "test-replicaset-0", + "k8s.replicaset.uid": "test-replicaset-0-uid", + "k8s.deployment.name": "test-deployment-0", + "k8s.deployment.uid": "test-deployment-0-uid", + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + pod := podWithOwnerReference("ReplicaSet") + pod.Status.Phase = tt.statusPhase + pod.Status.Reason = tt.statusReason + + metadataStore := mockMetadataStore(testCaseOptions{ + kind: "ReplicaSet", + withParentOR: true, + }) + logger := 
zap.NewNop() + meta := GetMetadata(pod, metadataStore, logger) + + require.NotNil(t, meta) + require.Contains(t, meta, experimentalmetricmetadata.ResourceID("test-pod-0-uid")) + podMeta := meta["test-pod-0-uid"].Metadata + + allExpectedMetadata := make(map[string]string) + for key, value := range commonPodMetadata { + allExpectedMetadata[key] = value + } + for key, value := range tt.expectedMetadata { + allExpectedMetadata[key] = value + } + assert.Equal(t, allExpectedMetadata, podMeta) + }) + } +} diff --git a/receiver/k8sclusterreceiver/watcher_test.go b/receiver/k8sclusterreceiver/watcher_test.go index 806a6ab5520c..29facfc26f22 100644 --- a/receiver/k8sclusterreceiver/watcher_test.go +++ b/receiver/k8sclusterreceiver/watcher_test.go @@ -270,7 +270,7 @@ func TestSyncMetadataAndEmitEntityEvents(t *testing.T) { "otel.entity.interval": int64(7200000), // 2h in milliseconds "otel.entity.type": "k8s.pod", "otel.entity.id": map[string]any{"k8s.pod.uid": "pod0"}, - "otel.entity.attributes": map[string]any{"pod.creation_timestamp": "0001-01-01T00:00:00Z"}, + "otel.entity.attributes": map[string]any{"pod.creation_timestamp": "0001-01-01T00:00:00Z", "k8s.pod.phase": "Unknown"}, } assert.EqualValues(t, expected, lr.Attributes().AsRaw()) assert.WithinRange(t, lr.Timestamp().AsTime(), step1, step2) @@ -324,7 +324,7 @@ func TestObjMetadata(t *testing.T) { EntityType: "k8s.pod", ResourceIDKey: "k8s.pod.uid", ResourceID: "test-pod-0-uid", - Metadata: commonPodMetadata, + Metadata: allPodMetadata(map[string]string{"k8s.pod.phase": "Succeeded"}), }, experimentalmetricmetadata.ResourceID("container-id"): { EntityType: "container", @@ -345,17 +345,19 @@ func TestObjMetadata(t *testing.T) { Name: "test-statefulset-0", UID: "test-statefulset-0-uid", }, - }, testutils.NewPodWithContainer("0", &corev1.PodSpec{}, &corev1.PodStatus{})), + }, testutils.NewPodWithContainer("0", &corev1.PodSpec{}, &corev1.PodStatus{Phase: corev1.PodFailed, Reason: "Evicted"})), want: 
map[experimentalmetricmetadata.ResourceID]*metadata.KubernetesMetadata{ experimentalmetricmetadata.ResourceID("test-pod-0-uid"): { EntityType: "k8s.pod", ResourceIDKey: "k8s.pod.uid", ResourceID: "test-pod-0-uid", Metadata: allPodMetadata(map[string]string{ - "k8s.workload.kind": "StatefulSet", - "k8s.workload.name": "test-statefulset-0", - "k8s.statefulset.name": "test-statefulset-0", - "k8s.statefulset.uid": "test-statefulset-0-uid", + "k8s.workload.kind": "StatefulSet", + "k8s.workload.name": "test-statefulset-0", + "k8s.statefulset.name": "test-statefulset-0", + "k8s.statefulset.uid": "test-statefulset-0-uid", + "k8s.pod.phase": "Failed", + "k8s.pod.status_reason": "Evicted", }), }, }, @@ -384,7 +386,7 @@ func TestObjMetadata(t *testing.T) { }(), resource: podWithAdditionalLabels( map[string]string{"k8s-app": "my-app"}, - testutils.NewPodWithContainer("0", &corev1.PodSpec{}, &corev1.PodStatus{}), + testutils.NewPodWithContainer("0", &corev1.PodSpec{}, &corev1.PodStatus{Phase: corev1.PodRunning}), ), want: map[experimentalmetricmetadata.ResourceID]*metadata.KubernetesMetadata{ experimentalmetricmetadata.ResourceID("test-pod-0-uid"): { @@ -394,6 +396,7 @@ func TestObjMetadata(t *testing.T) { Metadata: allPodMetadata(map[string]string{ "k8s.service.test-service": "", "k8s-app": "my-app", + "k8s.pod.phase": "Running", }), }, }, @@ -479,10 +482,15 @@ func TestObjMetadata(t *testing.T) { ResourceIDKey: "k8s.node.uid", ResourceID: "test-node-1-uid", Metadata: map[string]string{ - "foo": "bar", - "foo1": "", - "k8s.node.name": "test-node-1", - "node.creation_timestamp": "0001-01-01T00:00:00Z", + "foo": "bar", + "foo1": "", + "k8s.node.name": "test-node-1", + "node.creation_timestamp": "0001-01-01T00:00:00Z", + "k8s.node.condition_disk_pressure": "false", + "k8s.node.condition_memory_pressure": "false", + "k8s.node.condition_network_unavailable": "false", + "k8s.node.condition_pid_pressure": "false", + "k8s.node.condition_ready": "true", }, }, },