From c18a26b0a2a3a69ea084e2484bfe25404562c250 Mon Sep 17 00:00:00 2001 From: Kulwant Singh Date: Mon, 29 Jan 2024 16:45:25 +0000 Subject: [PATCH] Enable awscontainerinsights receiver to run inside Host Process container (#153) * Enable awscontainerinsights receiver to run inside Host Process container 1. Added workaround to fix ServiceAccount token and cert path for kubelet account inside HPC. 2. Added workaround to fix above issue in k8s clientset. * Addressed chad's comments * Addressed pooja's comments * Fix go.mod --- cmd/otelcontribcol/go.mod | 2 +- go.mod | 2 +- internal/aws/containerinsight/const.go | 5 +++ internal/aws/containerinsight/utils.go | 11 +++++ internal/aws/containerinsight/utils_test.go | 12 ++++++ internal/aws/k8s/go.mod | 3 ++ internal/aws/k8s/k8sclient/clientset.go | 46 ++++++++++++++++++++- internal/kubelet/client.go | 18 +++++++- internal/kubelet/client_test.go | 28 +++++++++++++ internal/kubelet/go.mod | 6 +++ internal/kubelet/go.sum | 7 ++++ 11 files changed, 136 insertions(+), 4 deletions(-) diff --git a/cmd/otelcontribcol/go.mod b/cmd/otelcontribcol/go.mod index cdf55a636050..9568e6eb6bac 100644 --- a/cmd/otelcontribcol/go.mod +++ b/cmd/otelcontribcol/go.mod @@ -520,7 +520,7 @@ require ( github.com/nginxinc/nginx-prometheus-exporter v0.8.1-0.20201110005315-f5a5f8086c19 // indirect github.com/open-telemetry/opentelemetry-collector-contrib/extension/observer v0.89.0 // indirect github.com/open-telemetry/opentelemetry-collector-contrib/internal/aws/awsutil v0.89.0 // indirect - github.com/open-telemetry/opentelemetry-collector-contrib/internal/aws/containerinsight v0.89.0 // indirect + github.com/open-telemetry/opentelemetry-collector-contrib/internal/aws/containerinsight v0.92.0 // indirect github.com/open-telemetry/opentelemetry-collector-contrib/internal/aws/cwlogs v0.89.0 // indirect github.com/open-telemetry/opentelemetry-collector-contrib/internal/aws/ecsutil v0.89.0 // indirect 
github.com/open-telemetry/opentelemetry-collector-contrib/internal/aws/k8s v0.89.0 // indirect diff --git a/go.mod b/go.mod index 79dec4194c72..98b173221bf4 100644 --- a/go.mod +++ b/go.mod @@ -495,7 +495,7 @@ require ( github.com/nginxinc/nginx-prometheus-exporter v0.8.1-0.20201110005315-f5a5f8086c19 // indirect github.com/open-telemetry/opentelemetry-collector-contrib/extension/observer v0.89.0 // indirect github.com/open-telemetry/opentelemetry-collector-contrib/internal/aws/awsutil v0.89.0 // indirect - github.com/open-telemetry/opentelemetry-collector-contrib/internal/aws/containerinsight v0.89.0 // indirect + github.com/open-telemetry/opentelemetry-collector-contrib/internal/aws/containerinsight v0.92.0 // indirect github.com/open-telemetry/opentelemetry-collector-contrib/internal/aws/cwlogs v0.89.0 // indirect github.com/open-telemetry/opentelemetry-collector-contrib/internal/aws/ecsutil v0.89.0 // indirect github.com/open-telemetry/opentelemetry-collector-contrib/internal/aws/k8s v0.89.0 // indirect diff --git a/internal/aws/containerinsight/const.go b/internal/aws/containerinsight/const.go index 0e46f9a31455..d378451a99c1 100644 --- a/internal/aws/containerinsight/const.go +++ b/internal/aws/containerinsight/const.go @@ -12,6 +12,11 @@ const ( // We assume 50 micro-seconds is the minimal gap between two collected data sample to be valid to calculate delta MinTimeDiff = 50 * time.Microsecond + // Environment variables + RunInContainer = "RUN_IN_CONTAINER" + RunAsHostProcessContainer = "RUN_AS_HOST_PROCESS_CONTAINER" + TrueValue = "True" + // Attribute names InstanceID = "InstanceId" InstanceType = "InstanceType" diff --git a/internal/aws/containerinsight/utils.go b/internal/aws/containerinsight/utils.go index 5f734597bfb1..4b89b0f1dcb0 100644 --- a/internal/aws/containerinsight/utils.go +++ b/internal/aws/containerinsight/utils.go @@ -5,6 +5,8 @@ package containerinsight // import "github.com/open-telemetry/opentelemetry-coll import ( "fmt" "log" + "os" + 
"runtime" "strconv" "strings" "time" @@ -83,6 +85,15 @@ func IsPod(mType string) bool { return false } +func IsWindowsHostProcessContainer() bool { + // todo: Remove this workaround func when Windows AMIs has containerd 1.7 which solves upstream bug + // https://kubernetes.io/docs/tasks/configure-pod-container/create-hostprocess-pod/#containerd-v1-6 + if runtime.GOOS == "windows" && os.Getenv(RunInContainer) == TrueValue && os.Getenv(RunAsHostProcessContainer) == TrueValue { + return true + } + return false +} + func getPrefixByMetricType(mType string) string { prefix := "" instancePrefix := "instance_" diff --git a/internal/aws/containerinsight/utils_test.go b/internal/aws/containerinsight/utils_test.go index 124aa1d22c70..e1c782dbcf70 100644 --- a/internal/aws/containerinsight/utils_test.go +++ b/internal/aws/containerinsight/utils_test.go @@ -5,6 +5,7 @@ package containerinsight import ( "fmt" "log" + "os" "strconv" "strings" "testing" @@ -866,3 +867,14 @@ func TestConvertToOTLPMetricsForPodContainerStatusMetrics(t *testing.T) { md = ConvertToOTLPMetrics(fields, tags, zap.NewNop()) checkMetricsAreExpected(t, md, fields, tags, expectedUnits) } + +func TestHostProcessContainer(t *testing.T) { + os.Setenv(RunInContainer, "True") + assert.Equal(t, IsWindowsHostProcessContainer(), false) + + os.Setenv(RunAsHostProcessContainer, "True") + assert.Equal(t, IsWindowsHostProcessContainer(), true) + + os.Unsetenv(RunInContainer) + os.Unsetenv(RunAsHostProcessContainer) +} diff --git a/internal/aws/k8s/go.mod b/internal/aws/k8s/go.mod index b596cebea0e8..9171fdcf1d32 100644 --- a/internal/aws/k8s/go.mod +++ b/internal/aws/k8s/go.mod @@ -9,6 +9,7 @@ require ( k8s.io/api v0.28.3 k8s.io/apimachinery v0.28.3 k8s.io/client-go v0.28.3 + github.com/open-telemetry/opentelemetry-collector-contrib/internal/aws/containerinsight v0.89.0 ) require ( @@ -56,6 +57,8 @@ require ( sigs.k8s.io/yaml v1.3.0 // indirect ) +replace 
github.com/open-telemetry/opentelemetry-collector-contrib/internal/aws/containerinsight => ../../aws/containerinsight + retract ( v0.76.2 v0.76.1 diff --git a/internal/aws/k8s/k8sclient/clientset.go b/internal/aws/k8s/k8sclient/clientset.go index 0a115915b520..294e760b0678 100644 --- a/internal/aws/k8s/k8sclient/clientset.go +++ b/internal/aws/k8s/k8sclient/clientset.go @@ -5,6 +5,8 @@ package k8sclient // import "github.com/open-telemetry/opentelemetry-collector-c import ( "context" + "fmt" + "net" "os" "path/filepath" "reflect" @@ -20,6 +22,9 @@ import ( "k8s.io/client-go/kubernetes" "k8s.io/client-go/rest" "k8s.io/client-go/tools/clientcmd" + certutil "k8s.io/client-go/util/cert" + + "github.com/open-telemetry/opentelemetry-collector-contrib/internal/aws/containerinsight" ) const ( @@ -251,7 +256,7 @@ func (c *K8sClient) init(logger *zap.Logger, options ...Option) error { opt.set(c) } - config, err := rest.InClusterConfig() + config, err := c.inClusterConfig() if err != nil { c.logger.Warn("cannot find in cluster config", zap.Error(err)) config, err = clientcmd.BuildConfigFromFlags("", c.kubeConfigPath) @@ -459,3 +464,42 @@ func (c *K8sClient) Shutdown() { } } } + +// inClusterConfig is a copy of rest.InClusterConfig. +// There is a known bug in rest.InClusterConfig on Windows when running as a host process container. +// https://github.com/kubernetes/kubernetes/issues/104562 +// This copy works around that bug by prepending `CONTAINER_SANDBOX_MOUNT_POINT` to the k8s token and cert file paths. +// todo: Remove this workaround func when Windows AMIs have containerd 1.7, which solves the upstream bug. 
+func (c *K8sClient) inClusterConfig() (*rest.Config, error) { + if !containerinsight.IsWindowsHostProcessContainer() { + return rest.InClusterConfig() + } + var ( + tokenFile = filepath.Join(os.Getenv("CONTAINER_SANDBOX_MOUNT_POINT"), "/var/run/secrets/kubernetes.io/serviceaccount/token") + rootCAFile = filepath.Join(os.Getenv("CONTAINER_SANDBOX_MOUNT_POINT"), "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt") + ) + host, port := os.Getenv("KUBERNETES_SERVICE_HOST"), os.Getenv("KUBERNETES_SERVICE_PORT") + if len(host) == 0 || len(port) == 0 { + return nil, rest.ErrNotInCluster + } + + token, err := os.ReadFile(tokenFile) + if err != nil { + return nil, err + } + + tlsClientConfig := rest.TLSClientConfig{} + + if _, err := certutil.NewPool(rootCAFile); err != nil { + c.logger.Error(fmt.Sprintf("Expected to load root CA config from %s, but got err: %v", rootCAFile, err)) + } else { + tlsClientConfig.CAFile = rootCAFile + } + + return &rest.Config{ + Host: "https://" + net.JoinHostPort(host, port), + TLSClientConfig: tlsClientConfig, + BearerToken: string(token), + BearerTokenFile: tokenFile, + }, nil +} diff --git a/internal/kubelet/client.go b/internal/kubelet/client.go index df16aa3b3c84..6d00e74ef320 100644 --- a/internal/kubelet/client.go +++ b/internal/kubelet/client.go @@ -11,23 +11,39 @@ import ( "net/http" "net/url" "os" + "path/filepath" "strings" "go.uber.org/zap" "k8s.io/client-go/rest" "k8s.io/client-go/transport" + "github.com/open-telemetry/opentelemetry-collector-contrib/internal/aws/containerinsight" "github.com/open-telemetry/opentelemetry-collector-contrib/internal/common/sanitize" "github.com/open-telemetry/opentelemetry-collector-contrib/internal/k8sconfig" ) -const ( +var ( svcAcctCACertPath = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt" svcAcctTokenPath = "/var/run/secrets/kubernetes.io/serviceaccount/token" // #nosec defaultSecurePort = "10250" defaultReadOnlyPort = "10255" ) +func init() { + updateSVCPath() +} + +func 
updateSVCPath() { + // It is known that the k8s token and cert files are available with CONTAINER_SANDBOX_MOUNT_POINT prepended to the path. + // https://kubernetes.io/docs/tasks/configure-pod-container/create-hostprocess-pod/#containerd-v1-6 + // todo: Remove this workaround func when Windows AMIs have containerd 1.7 which solves upstream bug + if containerinsight.IsWindowsHostProcessContainer() { + svcAcctCACertPath = filepath.Join(os.Getenv("CONTAINER_SANDBOX_MOUNT_POINT"), svcAcctCACertPath) + svcAcctTokenPath = filepath.Join(os.Getenv("CONTAINER_SANDBOX_MOUNT_POINT"), svcAcctTokenPath) + } +} + type Client interface { Get(path string) ([]byte, error) } diff --git a/internal/kubelet/client_test.go b/internal/kubelet/client_test.go index d9fa5f6547f6..5b960743a53b 100644 --- a/internal/kubelet/client_test.go +++ b/internal/kubelet/client_test.go @@ -15,6 +15,7 @@ import ( "io" "net/http" "net/http/httptest" + "os" "path/filepath" "regexp" "strings" @@ -26,6 +27,7 @@ import ( "go.uber.org/zap" "k8s.io/client-go/tools/clientcmd" + "github.com/open-telemetry/opentelemetry-collector-contrib/internal/aws/containerinsight" "github.com/open-telemetry/opentelemetry-collector-contrib/internal/k8sconfig" ) @@ -71,6 +73,32 @@ func TestNewTLSClientProvider(t *testing.T) { require.NotNil(t, tcc.RootCAs) } +func TestSAPathInHostProcessContainer(t *testing.T) { + // todo: Remove this workaround func when Windows AMIs have containerd 1.7 which solves upstream bug. + + // Test default SA cert and token. + assert.Equal(t, svcAcctCACertPath, "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt") + assert.Equal(t, svcAcctTokenPath, "/var/run/secrets/kubernetes.io/serviceaccount/token") + + // Test SA cert and token when run inside container. 
+ os.Setenv(containerinsight.RunInContainer, "True") + updateSVCPath() + assert.Equal(t, svcAcctCACertPath, "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt") + assert.Equal(t, svcAcctTokenPath, "/var/run/secrets/kubernetes.io/serviceaccount/token") + + // Test SA cert and token when run inside host process container. + os.Setenv(containerinsight.RunAsHostProcessContainer, "True") + os.Setenv("CONTAINER_SANDBOX_MOUNT_POINT", "test123456") + updateSVCPath() + assert.Equal(t, svcAcctCACertPath, "test123456/var/run/secrets/kubernetes.io/serviceaccount/ca.crt") + assert.Equal(t, svcAcctTokenPath, "test123456/var/run/secrets/kubernetes.io/serviceaccount/token") + + os.Unsetenv("CONTAINER_SANDBOX_MOUNT_POINT") + os.Unsetenv(containerinsight.RunInContainer) + os.Unsetenv(containerinsight.RunAsHostProcessContainer) + updateSVCPath() +} + func TestNewSAClientProvider(t *testing.T) { p, err := NewClientProvider("localhost:9876", &ClientConfig{ APIConfig: k8sconfig.APIConfig{ diff --git a/internal/kubelet/go.mod b/internal/kubelet/go.mod index 60c5eb829dcd..0c360e49dda1 100644 --- a/internal/kubelet/go.mod +++ b/internal/kubelet/go.mod @@ -3,6 +3,7 @@ module github.com/open-telemetry/opentelemetry-collector-contrib/internal/kubele go 1.20 require ( + github.com/open-telemetry/opentelemetry-collector-contrib/internal/aws/containerinsight v0.92.0 github.com/open-telemetry/opentelemetry-collector-contrib/internal/common v0.89.0 github.com/open-telemetry/opentelemetry-collector-contrib/internal/k8sconfig v0.89.0 github.com/stretchr/testify v1.8.4 @@ -37,6 +38,7 @@ require ( github.com/pmezard/go-difflib v1.0.0 // indirect github.com/spf13/pflag v1.0.5 // indirect go.opentelemetry.io/collector/config/configopaque v0.89.0 // indirect + go.opentelemetry.io/collector/pdata v1.0.0-rcv0018 // indirect go.uber.org/multierr v1.11.0 // indirect golang.org/x/net v0.18.0 // indirect golang.org/x/oauth2 v0.14.0 // indirect @@ -45,6 +47,8 @@ require ( golang.org/x/text v0.14.0 // 
indirect golang.org/x/time v0.4.0 // indirect google.golang.org/appengine v1.6.7 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20230822172742-b8732ec3820d // indirect + google.golang.org/grpc v1.59.0 // indirect google.golang.org/protobuf v1.31.0 // indirect gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect @@ -63,6 +67,8 @@ replace github.com/open-telemetry/opentelemetry-collector-contrib/internal/commo replace github.com/open-telemetry/opentelemetry-collector-contrib/internal/k8sconfig => ../../internal/k8sconfig +replace github.com/open-telemetry/opentelemetry-collector-contrib/internal/aws/containerinsight => ../../internal/aws/containerinsight + // openshift removed all tags from their repo, use the pseudoversion from the release-3.9 branch HEAD replace github.com/openshift/api v3.9.0+incompatible => github.com/openshift/api v0.0.0-20180801171038-322a19404e37 diff --git a/internal/kubelet/go.sum b/internal/kubelet/go.sum index 4285b59c7fe3..43e990b84bda 100644 --- a/internal/kubelet/go.sum +++ b/internal/kubelet/go.sum @@ -232,6 +232,8 @@ go.opentelemetry.io/collector/config/configopaque v0.89.0 h1:Ad6yGcGBHs+J9SNjked go.opentelemetry.io/collector/config/configopaque v0.89.0/go.mod h1:TPCHaU+QXiEV+JXbgyr6mSErTI9chwQyasDVMdJr3eY= go.opentelemetry.io/collector/config/configtls v0.89.0 h1:XDeUaTU7LYwnEXz/CSdjbCStJa7n0YR1q0QpK0Vtw9w= go.opentelemetry.io/collector/config/configtls v0.89.0/go.mod h1:NlE4elqXoyFfzQvYfzgH6uOU1zNVa+5tt6EIq52TJ9Y= +go.opentelemetry.io/collector/pdata v1.0.0-rcv0018 h1:a2IHOZKphRzPagcvOHQHHUE0DlITFSKlIBwaWhPZpl4= +go.opentelemetry.io/collector/pdata v1.0.0-rcv0018/go.mod h1:oNIcTRyEJYIfMcRYyyh5lquDU0Vl+ktTL6ka+p+dYvg= go.uber.org/goleak v1.2.0 h1:xqgm/S+aQvhWFTtR0XK3Jvg7z8kGV8P4X14IzwN3Eqk= go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= @@ -438,6 +440,9 @@ google.golang.org/genproto 
v0.0.0-20200212174721-66ed5ce911ce/go.mod h1:55QSHmfG google.golang.org/genproto v0.0.0-20200224152610-e50cd9704f63/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= google.golang.org/genproto v0.0.0-20200305110556-506484158171/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= +google.golang.org/genproto v0.0.0-20230822172742-b8732ec3820d h1:VBu5YqKPv6XiJ199exd8Br+Aetz+o08F+PLMnwJQHAY= +google.golang.org/genproto/googleapis/rpc v0.0.0-20230822172742-b8732ec3820d h1:uvYuEyMHKNt+lT4K3bN6fGswmK8qSvcreM3BwjDh+y4= +google.golang.org/genproto/googleapis/rpc v0.0.0-20230822172742-b8732ec3820d/go.mod h1:+Bk1OCOj40wS2hwAMA+aCW9ypzm63QTBBHp6lQ3p+9M= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38= google.golang.org/grpc v1.21.1/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM= @@ -445,6 +450,8 @@ google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyac google.golang.org/grpc v1.26.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= google.golang.org/grpc v1.27.1/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= +google.golang.org/grpc v1.59.0 h1:Z5Iec2pjwb+LEOqzpB2MR12/eKFhDPhuqW91O+4bwUk= +google.golang.org/grpc v1.59.0/go.mod h1:aUPDwccQo6OTjy7Hct4AfBPD1GptF4fyUjIkQ9YtF98= google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM=