From 356a4a5df2b8a8d07043a5a7b7fbb0ce1f82cb13 Mon Sep 17 00:00:00 2001 From: Tyler Helmuth <12352919+TylerHelmuth@users.noreply.github.com> Date: Fri, 26 Jul 2024 13:12:22 -0600 Subject: [PATCH 01/11] Add componentstatus module --- Makefile | 2 + component/componentstatus/Makefile | 1 + component/componentstatus/go.mod | 28 ++ component/componentstatus/go.sum | 84 ++++++ component/componentstatus/instance.go | 13 + component/componentstatus/status.go | 211 +++++++++++++++ component/componentstatus/status_test.go | 330 +++++++++++++++++++++++ versions.yaml | 1 + 8 files changed, 670 insertions(+) create mode 100644 component/componentstatus/Makefile create mode 100644 component/componentstatus/go.mod create mode 100644 component/componentstatus/go.sum create mode 100644 component/componentstatus/instance.go create mode 100644 component/componentstatus/status.go create mode 100644 component/componentstatus/status_test.go diff --git a/Makefile b/Makefile index e66d2fd36fd..ba35b2e397e 100644 --- a/Makefile +++ b/Makefile @@ -259,6 +259,7 @@ check-contrib: -replace go.opentelemetry.io/collector=$(CURDIR) \ -replace go.opentelemetry.io/collector/client=$(CURDIR)/client \ -replace go.opentelemetry.io/collector/component=$(CURDIR)/component \ + -replace go.opentelemetry.io/collector/component/componentstatus=$(CURDIR)/component/componentstatus \ -replace go.opentelemetry.io/collector/config/configauth=$(CURDIR)/config/configauth \ -replace go.opentelemetry.io/collector/config/configcompression=$(CURDIR)/config/configcompression \ -replace go.opentelemetry.io/collector/config/configgrpc=$(CURDIR)/config/configgrpc \ @@ -321,6 +322,7 @@ restore-contrib: -dropreplace go.opentelemetry.io/collector \ -dropreplace go.opentelemetry.io/collector/client \ -dropreplace go.opentelemetry.io/collector/component \ + -dropreplace go.opentelemetry.io/collector/component/componentstatus \ -dropreplace go.opentelemetry.io/collector/config/configauth \ -dropreplace go.opentelemetry.io/collector/config/configcompression \ -dropreplace go.opentelemetry.io/collector/config/configgrpc \ diff --git a/component/componentstatus/Makefile b/component/componentstatus/Makefile new file mode 100644 index 00000000000..ded7a36092d --- /dev/null +++ b/component/componentstatus/Makefile @@ -0,0 +1 @@ +include ../../Makefile.Common diff --git a/component/componentstatus/go.mod b/component/componentstatus/go.mod new file mode 100644 index 00000000000..5b13e32b40c --- /dev/null +++ b/component/componentstatus/go.mod @@ -0,0 +1,28 @@ +module go.opentelemetry.io/collector/component/componentstatus + +go 1.21.0 + +require ( + github.com/stretchr/testify v1.9.0 + go.opentelemetry.io/collector/component v0.105.0 +) + +require ( + github.com/davecgh/go-spew v1.1.1 // indirect + github.com/gogo/protobuf v1.3.2 // indirect + github.com/pmezard/go-difflib v1.0.0 // indirect + go.opentelemetry.io/collector/config/configtelemetry v0.105.0 // indirect + go.opentelemetry.io/collector/pdata v1.12.0 // indirect + go.opentelemetry.io/otel v1.28.0 // indirect + go.opentelemetry.io/otel/metric v1.28.0 // indirect + go.opentelemetry.io/otel/trace v1.28.0 // indirect + go.uber.org/multierr v1.11.0 // indirect + go.uber.org/zap v1.27.0 // indirect + golang.org/x/net v0.26.0 // indirect + golang.org/x/sys v0.21.0 // indirect + golang.org/x/text v0.16.0 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20240528184218-531527333157 // indirect + google.golang.org/grpc v1.65.0 // indirect + google.golang.org/protobuf v1.34.2 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect +) diff --git a/component/componentstatus/go.sum b/component/componentstatus/go.sum new file mode 100644 index 00000000000..08dfab8d5c9 --- /dev/null +++ b/component/componentstatus/go.sum @@ -0,0 +1,84 @@ +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= +github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= +github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= +github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= +github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= +github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= +github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= +github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= +github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +go.opentelemetry.io/collector/component v0.105.0 h1:/OdkWHd1xTNX7JRq9iW3AFoJAnYUOGZZyOprNQkGoTI= +go.opentelemetry.io/collector/component v0.105.0/go.mod h1:s8KoxOrhNIBzetkb0LHmzX1OI67DyZbaaUPOWIXS1mg= +go.opentelemetry.io/collector/config/configtelemetry v0.105.0 h1:wEfUxAjjstp47aLr2s1cMZiH0dt+k42m6VC6HigqgJA= +go.opentelemetry.io/collector/config/configtelemetry v0.105.0/go.mod h1:WxWKNVAQJg/Io1nA3xLgn/DWLE/W1QOB2+/Js3ACi40= +go.opentelemetry.io/collector/pdata v1.12.0 h1:Xx5VK1p4VO0md8MWm2icwC1MnJ7f8EimKItMWw46BmA= +go.opentelemetry.io/collector/pdata v1.12.0/go.mod h1:MYeB0MmMAxeM0hstCFrCqWLzdyeYySim2dG6pDT6nYI= +go.opentelemetry.io/otel v1.28.0 h1:/SqNcYk+idO0CxKEUOtKQClMK/MimZihKYMruSMViUo= +go.opentelemetry.io/otel v1.28.0/go.mod h1:q68ijF8Fc8CnMHKyzqL6akLO46ePnjkgfIMIjUIX9z4= +go.opentelemetry.io/otel/metric v1.28.0 h1:f0HGvSl1KRAU1DLgLGFjrwVyismPlnuU6JD6bOeuA5Q= +go.opentelemetry.io/otel/metric v1.28.0/go.mod h1:Fb1eVBFZmLVTMb6PPohq3TO9IIhUisDsbJoL/+uQW4s= +go.opentelemetry.io/otel/trace v1.28.0 h1:GhQ9cUuQGmNDd5BTCP2dAvv75RdMxEfTmYejp+lkx9g= +go.opentelemetry.io/otel/trace v1.28.0/go.mod h1:jPyXzNPg6da9+38HEwElrQiHlVMTnVfM3/yv2OlIHaI= +go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= +go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= +go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= +go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= +go.uber.org/zap v1.27.0 h1:aJMhYGrd5QSmlpLMr2MftRKl7t8J8PTZPA732ud/XR8= +go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.26.0 h1:soB7SVo0PWrY4vPW/+ay0jKDNScG2X9wFeYlXIvJsOQ= +golang.org/x/net v0.26.0/go.mod h1:5YKkiSynbBIh3p6iOc/vibscux0x38BZDkn8sCUPxHE= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.21.0 h1:rF+pYz3DAGSQAxAu1CbC7catZg4ebC4UIeIhKxBZvws= +golang.org/x/sys v0.21.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.16.0 h1:a94ExnEXNtEwYLGJSIUxnWoxoRz/ZcCsV63ROupILh4= +golang.org/x/text v0.16.0/go.mod h1:GhwF1Be+LQoKShO3cGOHzqOgRrGaYc9AvblQOmPVHnI= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= +golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240528184218-531527333157 h1:Zy9XzmMEflZ/MAaA7vNcoebnRAld7FsPW1EeBB7V0m8= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240528184218-531527333157/go.mod h1:EfXuqaE1J41VCDicxHzUDm+8rk+7ZdXzHV0IhO/I6s0= +google.golang.org/grpc v1.65.0 h1:bs/cUb4lp1G5iImFFd3u5ixQzweKizoZJAwBNLR42lc= +google.golang.org/grpc v1.65.0/go.mod h1:WgYC2ypjlB0EiQi6wdKixMqukr6lBc0Vo+oOgjrM5ZQ= +google.golang.org/protobuf v1.34.2 h1:6xV6lTsCfpGD21XK49h7MhtcApnLqkfYgPcdHftf6hg= +google.golang.org/protobuf v1.34.2/go.mod h1:qYOHts0dSfpeUzUFpOMr/WGzszTmLH+DiWniOlNbLDw= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/component/componentstatus/instance.go b/component/componentstatus/instance.go new file mode 100644 index 00000000000..e2158a63ec3 --- /dev/null +++ b/component/componentstatus/instance.go @@ -0,0 +1,13 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package componentstatus // import "go.opentelemetry.io/collector/component/componentstatus" + +import "go.opentelemetry.io/collector/component" + +// InstanceID uniquely identifies a component instance +type InstanceID struct { + ID component.ID + Kind component.Kind + PipelineIDs map[component.ID]struct{} +} diff --git a/component/componentstatus/status.go b/component/componentstatus/status.go new file mode 100644 index 00000000000..d05caafc186 --- /dev/null +++ b/component/componentstatus/status.go @@ -0,0 +1,211 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package componentstatus // import "go.opentelemetry.io/collector/component/componentstatus" + +import ( + "time" +) + +// Watcher is an extra interface for Extension hosted by the OpenTelemetry +// Collector that is to be implemented by extensions interested in changes to component +// status. +type Watcher interface { + // ComponentStatusChanged notifies about a change in the source component status. + // Extensions that implement this interface must be ready that the ComponentStatusChanged + // may be called before, after or concurrently with calls to Component.Start() and Component.Shutdown(). + // The function may be called concurrently with itself. + ComponentStatusChanged(source *InstanceID, event *Event) +} + +type Status int32 + +// Enumeration of possible component statuses +const ( + // StatusNone indicates absence of component status. + StatusNone Status = iota + // StatusStarting indicates the component is starting. + StatusStarting + // StatusOK indicates the component is running without issues. + StatusOK + // StatusRecoverableError indicates that the component has experienced a transient error and may recover. + StatusRecoverableError + // StatusPermanentError indicates that the component has detected a condition at runtime that will need human intervention to fix. The collector will continue to run in a degraded mode. + StatusPermanentError + // StatusFatalError indicates that the collector has experienced a fatal runtime error and will shut down. + StatusFatalError + // StatusStopping indicates that the component is in the process of shutting down. + StatusStopping + // StatusStopped indicates that the component has completed shutdown. + StatusStopped +) + +// String returns a string representation of a Status +func (s Status) String() string { + switch s { + case StatusStarting: + return "StatusStarting" + case StatusOK: + return "StatusOK" + case StatusRecoverableError: + return "StatusRecoverableError" + case StatusPermanentError: + return "StatusPermanentError" + case StatusFatalError: + return "StatusFatalError" + case StatusStopping: + return "StatusStopping" + case StatusStopped: + return "StatusStopped" + } + return "StatusNone" +} + +// Event contains a status and timestamp, and can contain an error +type Event struct { + status Status + err error + timestamp time.Time +} + +// Status returns the Status (enum) associated with the Event +func (ev *Event) Status() Status { + return ev.status +} + +// Err returns the error associated with the Event. +func (ev *Event) Err() error { + return ev.err +} + +// Timestamp returns the timestamp associated with the Event +func (ev *Event) Timestamp() time.Time { + return ev.timestamp +} + +// NewEvent creates and returns a Event with the specified status and sets the timestamp +// time.Now(). To set an error on the event for an error status use one of the dedicated +// constructors (e.g. NewRecoverableErrorEvent, NewPermanentErrorEvent, NewFatalErrorEvent) +func NewEvent(status Status) *Event { + return &Event{ + status: status, + timestamp: time.Now(), + } +} + +// NewRecoverableErrorEvent wraps a transient error +// passed as argument as a Event with a status StatusRecoverableError +// and a timestamp set to time.Now(). +func NewRecoverableErrorEvent(err error) *Event { + ev := NewEvent(StatusRecoverableError) + ev.err = err + return ev +} + +// NewPermanentErrorEvent wraps an error requiring human intervention to fix +// passed as argument as a Event with a status StatusPermanentError +// and a timestamp set to time.Now(). +func NewPermanentErrorEvent(err error) *Event { + ev := NewEvent(StatusPermanentError) + ev.err = err + return ev +} + +// NewFatalErrorEvent wraps the fatal runtime error passed as argument as a Event +// with a status StatusFatalError and a timestamp set to time.Now(). +func NewFatalErrorEvent(err error) *Event { + ev := NewEvent(StatusFatalError) + ev.err = err + return ev +} + +// AggregateStatus will derive a status for the given input using the following rules in order: +// 1. If all instances have the same status, there is nothing to aggregate, return it. +// 2. If any instance encounters a fatal error, the component is in a Fatal Error state. +// 3. If any instance is in a Permanent Error state, the component status is Permanent Error. +// 4. If any instance is Stopping, the component is in a Stopping state. +// 5. An instance is Stopped, but not all instances are Stopped, we must be in the process of Stopping the component. +// 6. If any instance is in a Recoverable Error state, the component status is Recoverable Error. +// 7. By process of elimination, the only remaining state is starting. +func AggregateStatus[K comparable](eventMap map[K]*Event) Status { + seen := make(map[Status]struct{}) + for _, ev := range eventMap { + seen[ev.Status()] = struct{}{} + } + + // All statuses are the same. Note, this will handle StatusOK and StatusStopped as these two + // cases require all components be in the same state. + if len(seen) == 1 { + for st := range seen { + return st + } + } + + // Handle mixed status cases + if _, isFatal := seen[StatusFatalError]; isFatal { + return StatusFatalError + } + + if _, isPermanent := seen[StatusPermanentError]; isPermanent { + return StatusPermanentError + } + + if _, isStopping := seen[StatusStopping]; isStopping { + return StatusStopping + } + + if _, isStopped := seen[StatusStopped]; isStopped { + return StatusStopping + } + + if _, isRecoverable := seen[StatusRecoverableError]; isRecoverable { + return StatusRecoverableError + } + + // By process of elimination, this is the last possible status; no check necessary. + return StatusStarting +} + +// StatusIsError returns true for error statuses (e.g. StatusRecoverableError, +// StatusPermanentError, or StatusFatalError) +func StatusIsError(status Status) bool { + return status == StatusRecoverableError || + status == StatusPermanentError || + status == StatusFatalError +} + +// AggregateStatusEvent returns a status event where: +// - The status is set to the aggregate status of the events in the eventMap +// - The timestamp is set to the latest timestamp of the events in the eventMap +// - For an error status, the event will have same error as the most current event of the same +// error type from the eventMap +func AggregateStatusEvent[K comparable](eventMap map[K]*Event) *Event { + var lastEvent, lastMatchingEvent *Event + aggregateStatus := AggregateStatus[K](eventMap) + + for _, ev := range eventMap { + if lastEvent == nil || lastEvent.timestamp.Before(ev.timestamp) { + lastEvent = ev + } + if aggregateStatus == ev.Status() && + (lastMatchingEvent == nil || lastMatchingEvent.timestamp.Before(ev.timestamp)) { + lastMatchingEvent = ev + } + } + + // the effective status matches an existing event + if lastEvent.Status() == aggregateStatus { + return lastEvent + } + + // the effective status requires a synthetic event + aggregateEvent := &Event{ + status: aggregateStatus, + timestamp: lastEvent.timestamp, + } + if StatusIsError(aggregateStatus) { + aggregateEvent.err = lastMatchingEvent.err + } + + return aggregateEvent +} diff --git a/component/componentstatus/status_test.go b/component/componentstatus/status_test.go new file mode 100644 index 00000000000..244988c9d6c --- /dev/null +++ b/component/componentstatus/status_test.go @@ -0,0 +1,330 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 +package componentstatus + +import ( + "fmt" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestNewStatusEvent(t *testing.T) { + statuses := []Status{ + StatusStarting, + StatusOK, + StatusRecoverableError, + StatusPermanentError, + StatusFatalError, + StatusStopping, + StatusStopped, + } + + for _, status := range statuses { + t.Run(fmt.Sprintf("%s without error", status), func(t *testing.T) { + ev := NewEvent(status) + require.Equal(t, status, ev.Status()) + require.Nil(t, ev.Err()) + require.False(t, ev.Timestamp().IsZero()) + }) + } +} + +func TestStatusEventsWithError(t *testing.T) { + statusConstructorMap := map[Status]func(error) *Event{ + StatusRecoverableError: NewRecoverableErrorEvent, + StatusPermanentError: NewPermanentErrorEvent, + StatusFatalError: NewFatalErrorEvent, + } + + for status, newEvent := range statusConstructorMap { + t.Run(fmt.Sprintf("error status constructor for: %s", status), func(t *testing.T) { + ev := newEvent(assert.AnError) + require.Equal(t, status, ev.Status()) + require.Equal(t, assert.AnError, ev.Err()) + require.False(t, ev.Timestamp().IsZero()) + }) + } +} + +func TestAggregateStatus(t *testing.T) { + for _, tc := range []struct { + name string + statusMap map[*InstanceID]*Event + expectedStatus Status + }{ + { + name: "aggregate status with fatal is FatalError", + statusMap: map[*InstanceID]*Event{ + {}: NewEvent(StatusStarting), + {}: NewEvent(StatusOK), + {}: NewEvent(StatusFatalError), + {}: NewEvent(StatusRecoverableError), + }, + expectedStatus: StatusFatalError, + }, + { + name: "aggregate status with permanent is PermanentError", + statusMap: map[*InstanceID]*Event{ + {}: NewEvent(StatusStarting), + {}: NewEvent(StatusOK), + {}: NewEvent(StatusPermanentError), + {}: NewEvent(StatusRecoverableError), + }, + expectedStatus: StatusPermanentError, + }, + { + name: "aggregate status with stopping is Stopping", + statusMap: map[*InstanceID]*Event{ + {}: NewEvent(StatusStarting), + {}: NewEvent(StatusOK), + {}: NewEvent(StatusRecoverableError), + {}: NewEvent(StatusStopping), + }, + expectedStatus: StatusStopping, + }, + { + name: "aggregate status with stopped and non-stopped is Stopping", + statusMap: map[*InstanceID]*Event{ + {}: NewEvent(StatusStarting), + {}: NewEvent(StatusOK), + {}: NewEvent(StatusRecoverableError), + {}: NewEvent(StatusStopped), + }, + expectedStatus: StatusStopping, + }, + { + name: "aggregate status with all stopped is Stopped", + statusMap: map[*InstanceID]*Event{ + {}: NewEvent(StatusStopped), + {}: NewEvent(StatusStopped), + {}: NewEvent(StatusStopped), + }, + expectedStatus: StatusStopped, + }, + { + name: "aggregate status with recoverable is RecoverableError", + statusMap: map[*InstanceID]*Event{ + {}: NewEvent(StatusStarting), + {}: NewEvent(StatusOK), + {}: NewEvent(StatusRecoverableError), + }, + expectedStatus: StatusRecoverableError, + }, + { + name: "aggregate status with starting is Starting", + statusMap: map[*InstanceID]*Event{ + {}: NewEvent(StatusStarting), + {}: NewEvent(StatusOK), + }, + expectedStatus: StatusStarting, + }, + { + name: "aggregate status with all ok is OK", + statusMap: map[*InstanceID]*Event{ + {}: NewEvent(StatusOK), + {}: NewEvent(StatusOK), + {}: NewEvent(StatusOK), + }, + expectedStatus: StatusOK, + }, + } { + t.Run(tc.name, func(t *testing.T) { + assert.Equal(t, tc.expectedStatus, AggregateStatus(tc.statusMap)) + }) + } +} + +func TestStatusIsError(t *testing.T) { + for _, tc := range []struct { + status Status + isError bool + }{ + { + status: StatusStarting, + isError: false, + }, + { + status: StatusOK, + isError: false, + }, + { + status: StatusRecoverableError, + isError: true, + }, + { + status: StatusPermanentError, + isError: true, + }, + { + status: StatusFatalError, + isError: true, + }, + { + status: StatusStopping, + isError: false, + }, + { + status: StatusStopped, + isError: false, + }, + } { + name := fmt.Sprintf("StatusIsError(%s) is %t", tc.status, tc.isError) + t.Run(name, func(t *testing.T) { + assert.Equal(t, tc.isError, StatusIsError(tc.status)) + }) + } +} + +func TestAggregateStatusEvent(t *testing.T) { + // maxTime is used to make sure we select the event with the latest timestamp + maxTime := time.Unix(1<<63-62135596801, 999999999) + // latest sets the timestamp for an event to maxTime + latest := func(ev *Event) *Event { + ev.timestamp = maxTime + return ev + } + + for _, tc := range []struct { + name string + statusMap map[*InstanceID]*Event + expectedStatus *Event + }{ + { + name: "FatalError - existing event", + statusMap: map[*InstanceID]*Event{ + {}: NewEvent(StatusStarting), + {}: NewEvent(StatusOK), + {}: latest(NewFatalErrorEvent(assert.AnError)), + {}: NewEvent(StatusRecoverableError), + }, + expectedStatus: &Event{ + status: StatusFatalError, + timestamp: maxTime, + err: assert.AnError, + }, + }, + { + name: "FatalError - synthetic event", + statusMap: map[*InstanceID]*Event{ + {}: NewEvent(StatusStarting), + {}: NewEvent(StatusOK), + {}: NewFatalErrorEvent(assert.AnError), + {}: latest(NewEvent(StatusRecoverableError)), + }, + expectedStatus: &Event{ + status: StatusFatalError, + timestamp: maxTime, + err: assert.AnError, + }, + }, + { + name: "PermanentError - existing event", + statusMap: map[*InstanceID]*Event{ + {}: NewEvent(StatusStarting), + {}: NewEvent(StatusOK), + {}: latest(NewPermanentErrorEvent(assert.AnError)), + {}: NewEvent(StatusRecoverableError), + }, + expectedStatus: &Event{ + status: StatusPermanentError, + timestamp: maxTime, + err: assert.AnError, + }, + }, + { + name: "PermanentError - synthetic event", + statusMap: map[*InstanceID]*Event{ + {}: NewEvent(StatusStarting), + {}: NewEvent(StatusOK), + {}: NewPermanentErrorEvent(assert.AnError), + {}: latest(NewEvent(StatusRecoverableError)), + }, + expectedStatus: &Event{ + status: StatusPermanentError, + timestamp: maxTime, + err: assert.AnError, + }, + }, + { + name: "Stopping - existing event", + statusMap: map[*InstanceID]*Event{ + {}: NewEvent(StatusStarting), + {}: NewEvent(StatusOK), + {}: NewEvent(StatusRecoverableError), + {}: latest(NewEvent(StatusStopping)), + }, + expectedStatus: &Event{ + status: StatusStopping, + timestamp: maxTime, + }, + }, + { + name: "Stopping - synthetic event", + statusMap: map[*InstanceID]*Event{ + {}: NewEvent(StatusStarting), + {}: NewEvent(StatusOK), + {}: NewEvent(StatusRecoverableError), + {}: latest(NewEvent(StatusStopped)), + }, + expectedStatus: &Event{ + status: StatusStopping, + timestamp: maxTime, + }, + }, + { + name: "Stopped - existing event", + statusMap: map[*InstanceID]*Event{ + {}: NewEvent(StatusStopped), + {}: latest(NewEvent(StatusStopped)), + {}: NewEvent(StatusStopped), + }, + expectedStatus: &Event{ + status: StatusStopped, + timestamp: maxTime, + }, + }, + { + name: "RecoverableError - existing event", + statusMap: map[*InstanceID]*Event{ + {}: NewEvent(StatusStarting), + {}: NewEvent(StatusOK), + {}: latest(NewRecoverableErrorEvent(assert.AnError)), + }, + expectedStatus: &Event{ + status: StatusRecoverableError, + timestamp: maxTime, + err: assert.AnError, + }, + }, + { + name: "Starting - synthetic event", + statusMap: map[*InstanceID]*Event{ + {}: NewEvent(StatusStarting), + {}: latest(NewEvent(StatusOK)), + }, + expectedStatus: &Event{ + status: StatusStarting, + timestamp: maxTime, + }, + }, + { + name: "OK - existing event", + statusMap: map[*InstanceID]*Event{ + {}: NewEvent(StatusOK), + {}: latest(NewEvent(StatusOK)), + {}: NewEvent(StatusOK), + }, + expectedStatus: &Event{ + status: StatusOK, + timestamp: maxTime, + }, + }, + } { + t.Run(tc.name, func(t *testing.T) { + assert.Equal(t, tc.expectedStatus, AggregateStatusEvent(tc.statusMap)) + }) + } +} diff --git a/versions.yaml b/versions.yaml index d38de076543..e8b0dd30adf 100644 --- a/versions.yaml +++ b/versions.yaml @@ -20,6 +20,7 @@ module-sets: - go.opentelemetry.io/collector/cmd/builder - go.opentelemetry.io/collector/cmd/mdatagen - go.opentelemetry.io/collector/component + - go.opentelemetry.io/collector/component/componentstatus - go.opentelemetry.io/collector/component/componentprofiles - go.opentelemetry.io/collector/confmap - go.opentelemetry.io/collector/confmap/converter/expandconverter From 9654aa15c6b21a2066612335176341e91b43b1a1 Mon Sep 17 00:00:00 2001 From: Tyler Helmuth <12352919+TylerHelmuth@users.noreply.github.com> Date: Fri, 26 Jul 2024 13:15:45 -0600 Subject: [PATCH 02/11] changelog --- .chloggen/componentstatus-new-module.yaml | 25 +++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 .chloggen/componentstatus-new-module.yaml diff --git a/.chloggen/componentstatus-new-module.yaml b/.chloggen/componentstatus-new-module.yaml new file mode 100644 index 00000000000..17a11840b58 --- /dev/null +++ b/.chloggen/componentstatus-new-module.yaml @@ -0,0 +1,25 @@ +# Use this changelog template to create an entry for release notes. + +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: new_component + +# The name of the component, or a single word describing the area of concern, (e.g. otlpreceiver) +component: componentstatus + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: Adds new componentstatus module that will soon replace status content in component. + +# One or more tracking issues or pull requests related to the change +issues: [10730] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. +subtext: + +# Optional: The change log or logs in which this entry should be included. +# e.g. '[user]' or '[user, api]' +# Include 'user' if the change is relevant to end users. +# Include 'api' if there is a change to a library API. +# Default: '[user]' +change_logs: [api] From 4132d0580cb52c3ad0c4032266422f7ac51fcffe Mon Sep 17 00:00:00 2001 From: Tyler Helmuth <12352919+TylerHelmuth@users.noreply.github.com> Date: Fri, 26 Jul 2024 13:18:26 -0600 Subject: [PATCH 03/11] Update name --- component/componentstatus/status.go | 4 ++-- component/componentstatus/status_test.go | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/component/componentstatus/status.go b/component/componentstatus/status.go index d05caafc186..67dcc12bae3 100644 --- a/component/componentstatus/status.go +++ b/component/componentstatus/status.go @@ -174,12 +174,12 @@ func StatusIsError(status Status) bool { status == StatusFatalError } -// AggregateStatusEvent returns a status event where: +// AggregateEvent returns a status event where: // - The status is set to the aggregate status of the events in the eventMap // - The timestamp is set to the latest timestamp of the events in the eventMap // - For an error status, the event will have same error as the most current event of the same // error type from the eventMap -func AggregateStatusEvent[K comparable](eventMap map[K]*Event) *Event { +func AggregateEvent[K comparable](eventMap map[K]*Event) *Event { var lastEvent, lastMatchingEvent *Event aggregateStatus := AggregateStatus[K](eventMap) diff --git a/component/componentstatus/status_test.go b/component/componentstatus/status_test.go index 244988c9d6c..520ae142fcb 100644 --- a/component/componentstatus/status_test.go +++ b/component/componentstatus/status_test.go @@ -324,7 +324,7 @@ func TestAggregateStatusEvent(t *testing.T) { }, } { t.Run(tc.name, func(t *testing.T) { - assert.Equal(t, tc.expectedStatus, AggregateStatusEvent(tc.statusMap)) + assert.Equal(t, tc.expectedStatus, AggregateEvent(tc.statusMap)) }) } } From 1c35eb7956a66120c84b33388b42b24a09868be0 Mon Sep 17 00:00:00 2001 From: Tyler Helmuth <12352919+TylerHelmuth@users.noreply.github.com> Date: Fri, 26 Jul 2024 13:25:33 -0600 Subject: [PATCH 04/11] make crosslink --- component/componentstatus/go.mod | 6 ++++++ component/componentstatus/go.sum | 6 ------ 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/component/componentstatus/go.mod b/component/componentstatus/go.mod index 5b13e32b40c..1813786d2e8 100644 --- a/component/componentstatus/go.mod +++ b/component/componentstatus/go.mod @@ -26,3 +26,9 @@ require ( google.golang.org/protobuf v1.34.2 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) + +replace go.opentelemetry.io/collector/config/configtelemetry => ../../config/configtelemetry + +replace go.opentelemetry.io/collector/component => ../ + +replace go.opentelemetry.io/collector/pdata => ../../pdata diff --git a/component/componentstatus/go.sum b/component/componentstatus/go.sum index 08dfab8d5c9..147b5cbbcda 100644 --- a/component/componentstatus/go.sum +++ b/component/componentstatus/go.sum @@ -22,12 +22,6 @@ github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsT github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= -go.opentelemetry.io/collector/component v0.105.0 h1:/OdkWHd1xTNX7JRq9iW3AFoJAnYUOGZZyOprNQkGoTI= -go.opentelemetry.io/collector/component v0.105.0/go.mod h1:s8KoxOrhNIBzetkb0LHmzX1OI67DyZbaaUPOWIXS1mg= -go.opentelemetry.io/collector/config/configtelemetry v0.105.0 h1:wEfUxAjjstp47aLr2s1cMZiH0dt+k42m6VC6HigqgJA= -go.opentelemetry.io/collector/config/configtelemetry v0.105.0/go.mod h1:WxWKNVAQJg/Io1nA3xLgn/DWLE/W1QOB2+/Js3ACi40= -go.opentelemetry.io/collector/pdata v1.12.0 h1:Xx5VK1p4VO0md8MWm2icwC1MnJ7f8EimKItMWw46BmA= -go.opentelemetry.io/collector/pdata v1.12.0/go.mod h1:MYeB0MmMAxeM0hstCFrCqWLzdyeYySim2dG6pDT6nYI= go.opentelemetry.io/otel v1.28.0 h1:/SqNcYk+idO0CxKEUOtKQClMK/MimZihKYMruSMViUo= go.opentelemetry.io/otel v1.28.0/go.mod h1:q68ijF8Fc8CnMHKyzqL6akLO46ePnjkgfIMIjUIX9z4= go.opentelemetry.io/otel/metric v1.28.0 h1:f0HGvSl1KRAU1DLgLGFjrwVyismPlnuU6JD6bOeuA5Q= From 270752dec7c9586e4efd8024a187cd3e4e6ba412 Mon Sep 17 00:00:00 2001 From: Tyler Helmuth <12352919+TylerHelmuth@users.noreply.github.com> Date: Mon, 29 Jul 2024 16:40:30 -0600 Subject: [PATCH 05/11] Update componentstatus deps --- component/componentstatus/go.mod | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/component/componentstatus/go.mod b/component/componentstatus/go.mod index 1813786d2e8..ccd0e82608f 100644 --- a/component/componentstatus/go.mod +++ b/component/componentstatus/go.mod @@ -4,14 +4,14 @@ go 1.21.0 require ( github.com/stretchr/testify v1.9.0 - go.opentelemetry.io/collector/component v0.105.0 + go.opentelemetry.io/collector/component v0.106.0 ) require ( github.com/davecgh/go-spew v1.1.1 // indirect github.com/gogo/protobuf v1.3.2 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect - go.opentelemetry.io/collector/config/configtelemetry v0.105.0 // indirect + go.opentelemetry.io/collector/config/configtelemetry v0.106.0 // indirect go.opentelemetry.io/collector/pdata v1.12.0 // indirect go.opentelemetry.io/otel v1.28.0 // indirect go.opentelemetry.io/otel/metric v1.28.0 // indirect From 5a17c560e8787c7286174fd71e147a283c1fe9e7 Mon Sep 17 00:00:00 2001 From: Tyler Helmuth <12352919+TylerHelmuth@users.noreply.github.com> Date: Tue, 30 Jul 2024 02:36:37 +0000 Subject: [PATCH 06/11] Update component/componentstatus/status.go Co-authored-by: Matthew Wear --- component/componentstatus/status.go | 90 ----------------------------- 1 file changed, 90 deletions(-) diff --git a/component/componentstatus/status.go b/component/componentstatus/status.go index 67dcc12bae3..14e3554dd2a 100644 --- a/component/componentstatus/status.go +++ b/component/componentstatus/status.go @@ -119,93 +119,3 @@ func NewFatalErrorEvent(err error) *Event { return ev } -// AggregateStatus will derive a status for the given input using the following rules in order: -// 1. If all instances have the same status, there is nothing to aggregate, return it. -// 2. If any instance encounters a fatal error, the component is in a Fatal Error state. -// 3. If any instance is in a Permanent Error state, the component status is Permanent Error. -// 4. If any instance is Stopping, the component is in a Stopping state. -// 5. An instance is Stopped, but not all instances are Stopped, we must be in the process of Stopping the component. -// 6. If any instance is in a Recoverable Error state, the component status is Recoverable Error. -// 7. By process of elimination, the only remaining state is starting. -func AggregateStatus[K comparable](eventMap map[K]*Event) Status { - seen := make(map[Status]struct{}) - for _, ev := range eventMap { - seen[ev.Status()] = struct{}{} - } - - // All statuses are the same. Note, this will handle StatusOK and StatusStopped as these two - // cases require all components be in the same state. - if len(seen) == 1 { - for st := range seen { - return st - } - } - - // Handle mixed status cases - if _, isFatal := seen[StatusFatalError]; isFatal { - return StatusFatalError - } - - if _, isPermanent := seen[StatusPermanentError]; isPermanent { - return StatusPermanentError - } - - if _, isStopping := seen[StatusStopping]; isStopping { - return StatusStopping - } - - if _, isStopped := seen[StatusStopped]; isStopped { - return StatusStopping - } - - if _, isRecoverable := seen[StatusRecoverableError]; isRecoverable { - return StatusRecoverableError - } - - // By process of elimination, this is the last possible status; no check necessary. - return StatusStarting -} - -// StatusIsError returns true for error statuses (e.g. StatusRecoverableError, -// StatusPermanentError, or StatusFatalError) -func StatusIsError(status Status) bool { - return status == StatusRecoverableError || - status == StatusPermanentError || - status == StatusFatalError -} - -// AggregateEvent returns a status event where: -// - The status is set to the aggregate status of the events in the eventMap -// - The timestamp is set to the latest timestamp of the events in the eventMap -// - For an error status, the event will have same error as the most current event of the same -// error type from the eventMap -func AggregateEvent[K comparable](eventMap map[K]*Event) *Event { - var lastEvent, lastMatchingEvent *Event - aggregateStatus := AggregateStatus[K](eventMap) - - for _, ev := range eventMap { - if lastEvent == nil || lastEvent.timestamp.Before(ev.timestamp) { - lastEvent = ev - } - if aggregateStatus == ev.Status() && - (lastMatchingEvent == nil || lastMatchingEvent.timestamp.Before(ev.timestamp)) { - lastMatchingEvent = ev - } - } - - // the effective status matches an existing event - if lastEvent.Status() == aggregateStatus { - return lastEvent - } - - // the effective status requires a synthetic event - aggregateEvent := &Event{ - status: aggregateStatus, - timestamp: lastEvent.timestamp, - } - if StatusIsError(aggregateStatus) { - aggregateEvent.err = lastMatchingEvent.err - } - - return aggregateEvent -} From a5366a62be0035c4a3a8581374f769cc54d7cc21 Mon Sep 17 00:00:00 2001 From: Tyler Helmuth <12352919+TylerHelmuth@users.noreply.github.com> Date: Mon, 29 Jul 2024 20:37:01 -0600 Subject: [PATCH 07/11] Update component/componentstatus/status_test.go Co-authored-by: Matthew Wear --- component/componentstatus/status_test.go | 87 ------------------------ 1 file changed, 87 deletions(-) diff --git a/component/componentstatus/status_test.go b/component/componentstatus/status_test.go index 520ae142fcb..4651429dac8 100644 --- a/component/componentstatus/status_test.go +++ b/component/componentstatus/status_test.go @@ -49,93 +49,6 @@ func TestStatusEventsWithError(t *testing.T) { } } -func TestAggregateStatus(t *testing.T) { - for _, tc := range []struct { - name string - statusMap map[*InstanceID]*Event - expectedStatus Status - }{ - { - name: "aggregate status with fatal is FatalError", - statusMap: map[*InstanceID]*Event{ - {}: NewEvent(StatusStarting), - {}: NewEvent(StatusOK), - {}: NewEvent(StatusFatalError), - {}: NewEvent(StatusRecoverableError), - }, - expectedStatus: StatusFatalError, - }, - { - name: "aggregate status with permanent is PermanentError", - statusMap: map[*InstanceID]*Event{ - {}: NewEvent(StatusStarting), - {}: NewEvent(StatusOK), - {}: NewEvent(StatusPermanentError), - {}: NewEvent(StatusRecoverableError), - }, - expectedStatus: StatusPermanentError, - }, - { - name: "aggregate status with stopping is Stopping", - statusMap: map[*InstanceID]*Event{ - {}: NewEvent(StatusStarting), - {}: NewEvent(StatusOK), - {}: NewEvent(StatusRecoverableError), - {}: NewEvent(StatusStopping), - }, - expectedStatus: StatusStopping, - }, - { - name: "aggregate status with stopped and non-stopped is Stopping", - statusMap: map[*InstanceID]*Event{ - {}: NewEvent(StatusStarting), - {}: NewEvent(StatusOK), - {}: NewEvent(StatusRecoverableError), - {}: NewEvent(StatusStopped), - }, - expectedStatus: StatusStopping, - }, - { - name: "aggregate status with all stopped is Stopped", - statusMap: map[*InstanceID]*Event{ - {}: NewEvent(StatusStopped), - {}: NewEvent(StatusStopped), - {}: NewEvent(StatusStopped), - }, - expectedStatus: StatusStopped, - }, - { - name: "aggregate status with recoverable is RecoverableError", - statusMap: map[*InstanceID]*Event{ - {}: NewEvent(StatusStarting), - {}: NewEvent(StatusOK), - {}: NewEvent(StatusRecoverableError), - }, - expectedStatus: StatusRecoverableError, - }, - { - name: "aggregate status with starting is Starting", - statusMap: map[*InstanceID]*Event{ - {}: NewEvent(StatusStarting), - {}: NewEvent(StatusOK), - }, - expectedStatus: StatusStarting, - }, - { - name: "aggregate status with all ok is OK", - statusMap: map[*InstanceID]*Event{ - {}: NewEvent(StatusOK), - {}: NewEvent(StatusOK), - {}: NewEvent(StatusOK), - }, - expectedStatus: StatusOK, - }, - } { - t.Run(tc.name, func(t *testing.T) { - assert.Equal(t, tc.expectedStatus, AggregateStatus(tc.statusMap)) - }) - } -} func TestStatusIsError(t *testing.T) { for _, tc := range []struct { From e0c4f98445919c66807d39035f303a252ea73cf5 Mon Sep 17 00:00:00 2001 From: Tyler Helmuth <12352919+TylerHelmuth@users.noreply.github.com> Date: Mon, 29 Jul 2024 20:37:48 -0600 Subject: [PATCH 08/11] Update component/componentstatus/status_test.go Co-authored-by: Matthew Wear --- component/componentstatus/status_test.go | 150 ----------------------- 1 file changed, 150 deletions(-) diff --git a/component/componentstatus/status_test.go b/component/componentstatus/status_test.go index 4651429dac8..aeb7e3e6d23 100644 --- a/component/componentstatus/status_test.go +++ b/component/componentstatus/status_test.go @@ -91,153 +91,3 @@ func TestStatusIsError(t *testing.T) { } } -func TestAggregateStatusEvent(t *testing.T) { - // maxTime is used to make sure we select the event with the latest timestamp - maxTime := time.Unix(1<<63-62135596801, 999999999) - // latest sets the timestamp for an event to maxTime - latest := func(ev *Event) *Event { - ev.timestamp = maxTime - return ev - } - - for _, tc := range []struct { - name string - statusMap map[*InstanceID]*Event - expectedStatus *Event - }{ - { - name: "FatalError - existing event", - statusMap: map[*InstanceID]*Event{ - {}: NewEvent(StatusStarting), - {}: NewEvent(StatusOK), - {}: latest(NewFatalErrorEvent(assert.AnError)), - {}: NewEvent(StatusRecoverableError), - }, - expectedStatus: &Event{ - status: StatusFatalError, - timestamp: maxTime, - err: assert.AnError, - }, - }, - { - name: "FatalError - synthetic event", - statusMap: map[*InstanceID]*Event{ - {}: NewEvent(StatusStarting), - {}: NewEvent(StatusOK), - {}: NewFatalErrorEvent(assert.AnError), - {}: latest(NewEvent(StatusRecoverableError)), - }, - expectedStatus: &Event{ - status: StatusFatalError, - timestamp: maxTime, - err: assert.AnError, - }, - }, - { - name: "PermanentError - existing event", - statusMap: map[*InstanceID]*Event{ - {}: NewEvent(StatusStarting), - {}: NewEvent(StatusOK), - {}: latest(NewPermanentErrorEvent(assert.AnError)), - {}: NewEvent(StatusRecoverableError), - }, - expectedStatus: &Event{ - status: StatusPermanentError, - timestamp: maxTime, - err: assert.AnError, - }, - }, - { - name: "PermanentError - synthetic event", - statusMap: map[*InstanceID]*Event{ - {}: NewEvent(StatusStarting), - {}: NewEvent(StatusOK), - {}: NewPermanentErrorEvent(assert.AnError), - {}: latest(NewEvent(StatusRecoverableError)), - }, - expectedStatus: &Event{ - status: StatusPermanentError, - timestamp: maxTime, - err: assert.AnError, - }, - }, - { - name: "Stopping - existing event", - statusMap: map[*InstanceID]*Event{ - {}: NewEvent(StatusStarting), - {}: NewEvent(StatusOK), - {}: NewEvent(StatusRecoverableError), - {}: latest(NewEvent(StatusStopping)), - }, - expectedStatus: &Event{ - status: StatusStopping, - timestamp: maxTime, - }, - }, - { - name: "Stopping - synthetic event", - statusMap: map[*InstanceID]*Event{ - {}: NewEvent(StatusStarting), - {}: NewEvent(StatusOK), - {}: NewEvent(StatusRecoverableError), - {}: latest(NewEvent(StatusStopped)), - }, - expectedStatus: &Event{ - status: StatusStopping, - timestamp: maxTime, - }, - }, - { - name: "Stopped - existing event", - statusMap: map[*InstanceID]*Event{ - {}: NewEvent(StatusStopped), - {}: latest(NewEvent(StatusStopped)), - {}: NewEvent(StatusStopped), - }, - expectedStatus: &Event{ - status: StatusStopped, - timestamp: maxTime, - }, - }, - { - name: "RecoverableError - existing event", - statusMap: map[*InstanceID]*Event{ - {}: NewEvent(StatusStarting), - {}: NewEvent(StatusOK), - {}: latest(NewRecoverableErrorEvent(assert.AnError)), - }, - expectedStatus: &Event{ - status: StatusRecoverableError, - timestamp: maxTime, - err: assert.AnError, - }, - }, - { - name: "Starting - synthetic event", - statusMap: map[*InstanceID]*Event{ - {}: NewEvent(StatusStarting), - {}: latest(NewEvent(StatusOK)), - }, - expectedStatus: &Event{ - status: StatusStarting, - timestamp: maxTime, - }, - }, - { - name: "OK - existing event", - statusMap: map[*InstanceID]*Event{ - {}: NewEvent(StatusOK), - {}: latest(NewEvent(StatusOK)), - {}: NewEvent(StatusOK), - }, - expectedStatus: &Event{ - status: StatusOK, - timestamp: maxTime, - }, - }, - } { - t.Run(tc.name, func(t *testing.T) { - assert.Equal(t, tc.expectedStatus, AggregateEvent(tc.statusMap)) - }) - } -} From fb98b95a9a057e63492cb48d8bcfcabae5116e19 Mon Sep 17 00:00:00 2001 From: Tyler Helmuth <12352919+TylerHelmuth@users.noreply.github.com> Date: Tue, 30 Jul 2024 08:25:08 -0600 Subject: [PATCH 09/11] Fix test --- component/componentstatus/status.go | 7 +++++++ component/componentstatus/status_test.go | 3 --- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/component/componentstatus/status.go b/component/componentstatus/status.go index 14e3554dd2a..57a691dab57 100644 --- a/component/componentstatus/status.go +++ b/component/componentstatus/status.go @@ -119,3 +119,10 @@ func NewFatalErrorEvent(err error) *Event { return ev } +// StatusIsError returns true for error statuses (e.g. StatusRecoverableError, +// StatusPermanentError, or StatusFatalError) +func StatusIsError(status Status) bool { + return status == StatusRecoverableError || + status == StatusPermanentError || + status == StatusFatalError +} diff --git a/component/componentstatus/status_test.go b/component/componentstatus/status_test.go index aeb7e3e6d23..7d66f2c5c3d 100644 --- a/component/componentstatus/status_test.go +++ b/component/componentstatus/status_test.go @@ -5,7 +5,6 @@ package componentstatus import ( "fmt" "testing" - "time" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -49,7 +48,6 @@ func TestStatusEventsWithError(t *testing.T) { } } - func TestStatusIsError(t *testing.T) { for _, tc := range []struct { status Status @@ -90,4 +88,3 @@ func TestStatusIsError(t *testing.T) { }) } } - From 6251a9fb18aef5ea6851d40517c8ed70bd8927c2 Mon Sep 17 00:00:00 2001 From: Tyler Helmuth <12352919+TylerHelmuth@users.noreply.github.com> Date: Tue, 30 Jul 2024 18:25:44 -0600 Subject: [PATCH 10/11] Update with TODOs and godoc warning --- component/componentstatus/instance.go | 3 +++ component/componentstatus/status.go | 15 +++++++++++++-- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/component/componentstatus/instance.go b/component/componentstatus/instance.go index e2158a63ec3..22ea4c8003e 100644 --- a/component/componentstatus/instance.go +++ b/component/componentstatus/instance.go @@ -6,6 +6,9 @@ package componentstatus // import "go.opentelemetry.io/collector/component/compo import "go.opentelemetry.io/collector/component" // InstanceID uniquely identifies a component instance +// +// TODO: consider moving this struct to a new package/module like `extension/statuswatcher` +// https://github.com/open-telemetry/opentelemetry-collector/issues/10764 type InstanceID struct { ID component.ID Kind component.Kind diff --git a/component/componentstatus/status.go b/component/componentstatus/status.go index 57a691dab57..6f016cf0797 100644 --- a/component/componentstatus/status.go +++ b/component/componentstatus/status.go @@ -1,6 +1,12 @@ // Copyright The OpenTelemetry Authors // SPDX-License-Identifier: Apache-2.0 +// Package componentstatus is an experimental module that defines how components should +// report health statues, how collector hosts should facilitate component status reporting, +// and how extensions should watch for new component statuses. +// +// This package is currently under development and is exempt from the Collector SIG's +// breaking change policy. package componentstatus // import "go.opentelemetry.io/collector/component/componentstatus" import ( @@ -10,6 +16,9 @@ import ( // Watcher is an extra interface for Extension hosted by the OpenTelemetry // Collector that is to be implemented by extensions interested in changes to component // status. +// +// TODO: consider moving this interface to a new package/module like `extension/statuswatcher` +// https://github.com/open-telemetry/opentelemetry-collector/issues/10764 type Watcher interface { // ComponentStatusChanged notifies about a change in the source component status. // Extensions that implement this interface must be ready that the ComponentStatusChanged @@ -63,8 +72,10 @@ func (s Status) String() string { // Event contains a status and timestamp, and can contain an error type Event struct { - status Status - err error + status Status + err error + // TODO: consider if a timestamp is necessary in the default Event struct or is needed only for the healthcheckv2 extension + // https://github.com/open-telemetry/opentelemetry-collector/issues/10763 timestamp time.Time } From e99f56daabc9e999a3565da71dd2719572f92ce5 Mon Sep 17 00:00:00 2001 From: Tyler Helmuth <12352919+TylerHelmuth@users.noreply.github.com> Date: Tue, 30 Jul 2024 18:29:36 -0600 Subject: [PATCH 11/11] run make gotidy --- component/componentstatus/go.mod | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/component/componentstatus/go.mod b/component/componentstatus/go.mod index ccd0e82608f..c02c171b802 100644 --- a/component/componentstatus/go.mod +++ b/component/componentstatus/go.mod @@ -4,14 +4,14 @@ go 1.21.0 require ( github.com/stretchr/testify v1.9.0 - go.opentelemetry.io/collector/component v0.106.0 + go.opentelemetry.io/collector/component v0.106.1 ) require ( github.com/davecgh/go-spew v1.1.1 // indirect github.com/gogo/protobuf v1.3.2 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect - go.opentelemetry.io/collector/config/configtelemetry v0.106.0 // indirect + go.opentelemetry.io/collector/config/configtelemetry v0.106.1 // indirect go.opentelemetry.io/collector/pdata v1.12.0 // indirect go.opentelemetry.io/otel v1.28.0 // indirect go.opentelemetry.io/otel/metric v1.28.0 // indirect