diff --git a/metric/system/process/helper_test.go b/metric/system/process/helper_test.go deleted file mode 100644 index 841b2f172..000000000 --- a/metric/system/process/helper_test.go +++ /dev/null @@ -1,76 +0,0 @@ -// Licensed to Elasticsearch B.V. under one or more contributor -// license agreements. See the NOTICE file distributed with -// this work for additional information regarding copyright -// ownership. Elasticsearch B.V. licenses this file to you under -// the Apache License, Version 2.0 (the "License"); you may -// not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package process - -import ( - "errors" - "fmt" - "syscall" - "testing" - - "github.com/stretchr/testify/require" -) - -func TestErrors(t *testing.T) { - cases := []struct { - name string - check func(t *testing.T) - }{ - { - name: "non fatal error", - check: func(t *testing.T) { - err := fmt.Errorf("Faced non-fatal error: %w", NonFatalErr{Err: syscall.EPERM}) - require.True(t, isNonFatal(err), "Should be a non fatal error") - }, - }, - { - name: "non fatal error - unwrapped", - check: func(t *testing.T) { - err := fmt.Errorf("Faced non-fatal error: %w", syscall.EPERM) - require.True(t, isNonFatal(err), "Should be a non fatal error") - }, - }, - { - name: "non fatal error - hierarchy", - check: func(t *testing.T) { - err := fmt.Errorf("Faced non-fatal error: %w", syscall.EPERM) - err2 := errors.Join(toNonFatal(err)) - require.True(t, isNonFatal(err2), "Should be a non fatal error") - }, - }, - { - name: "fatal error", - check: func(t *testing.T) { - err := fmt.Errorf("Faced fatal error: %w", errors.New("FATAL")) - err = toNonFatal(err) // shouldn't have any effect as it's a fatal error - require.Falsef(t, isNonFatal(err), "Should be a fatal error") - }, - }, - { - name: "fatal error - hierarchy", - check: func(t *testing.T) { - err := fmt.Errorf("Faced fatal error: %w", errors.New("FATAL")) - err2 := errors.Join(err) - require.Falsef(t, isNonFatal(err2), "Should be a fatal error") - }, - }, - } - for _, c := range cases { - t.Run(c.name, c.check) - } -} diff --git a/metric/system/process/helpers.go b/metric/system/process/helpers.go index 6f4c4931c..362d7953d 100644 --- a/metric/system/process/helpers.go +++ b/metric/system/process/helpers.go @@ -98,40 +98,3 @@ func GetProcCPUPercentage(s0, s1 ProcState) ProcState { return s1 } - -// NonFatalErr indicates an error occurred during metrics -// collection, however the metrics already -// gathered and returned are still valid. -// This error can be safely ignored, this will result -// in having partial metrics for a process rather than -// no metrics at all. -// -// It was introduced to allow for partial metrics collection -// on privileged process on Windows. -type NonFatalErr struct { - Err error -} - -func (c NonFatalErr) Error() string { - return "Not enough privileges to fetch information: " + c.Err.Error() -} - -func (c NonFatalErr) Is(other error) bool { - _, is := other.(NonFatalErr) - return is -} - -func (c NonFatalErr) Unwrap() error { - return c.Err -} - -// Wraps a NonFatalError around a generic error, if given error is non-fatal in nature -func toNonFatal(err error) error { - if err == nil { - return nil - } - if !isNonFatal(err) { - return err - } - return NonFatalErr{Err: err} -} diff --git a/metric/system/process/helpers_others.go b/metric/system/process/helpers_others.go deleted file mode 100644 index 2873a5e4c..000000000 --- a/metric/system/process/helpers_others.go +++ /dev/null @@ -1,35 +0,0 @@ -// Licensed to Elasticsearch B.V. under one or more contributor -// license agreements. See the NOTICE file distributed with -// this work for additional information regarding copyright -// ownership. Elasticsearch B.V. licenses this file to you under -// the Apache License, Version 2.0 (the "License"); you may -// not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//go:build !windows - -package process - -import ( - "errors" - "syscall" -) - -func isNonFatal(err error) bool { - if err == nil { - return true - } - return (errors.Is(err, syscall.EACCES) || - errors.Is(err, syscall.EPERM) || - errors.Is(err, syscall.EINVAL) || - errors.Is(err, NonFatalErr{})) -} diff --git a/metric/system/process/helpers_windows.go b/metric/system/process/helpers_windows.go deleted file mode 100644 index 94e4b882d..000000000 --- a/metric/system/process/helpers_windows.go +++ /dev/null @@ -1,37 +0,0 @@ -// Licensed to Elasticsearch B.V. under one or more contributor -// license agreements. See the NOTICE file distributed with -// this work for additional information regarding copyright -// ownership. Elasticsearch B.V. licenses this file to you under -// the Apache License, Version 2.0 (the "License"); you may -// not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//go:build windows - -package process - -import ( - "errors" - "syscall" - - "golang.org/x/sys/windows" -) - -func isNonFatal(err error) bool { - if err == nil { - return true - } - return errors.Is(err, windows.ERROR_ACCESS_DENIED) || - errors.Is(err, syscall.EPERM) || - errors.Is(err, syscall.EINVAL) || - errors.Is(err, windows.ERROR_INVALID_PARAMETER) || errors.Is(err, NonFatalErr{}) -} diff --git a/metric/system/process/process.go b/metric/system/process/process.go index 2212158f6..4a9151b61 100644 --- a/metric/system/process/process.go +++ b/metric/system/process/process.go @@ -25,7 +25,6 @@ import ( "fmt" "sort" "strings" - "syscall" "time" psutil "github.com/shirou/gopsutil/v3/process" @@ -55,11 +54,11 @@ func ListStates(hostfs resolve.Resolver) ([]ProcState, error) { // actually fetch the PIDs from the OS-specific code _, plist, err := init.FetchPids() - if err != nil && !isNonFatal(err) { + if err != nil { return nil, fmt.Errorf("error gathering PIDs: %w", err) } - return plist, toNonFatal(err) + return plist, nil } // GetPIDState returns the state of a given PID @@ -91,10 +90,10 @@ func (procStats *Stats) Get() ([]mapstr.M, []mapstr.M, error) { } // actually fetch the PIDs from the OS-specific code - pidMap, plist, wrappedErr := procStats.FetchPids() + pidMap, plist, err := procStats.FetchPids() - if wrappedErr != nil && !isNonFatal(wrappedErr) { - return nil, nil, fmt.Errorf("error gathering PIDs: %w", wrappedErr) + if err != nil { + return nil, nil, fmt.Errorf("error gathering PIDs: %w", err) } // We use this to track processes over time. procStats.ProcsMap.SetMap(pidMap) @@ -134,13 +133,13 @@ func (procStats *Stats) Get() ([]mapstr.M, []mapstr.M, error) { rootEvents = append(rootEvents, rootMap) } - return procs, rootEvents, toNonFatal(wrappedErr) + return procs, rootEvents, nil } // GetOne fetches process data for a given PID if its name matches the regexes provided from the host. func (procStats *Stats) GetOne(pid int) (mapstr.M, error) { pidStat, _, err := procStats.pidFill(pid, false) - if err != nil && !isNonFatal(err) { + if err != nil { return nil, fmt.Errorf("error fetching PID %d: %w", pid, err) } @@ -152,9 +151,9 @@ func (procStats *Stats) GetOne(pid int) (mapstr.M, error) { // GetOneRootEvent is the same as `GetOne()` but it returns an // event formatted as expected by ECS func (procStats *Stats) GetOneRootEvent(pid int) (mapstr.M, mapstr.M, error) { - pidStat, _, wrappedErr := procStats.pidFill(pid, false) - if wrappedErr != nil && !isNonFatal(wrappedErr) { - return nil, nil, fmt.Errorf("error fetching PID %d: %w", pid, wrappedErr) + pidStat, _, err := procStats.pidFill(pid, false) + if err != nil { + return nil, nil, fmt.Errorf("error fetching PID %d: %w", pid, err) } procStats.ProcsMap.SetPid(pid, pidStat) @@ -166,7 +165,7 @@ func (procStats *Stats) GetOneRootEvent(pid int) (mapstr.M, mapstr.M, error) { rootMap := processRootEvent(&pidStat) - return procMap, rootMap, toNonFatal(wrappedErr) + return procMap, rootMap, err } // GetSelf gets process info for the beat itself @@ -181,41 +180,56 @@ func (procStats *Stats) GetSelf() (ProcState, error) { } pidStat, _, err := procStats.pidFill(self, false) - if err != nil && !isNonFatal(err) { + if err != nil { return ProcState{}, fmt.Errorf("error fetching PID %d: %w", self, err) } procStats.ProcsMap.SetPid(self, pidStat) - return pidStat, toNonFatal(err) + return pidStat, nil } // pidIter wraps a few lines of generic code that all OS-specific FetchPids() functions must call. // this also handles the process of adding to the maps/lists in order to limit the code duplication in all the OS implementations -func (procStats *Stats) pidIter(pid int, procMap ProcsMap, proclist []ProcState) (ProcsMap, []ProcState, error) { +func (procStats *Stats) pidIter(pid int, procMap ProcsMap, proclist []ProcState) (ProcsMap, []ProcState) { status, saved, err := procStats.pidFill(pid, true) - var nonFatalErr error if err != nil { if !errors.Is(err, NonFatalErr{}) { procStats.logger.Debugf("Error fetching PID info for %d, skipping: %s", pid, err) - // While monitoring a set of processes, some processes might get killed after we get all the PIDs - // So, there's no need to capture "process not found" error. - if errors.Is(err, syscall.ESRCH) { - return procMap, proclist, nil - } - return procMap, proclist, err + return procMap, proclist } - nonFatalErr = fmt.Errorf("non fatal error fetching PID some info for %d, metrics are valid, but partial: %w", pid, err) - procStats.logger.Debugf(err.Error()) + procStats.logger.Debugf("Non fatal error fetching PID some info for %d, metrics are valid, but partial: %s", pid, err) } if !saved { procStats.logger.Debugf("Process name does not match the provided regex; PID=%d; name=%s", pid, status.Name) - return procMap, proclist, nonFatalErr + return procMap, proclist } procMap[pid] = status proclist = append(proclist, status) - return procMap, proclist, nonFatalErr + return procMap, proclist +} + +// NonFatalErr is returned when there was an error +// collecting metrics, however the metrics already +// gathered and returned are still valid. +// This error can be safely ignored, this will result +// in having partial metrics for a process rather than +// no metrics at all. +// +// It was introduced to allow for partial metrics collection +// on privileged process on Windows. +type NonFatalErr struct { + Err error +} + +func (c NonFatalErr) Error() string { + return "Not enough privileges to fetch information: " + c.Err.Error() +} + +func (c NonFatalErr) Is(other error) bool { + _, is := other.(NonFatalErr) + return is } // pidFill is an entrypoint used by OS-specific code to fill out a pid. @@ -224,7 +238,7 @@ func (procStats *Stats) pidIter(pid int, procMap ProcsMap, proclist []ProcState) // The second return value will only be false if an event has been filtered out. func (procStats *Stats) pidFill(pid int, filter bool) (ProcState, bool, error) { // Fetch proc state so we can get the name for filtering based on user's filter. - var wrappedErr error + // OS-specific entrypoint, get basic info so we can at least run matchProcess status, err := GetInfoForPid(procStats.Hostfs, pid) if err != nil { @@ -251,8 +265,7 @@ func (procStats *Stats) pidFill(pid int, filter bool) (ProcState, bool, error) { if !errors.Is(err, NonFatalErr{}) { return status, true, fmt.Errorf("FillPidMetrics: %w", err) } - wrappedErr = errors.Join(wrappedErr, fmt.Errorf("non-fatal error fetching PID metrics for %d, metrics are valid, but partial: %w", pid, err)) - procStats.logger.Debugf(wrappedErr.Error()) + procStats.logger.Debugf("Non-fatal error fetching PID metrics for %d, metrics are valid, but partial: %s", pid, err) } if status.CPU.Total.Ticks.Exists() { @@ -307,7 +320,7 @@ func (procStats *Stats) pidFill(pid int, filter bool) (ProcState, bool, error) { } } - return status, true, wrappedErr + return status, true, nil } // cacheCmdLine fills out Env and arg metrics from any stored previous metrics for the pid diff --git a/metric/system/process/process_aix.go b/metric/system/process/process_aix.go index 44f17fa54..4705d4b7a 100644 --- a/metric/system/process/process_aix.go +++ b/metric/system/process/process_aix.go @@ -46,7 +46,6 @@ func (procStats *Stats) FetchPids() (ProcsMap, []ProcState, error) { pid := C.pid_t(0) procMap := make(ProcsMap, 0) - var wrappedErr err var plist []ProcState for { // getprocs first argument is a void* @@ -54,14 +53,13 @@ func (procStats *Stats) FetchPids() (ProcsMap, []ProcState, error) { if err != nil { return nil, nil, fmt.Errorf("error fetching PIDs: %w", err) } - procMap, plist, err = procStats.pidIter(int(pid), procMap, plist) - wrappedErr = errors.Join(wrappedErr, err) + procMap, plist = procStats.pidIter(int(info.pi_pid), procMap, plist) if num == 0 { break } } - return procMap, plist, toNonFatal(wrappedErr) + return procMap, plist, nil } // GetInfoForPid returns basic info for the process diff --git a/metric/system/process/process_container_test.go b/metric/system/process/process_container_test.go index 39fc8baea..7bac98754 100644 --- a/metric/system/process/process_container_test.go +++ b/metric/system/process/process_container_test.go @@ -18,7 +18,6 @@ package process import ( - "fmt" "os" "os/user" "runtime" @@ -113,7 +112,7 @@ func TestSystemHostFromContainer(t *testing.T) { validateProcResult(t, result) } else { _, roots, err := testStats.Get() - require.True(t, isNonFatal(err), fmt.Sprintf("Fatal error: %s", err)) + require.NoError(t, err) for _, proc := range roots { t.Logf("proc: %d: %s", proc["process"].(map[string]interface{})["pid"], diff --git a/metric/system/process/process_darwin.go b/metric/system/process/process_darwin.go index e4d19d4a5..873714e0a 100644 --- a/metric/system/process/process_darwin.go +++ b/metric/system/process/process_darwin.go @@ -34,7 +34,6 @@ import "C" import ( "bytes" "encoding/binary" - "errors" "fmt" "io" "os" @@ -74,8 +73,6 @@ func (procStats *Stats) FetchPids() (ProcsMap, []ProcState, error) { procMap := make(ProcsMap, num) plist := make([]ProcState, 0, num) - var wrappedErr error - var err error for i := 0; i < num; i++ { if err := binary.Read(bbuf, binary.LittleEndian, &pid); err != nil { @@ -85,11 +82,10 @@ func (procStats *Stats) FetchPids() (ProcsMap, []ProcState, error) { if pid == 0 { continue } - procMap, plist, err = procStats.pidIter(int(pid), procMap, plist) - wrappedErr = errors.Join(wrappedErr, err) + procMap, plist = procStats.pidIter(int(pid), procMap, plist) } - return procMap, plist, toNonFatal(wrappedErr) + return procMap, plist, nil } // GetInfoForPid returns basic info for the process @@ -102,9 +98,9 @@ func GetInfoForPid(_ resolve.Resolver, pid int) (ProcState, error) { // For docs, see the link below. Check the `proc_taskallinfo` struct, which // is a composition of `proc_bsdinfo` and `proc_taskinfo`. // https://opensource.apple.com/source/xnu/xnu-1504.3.12/bsd/sys/proc_info.h.auto.html - n, err := C.proc_pidinfo(C.int(pid), C.PROC_PIDTASKALLINFO, 0, ptr, size) + n := C.proc_pidinfo(C.int(pid), C.PROC_PIDTASKALLINFO, 0, ptr, size) if n != size { - return ProcState{}, fmt.Errorf("could not read process info for pid %d: proc_pidinfo returned %d, err: %w", pid, int(n), err) + return ProcState{}, fmt.Errorf("could not read process info for pid %d: proc_pidinfo returned %d", pid, int(n)) } status := ProcState{} diff --git a/metric/system/process/process_linux_common.go b/metric/system/process/process_linux_common.go index 6954fd902..5ffbf135d 100644 --- a/metric/system/process/process_linux_common.go +++ b/metric/system/process/process_linux_common.go @@ -85,7 +85,6 @@ func (procStats *Stats) FetchPids() (ProcsMap, []ProcState, error) { procMap := make(ProcsMap, len(names)) plist := make([]ProcState, 0, len(names)) - var wrappedErr error // Iterate over the directory, fetch just enough info so we can filter based on user input. logger := logp.L() @@ -100,11 +99,10 @@ func (procStats *Stats) FetchPids() (ProcsMap, []ProcState, error) { logger.Debugf("Error converting PID name %s", name) continue } - procMap, plist, err = procStats.pidIter(pid, procMap, plist) - wrappedErr = errors.Join(wrappedErr, err) + procMap, plist = procStats.pidIter(pid, procMap, plist) } - return procMap, plist, toNonFatal(wrappedErr) + return procMap, plist, nil } func FillPidMetrics(hostfs resolve.Resolver, pid int, state ProcState, filter func(string) bool) (ProcState, error) { diff --git a/metric/system/process/process_test.go b/metric/system/process/process_test.go index c0c5c151e..f5d40db2e 100644 --- a/metric/system/process/process_test.go +++ b/metric/system/process/process_test.go @@ -169,12 +169,12 @@ func TestGetOne(t *testing.T) { assert.NoError(t, err, "Init") _, _, err = testConfig.Get() - assert.True(t, isNonFatal(err), fmt.Sprintf("Fatal Error: %s", err)) + assert.NoError(t, err, "GetOne") time.Sleep(time.Second * 2) procData, _, err := testConfig.Get() - assert.True(t, isNonFatal(err), fmt.Sprintf("Fatal Error: %s", err)) + assert.NoError(t, err, "GetOne") t.Logf("Proc: %s", procData[0].StringToPrint()) } @@ -267,7 +267,7 @@ func TestFilter(t *testing.T) { func TestProcessList(t *testing.T) { plist, err := ListStates(resolve.NewTestResolver("/")) - assert.True(t, isNonFatal(err), fmt.Sprintf("Fatal Error: %s", err)) + assert.NoError(t, err, "ListStates") for _, proc := range plist { assert.NotEmpty(t, proc.State) diff --git a/metric/system/process/process_windows.go b/metric/system/process/process_windows.go index 66862da2c..65125a22d 100644 --- a/metric/system/process/process_windows.go +++ b/metric/system/process/process_windows.go @@ -41,16 +41,14 @@ func (procStats *Stats) FetchPids() (ProcsMap, []ProcState, error) { procMap := make(ProcsMap, len(pids)) plist := make([]ProcState, 0, len(pids)) - var wrappedErr error // This is probably the only implementation that doesn't benefit from our // little fillPid callback system. We'll need to iterate over everything // manually. for _, pid := range pids { - procMap, plist, err = procStats.pidIter(int(pid), procMap, plist) - wrappedErr = errors.Join(wrappedErr, err) + procMap, plist = procStats.pidIter(int(pid), procMap, plist) } - return procMap, plist, toNonFatal(wrappedErr) + return procMap, plist, nil } // GetSelfPid is the darwin implementation; see the linux version in