Skip to content

Commit

Permalink
reduce resource usage (#103)
Browse files Browse the repository at this point in the history
## What does this PR do?

Reduce resource usage by preallocating memory and using caches.

## Why is it important?

`elastic/elastic-agent-system-metrics` is a direct dependency of beats
like `metricbeat`.


![20231026-095255](https://github.com/elastic/elastic-agent-system-metrics/assets/1132494/98e9b8e4-bd60-4e5f-b858-528cd9744839)

As shown in the above screenshot, `metricbeat` spends most of its time
in `elastic/elastic-agent-system-metrics` doing syscalls to walk
directories.

## Checklist

<!-- Mandatory
Add a checklist of things that are required to be reviewed in order to
have the PR approved

List here all the items you have verified BEFORE sending this PR. Please
DO NOT remove any item, striking through those that do not apply. (Just
in case, strikethrough uses two tildes. ~~Scratch this.~~)
-->

- [x] My code follows the style guidelines of this project
- [x] I have commented my code, particularly in hard-to-understand areas
- [ ] ~~I have added tests that prove my fix is effective or that my
feature works~~
- [ ] ~~I have added an entry in `CHANGELOG.md`~~

---------

Signed-off-by: Florian Lehner <[email protected]>
  • Loading branch information
florianl authored Nov 9, 2023
1 parent f01a014 commit 7e588f5
Show file tree
Hide file tree
Showing 5 changed files with 51 additions and 8 deletions.
19 changes: 18 additions & 1 deletion metric/system/cgroup/reader.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ import (
"path/filepath"
"strconv"
"strings"
"sync"
"time"

"github.com/elastic/elastic-agent-libs/logp"
"github.com/elastic/elastic-agent-system-metrics/metric/system/cgroup/cgv1"
Expand Down Expand Up @@ -68,7 +70,7 @@ const (
memoryStat = "memory"
)

//nolint: deadcode,structcheck,unused // needed by other platforms
// nolint: deadcode,structcheck,unused // needed by other platforms
type mount struct {
subsystem string // Subsystem name (e.g. cpuacct).
mountpoint string // Mountpoint of the subsystem (e.g. /cgroup/cpuacct).
Expand All @@ -77,6 +79,17 @@ type mount struct {
fullPath string // Absolute path to the cgroup. It's the mountpoint joined with the path.
}

// pathListWithTime combines PathList with a timestamp.
type pathListWithTime struct {
added time.Time
pathList PathList
}

type pathCache struct {
sync.RWMutex
cache map[string]pathListWithTime
}

// Reader reads cgroup metrics and limits.
type Reader struct {
// Mountpoint of the root filesystem. Defaults to / if not set. This can be
Expand All @@ -85,6 +98,9 @@ type Reader struct {
ignoreRootCgroups bool // Ignore a cgroup when its path is "/".
cgroupsHierarchyOverride string
cgroupMountpoints Mountpoints // Mountpoints for each subsystem (e.g. cpu, cpuacct, memory, blkio).

// Cache to map known v2 cgroup controllerPaths to pathListWithTime.
v2ControllerPathCache pathCache
}

// ReaderOptions holds options for NewReaderOptions.
Expand Down Expand Up @@ -135,6 +151,7 @@ func NewReaderOptions(opts ReaderOptions) (*Reader, error) {
ignoreRootCgroups: opts.IgnoreRootCgroups,
cgroupsHierarchyOverride: opts.CgroupsHierarchyOverride,
cgroupMountpoints: mountpoints,
v2ControllerPathCache: pathCache{cache: make(map[string]pathListWithTime)},
}, nil
}

Expand Down
27 changes: 26 additions & 1 deletion metric/system/cgroup/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (
"path/filepath"
"strconv"
"strings"
"time"

"github.com/elastic/elastic-agent-libs/logp"
"github.com/elastic/elastic-agent-system-metrics/metric/system/resolve"
Expand Down Expand Up @@ -67,7 +68,7 @@ type PathList struct {

// Flatten combines the V1 and V2 cgroups in cases where we don't need a map with keys
func (pl PathList) Flatten() []ControllerPath {
list := []ControllerPath{}
list := make([]ControllerPath, 0, len(pl.V1)+len(pl.V2))
for _, v1 := range pl.V1 {
list = append(list, v1)
}
Expand Down Expand Up @@ -255,6 +256,7 @@ func (r Reader) ProcessCgroupPaths(pid int) (PathList, error) {
if r.cgroupsHierarchyOverride != "" {
path = r.cgroupsHierarchyOverride
}

// cgroup V2
// cgroup v2 controllers will always start with this string
if strings.Contains(line, "0::/") {
Expand All @@ -275,6 +277,23 @@ the container as /sys/fs/cgroup/unified and start the system module with the hos
controllerPath = r.rootfsMountpoint.ResolveHostFS(filepath.Join("/sys/fs/cgroup/unified", path))
}

// Check if there is an entry for controllerPath already cached.
r.v2ControllerPathCache.Lock()
cacheEntry, ok := r.v2ControllerPathCache.cache[controllerPath]
if ok {
// If the cached entry for controllerPath is not older than 5 minutes,
// return the cached entry.
if time.Since(cacheEntry.added) < 5*time.Minute {
cPaths.V2 = cacheEntry.pathList.V2
r.v2ControllerPathCache.Unlock()
continue
}
}
// Consider the existing entry for controllerPath invalid, as it is
// older than 5 minutes.
delete(r.v2ControllerPathCache.cache, controllerPath)
r.v2ControllerPathCache.Unlock()

cgpaths, err := os.ReadDir(controllerPath)
if err != nil {
return cPaths, fmt.Errorf("error fetching cgroupV2 controllers for cgroup location '%s' and path line '%s': %w", r.cgroupMountpoints.V2Loc, line, err)
Expand All @@ -287,6 +306,12 @@ the container as /sys/fs/cgroup/unified and start the system module with the hos
cPaths.V2[controllerName] = ControllerPath{ControllerPath: path, FullPath: controllerPath, IsV2: true}
}
}
r.v2ControllerPathCache.Lock()
r.v2ControllerPathCache.cache[controllerPath] = pathListWithTime{
added: time.Now(),
pathList: cPaths,
}
r.v2ControllerPathCache.Unlock()
// cgroup v1
} else {
subsystems := strings.Split(fields[1], ",")
Expand Down
4 changes: 2 additions & 2 deletions metric/system/process/process_darwin.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,8 @@ func (procStats *Stats) FetchPids() (ProcsMap, []ProcState, error) {

bbuf := bytes.NewBuffer(buf)

procMap := make(ProcsMap, 0)
var plist []ProcState
procMap := make(ProcsMap, len(names))
plist := make([]ProcState, 0, len(names))

for i := 0; i < num; i++ {
if err := binary.Read(bbuf, binary.LittleEndian, &pid); err != nil {
Expand Down
4 changes: 2 additions & 2 deletions metric/system/process/process_linux_common.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,8 @@ func (procStats *Stats) FetchPids() (ProcsMap, []ProcState, error) {
return nil, nil, fmt.Errorf("error reading directory names: %w", err)
}

procMap := make(ProcsMap)
var plist []ProcState
procMap := make(ProcsMap, len(names))
plist := make([]ProcState, 0, len(names))

// Iterate over the directory, fetch just enough info so we can filter based on user input.
logger := logp.L()
Expand Down
5 changes: 3 additions & 2 deletions metric/system/process/process_windows.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,9 @@ func (procStats *Stats) FetchPids() (ProcsMap, []ProcState, error) {
return nil, nil, fmt.Errorf("EnumProcesses failed: %w", err)
}

procMap := make(ProcsMap, 0)
var plist []ProcState
procMap := make(ProcsMap, len(names))
plist := make([]ProcState, 0, len(names))

// This is probably the only implementation that doesn't benefit from our
// little fillPid callback system. We'll need to iterate over everything
// manually.
Expand Down

0 comments on commit 7e588f5

Please sign in to comment.