Skip to content

Commit

Permalink
[fix]: add test case with darwin OS (sustainable-computing-io#1856)
Browse files Browse the repository at this point in the history
* [fix]: add test case with darwin OS

Signed-off-by: Sam Yuan <[email protected]>

* [fix]: use same function naming conventions and behavior

Signed-off-by: Sam Yuan <[email protected]>

* [fix]: update with review comments

Signed-off-by: Sam Yuan <[email protected]>

* [fix]: update contributing.md as PR review comments

Signed-off-by: Sam Yuan <[email protected]>

* [fix]: remove unused package

Signed-off-by: Sam Yuan <[email protected]>

---------

Signed-off-by: Sam Yuan <[email protected]>
  • Loading branch information
SamYuan1990 authored Nov 29, 2024
1 parent abe3eb9 commit e45f6c4
Show file tree
Hide file tree
Showing 25 changed files with 520 additions and 42 deletions.
9 changes: 9 additions & 0 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -135,3 +135,12 @@ Doc: update Developer.md

We enabled [stale bot](https://github.com/probot/stale) for housekeeping. An
Issue or Pull Request becomes stale if there is no activity for 60 days.

## For Mac and Windows users

Kepler currently focuses on the Linux platform.
To make it easy for anyone to contribute from any platform, we welcome
contributions (PRs) to Kepler, including changes that make parts of it
compilable on other platforms.
Until a specific platform is officially supported, CI runs only on Linux,
and passing Linux CI is the standard for merging PRs and for official support.
2 changes: 1 addition & 1 deletion cmd/exporter/exporter.go
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ func main() {
platform.InitPowerImpl()
defer platform.StopPower()

if config.EnabledGPU() {
if config.IsGPUEnabled() {
r := accelerator.GetRegistry()
if a, err := accelerator.New(config.GPU, true); err == nil {
r.MustRegister(a) // Register the accelerator with the registry
Expand Down
3 changes: 3 additions & 0 deletions pkg/bpf/exporter.go
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
//go:build !darwin
// +build !darwin

/*
Copyright 2021.
Expand Down
293 changes: 293 additions & 0 deletions pkg/bpf/fake_mac.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,293 @@
//go:build darwin
// +build darwin

/*
Copyright 2021.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package bpf

import (
"errors"
"fmt"
"os"
"runtime"
"time"

"github.com/cilium/ebpf"
"github.com/cilium/ebpf/link"
"github.com/cilium/ebpf/rlimit"
"github.com/jaypipes/ghw"
"github.com/sustainable-computing-io/kepler/pkg/config"
"golang.org/x/sys/unix"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/klog/v2"
)

// exporter keeps the loaded eBPF objects together with every link and perf
// event handle created by attach, so that Detach can release them again.
type exporter struct {
	// bpfObjects is filled in by specs.LoadAndAssign during attach.
	bpfObjects keplerObjects

	// Links returned when the individual programs are attached. A nil link
	// means the corresponding program is not (or no longer) attached.
	schedSwitchLink link.Link // sched_switch raw tracepoint
	irqLink         link.Link // irq trace, only attached when IRQ metrics are exposed
	pageWriteLink   link.Link // writeback tracepoint (page cache writes)
	pageReadLink    link.Link // fentry program (page cache reads)

	// perfEvents is only set when createHardwarePerfEvents succeeds; it stays
	// nil when hardware counters are disabled or attach fails earlier.
	perfEvents *hardwarePerfEvents

	// Counter names reported through SupportedMetrics.
	enabledHardwareCounters, enabledSoftwareCounters sets.Set[string]
}

// NewExporter builds an exporter whose enabled counter sets are seeded from
// config.BPFHwCounters and config.BPFSwCounters, then attaches its eBPF
// programs.
//
// When attaching fails, whatever was attached so far is detached and the
// exporter is returned together with the attach error.
func NewExporter() (Exporter, error) {
	hwCounters := sets.New[string](config.BPFHwCounters()...)
	swCounters := sets.New[string](config.BPFSwCounters()...)

	exp := &exporter{
		enabledHardwareCounters: hwCounters,
		enabledSoftwareCounters: swCounters,
	}

	if attachErr := exp.attach(); attachErr != nil {
		exp.Detach()
		return exp, attachErr
	}
	return exp, nil
}

// SupportedMetrics reports the hardware and software counters currently
// enabled on this exporter. Both sets are cloned before being handed out.
func (e *exporter) SupportedMetrics() SupportedMetrics {
	var supported SupportedMetrics
	supported.HardwareCounters = e.enabledHardwareCounters.Clone()
	supported.SoftwareCounters = e.enabledSoftwareCounters.Clone()
	return supported
}

// attach loads the Kepler eBPF specs, resizes the per-CPU maps, loads the
// programs into e.bpfObjects and attaches them.
//
// Failing to remove the memlock limit, load the specs, rewrite constants,
// load the objects, or attach the sched_switch / irq programs is fatal and
// returned as a wrapped error (use errors.Is / errors.As to inspect the
// cause). Failing to attach the page cache programs or to set up hardware
// perf events is best-effort: attach still returns nil.
//
// attach does not clean up after itself on error; the caller is expected to
// call Detach.
func (e *exporter) attach() error {
	// MaxEntries value that marks a map as sized by the NUM_CPUS constant.
	const numCPUsPlaceholder = 128

	// Remove resource limits for kernels <5.11.
	if err := rlimit.RemoveMemlock(); err != nil {
		return fmt.Errorf("error removing memlock: %w", err)
	}

	// Load eBPF Specs
	specs, err := loadKepler()
	if err != nil {
		return fmt.Errorf("error loading eBPF specs: %w", err)
	}

	// Adjust map sizes to the number of available CPUs
	numCPU := getCPUCores()
	klog.Infof("Number of CPUs: %d", numCPU)
	for _, m := range specs.Maps {
		// Only resize maps that have a MaxEntries of NUM_CPUS constant
		if m.MaxEntries == numCPUsPlaceholder {
			m.MaxEntries = uint32(numCPU)
		}
	}

	// Set program global variables
	err = specs.RewriteConstants(map[string]interface{}{
		"SAMPLE_RATE": int32(config.GetBPFSampleRate()),
	})
	if err != nil {
		return fmt.Errorf("error rewriting program constants: %w", err)
	}

	// Load the eBPF program(s)
	if err := specs.LoadAndAssign(&e.bpfObjects, nil); err != nil {
		return fmt.Errorf("error loading eBPF objects: %w", err)
	}

	// Attach the eBPF program(s)
	e.schedSwitchLink, err = link.AttachTracing(link.TracingOptions{
		Program:    e.bpfObjects.KeplerSchedSwitchTrace,
		AttachType: ebpf.AttachTraceRawTp,
	})
	if err != nil {
		return fmt.Errorf("error attaching sched_switch tracepoint: %w", err)
	}

	if config.ExposeIRQCounterMetrics() {
		e.irqLink, err = link.AttachTracing(link.TracingOptions{
			Program:    e.bpfObjects.KeplerIrqTrace,
			AttachType: ebpf.AttachTraceRawTp,
		})
		if err != nil {
			return fmt.Errorf("could not attach irq/softirq_entry: %w", err)
		}
	}

	// Pick the writeback tracepoint by probing tracefs: use the *_folio
	// variant when it exists, otherwise fall back to *_page.
	group := "writeback"
	name := "writeback_dirty_page"
	if _, err := os.Stat("/sys/kernel/debug/tracing/events/writeback/writeback_dirty_folio"); err == nil {
		name = "writeback_dirty_folio"
	}
	e.pageWriteLink, err = link.Tracepoint(group, name, e.bpfObjects.KeplerWritePageTrace, nil)
	if err != nil {
		klog.Warningf("failed to attach tp/%s/%s: %v. Kepler will not collect page cache write events. This will affect the DRAM power model estimation on VMs.", group, name, err)
	} else {
		// The page cache counter is only advertised once the write probe is attached.
		e.enabledSoftwareCounters[config.PageCacheHit] = struct{}{}
	}

	e.pageReadLink, err = link.AttachTracing(link.TracingOptions{
		Program:    e.bpfObjects.KeplerReadPageTrace,
		AttachType: ebpf.AttachTraceFEntry,
	})
	if err != nil {
		klog.Warningf("failed to attach fentry/mark_page_accessed: %v. Kepler will not collect page cache read events. This will affect the DRAM power model estimation on VMs.", err)
	}

	// Return early if hardware counters are not enabled
	if !config.ExposeHardwareCounterMetrics() {
		klog.Infof("Hardware counter metrics are disabled")
		return nil
	}

	// Hardware perf events are best-effort: createHardwarePerfEvents logs its
	// own warning and returns a nil handle on failure, so the error is
	// deliberately not propagated and e.perfEvents may remain nil.
	e.perfEvents, _ = createHardwarePerfEvents(
		e.bpfObjects.CpuInstructionsEventReader,
		e.bpfObjects.CpuCyclesEventReader,
		e.bpfObjects.CacheMissEventReader,
		numCPU,
	)

	return nil
}

// Detach releases everything attach may have created: the program links, the
// hardware perf events and finally the loaded eBPF objects.
//
// It is safe to call on a partially attached exporter (for example from
// NewExporter after attach failed): links and perf events that were never
// created are skipped. Errors returned by the individual Close calls are
// ignored because teardown is best-effort.
func (e *exporter) Detach() {
	// Links
	if e.schedSwitchLink != nil {
		e.schedSwitchLink.Close()
		e.schedSwitchLink = nil
	}

	if e.irqLink != nil {
		e.irqLink.Close()
		e.irqLink = nil
	}

	if e.pageWriteLink != nil {
		e.pageWriteLink.Close()
		e.pageWriteLink = nil
	}

	if e.pageReadLink != nil {
		e.pageReadLink.Close()
		e.pageReadLink = nil
	}

	// Perf events. perfEvents is nil when attach failed before creating them
	// or when hardware counters are disabled; calling close on it would
	// dereference a nil pointer.
	if e.perfEvents != nil {
		e.perfEvents.close()
		e.perfEvents = nil
	}

	// Objects
	e.bpfObjects.Close()
}

// CollectProcesses drains the Processes eBPF map with batched
// lookup-and-delete calls and returns every sample that was read.
//
// Each batch call fills the scratch buffers starting at index 0, so the
// samples of every batch are copied into the result before the next call;
// otherwise a later batch would overwrite an earlier one while the running
// total kept growing. The loop ends when the map reports ErrKeyNotExist,
// which signals the end of the iteration and may still carry a final partial
// batch. Any other error aborts the collection and is returned wrapped.
func (e *exporter) CollectProcesses() ([]ProcessMetrics, error) {
	start := time.Now()
	// Get the max number of entries in the map
	maxEntries := e.bpfObjects.Processes.MaxEntries()

	// Scratch buffers reused by every batch call.
	batchKeys := make([]uint32, maxEntries)
	batchValues := make([]ProcessMetrics, maxEntries)
	collected := make([]ProcessMetrics, 0, maxEntries)

	var cursor ebpf.MapBatchCursor
	for {
		count, err := e.bpfObjects.Processes.BatchLookupAndDelete(
			&cursor,
			batchKeys,
			batchValues,
			&ebpf.BatchOptions{},
		)
		// count is meaningful even when err is ErrKeyNotExist.
		collected = append(collected, batchValues[:count]...)
		if errors.Is(err, ebpf.ErrKeyNotExist) {
			break
		}
		if err != nil {
			return nil, fmt.Errorf("failed to batch lookup and delete: %w", err)
		}
	}
	klog.V(5).Infof("collected %d process samples in %v", len(collected), time.Since(start))
	return collected, nil
}

///////////////////////////////////////////////////////////////////////////
// utility functions

// unixOpenPerfEvent is the darwin stand-in for opening perf events. It opens
// nothing: all arguments are ignored and the result is always an empty,
// non-nil descriptor list with a nil error.
func unixOpenPerfEvent(typ, conf, cpuCores int) ([]int, error) {
	noDescriptors := make([]int, 0)
	return noDescriptors, nil
}

// unixClosePerfEvents closes every file descriptor in fds. Each descriptor is
// switched to non-blocking mode first; errors from either call are ignored.
func unixClosePerfEvents(fds []int) {
	for i := range fds {
		descriptor := fds[i]
		_ = unix.SetNonblock(descriptor, true)
		unix.Close(descriptor)
	}
}

// getCPUCores returns the number of CPUs used to size the per-CPU eBPF maps.
// The total thread count reported by ghw is preferred because the goal is the
// number of all CPUs; when ghw cannot provide CPU information the function
// falls back to runtime.NumCPU.
func getCPUCores() int {
	cpu, err := ghw.CPU()
	if err != nil {
		return runtime.NumCPU()
	}
	return int(cpu.TotalThreads)
}

// hardwarePerfEvents groups the perf event file descriptors that back the
// hardware counter maps. Each slice is indexed the same way as the keys
// written to the corresponding eBPF map by createHardwarePerfEvents.
type hardwarePerfEvents struct {
	// cpuCyclesPerfEvents holds the descriptors for the CPU cycles counter.
	cpuCyclesPerfEvents []int
	// cpuInstructionsPerfEvents holds the descriptors for the CPU instructions counter.
	cpuInstructionsPerfEvents []int
	// cacheMissPerfEvents holds the descriptors for the cache miss counter.
	cacheMissPerfEvents []int
}

// close releases the file descriptors of all three hardware counters.
//
// It is a no-op on a nil receiver: the exporter's perfEvents field stays nil
// when hardware counters are disabled or attaching failed, and Detach calls
// close regardless.
func (h *hardwarePerfEvents) close() {
	if h == nil {
		return
	}
	unixClosePerfEvents(h.cpuCyclesPerfEvents)
	unixClosePerfEvents(h.cpuInstructionsPerfEvents)
	unixClosePerfEvents(h.cacheMissPerfEvents)
}

// createHardwarePerfEvents builds the hardwarePerfEvents handle and writes
// each perf event descriptor into its eBPF map, keyed by the descriptor's
// index in its slice. Maps are filled in the order: CPU cycles, CPU
// instructions, cache misses.
//
// In this darwin build no perf events are opened, so all three descriptor
// lists start out empty and no map is actually updated. numCPU is accepted
// but not used here.
//
// If any map update fails, a warning is logged, all descriptors are closed
// and (nil, err) is returned.
func createHardwarePerfEvents(cpuInstructionsMap, cpuCyclesMap, cacheMissMap *ebpf.Map, numCPU int) (*hardwarePerfEvents, error) {
	events := &hardwarePerfEvents{
		cpuCyclesPerfEvents:       make([]int, 0),
		cpuInstructionsPerfEvents: make([]int, 0),
		cacheMissPerfEvents:       make([]int, 0),
	}

	targets := []struct {
		reader  *ebpf.Map
		fds     []int
		warning string
	}{
		{cpuCyclesMap, events.cpuCyclesPerfEvents, "Failed to update cpu_cycles_event_reader map: %v"},
		{cpuInstructionsMap, events.cpuInstructionsPerfEvents, "Failed to update cpu_instructions_event_reader map: %v"},
		{cacheMissMap, events.cacheMissPerfEvents, "Failed to update cache_miss_event_reader map: %v"},
	}

	for _, target := range targets {
		for idx, fd := range target.fds {
			updateErr := target.reader.Update(uint32(idx), uint32(fd), ebpf.UpdateAny)
			if updateErr == nil {
				continue
			}
			klog.Warningf(target.warning, updateErr)
			// Release every descriptor before reporting the failure.
			unixClosePerfEvents(events.cpuCyclesPerfEvents)
			unixClosePerfEvents(events.cpuInstructionsPerfEvents)
			unixClosePerfEvents(events.cacheMissPerfEvents)
			return nil, updateErr
		}
	}
	return events, nil
}
3 changes: 3 additions & 0 deletions pkg/bpftest/bpf_suite_test.go
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
//go:build !darwin
// +build !darwin

package bpftest

import (
Expand Down
2 changes: 1 addition & 1 deletion pkg/collector/energy/node_energy_collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ func UpdateNodeComponentsEnergy(nodeStats *stats.NodeStats, wg *sync.WaitGroup)
// UpdateNodeGPUEnergy updates each GPU power consumption. Right now we don't support other types of accelerators
func UpdateNodeGPUEnergy(nodeStats *stats.NodeStats, wg *sync.WaitGroup) {
defer wg.Done()
if config.EnabledGPU() {
if config.IsGPUEnabled() {
if gpu := acc.GetActiveAcceleratorByType(config.GPU); gpu != nil {
gpuEnergy := gpu.Device().AbsEnergyFromDevice()
for gpu, energy := range gpuEnergy {
Expand Down
2 changes: 1 addition & 1 deletion pkg/collector/metric_collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ func (c *Collector) updateProcessResourceUtilizationMetrics(wg *sync.WaitGroup)
// update process metrics regarding the resource utilization to be used to calculate the energy consumption
// we first updates the bpf which is responsible to include new processes in the ProcessStats collection
resourceBpf.UpdateProcessBPFMetrics(c.bpfExporter, c.ProcessStats)
if config.EnabledGPU() {
if config.IsGPUEnabled() {
if acc.GetActiveAcceleratorByType(config.GPU) != nil {
accelerator.UpdateProcessGPUUtilizationMetrics(c.ProcessStats)
}
Expand Down
13 changes: 13 additions & 0 deletions pkg/collector/resourceutilization/bpf/fake_mac.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
//go:build darwin
// +build darwin

package bpf

import (
"github.com/sustainable-computing-io/kepler/pkg/bpf"
"github.com/sustainable-computing-io/kepler/pkg/collector/stats"
)

// UpdateProcessBPFMetrics is the darwin build's placeholder. It accepts the
// BPF exporter and the process stats map but does nothing with either.
func UpdateProcessBPFMetrics(bpfExporter bpf.Exporter, processStats map[uint64]*stats.ProcessStats) {
	// No-op on darwin.
}
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
//go:build !darwin
// +build !darwin

/*
Copyright 2021.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
//go:build !darwin
// +build !darwin

package bpf

import (
Expand Down
2 changes: 1 addition & 1 deletion pkg/collector/stats/node_stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ func (ne *NodeStats) ResetDeltaValues() {

func (ne *NodeStats) UpdateIdleEnergyWithMinValue(isComponentsSystemCollectionSupported bool) {
// gpu metric
if config.EnabledGPU() {
if config.IsGPUEnabled() {
if acc.GetActiveAcceleratorByType(config.GPU) != nil {
ne.CalcIdleEnergy(config.AbsEnergyInGPU, config.IdleEnergyInGPU, config.GPUComputeUtilization)
}
Expand Down
Loading

0 comments on commit e45f6c4

Please sign in to comment.