From 596a871931f1bd8692bc44db335920391a981550 Mon Sep 17 00:00:00 2001 From: Robin Lu <robin.lu@bytedance.com> Date: Fri, 8 Nov 2024 18:28:55 +0800 Subject: [PATCH] feat(qrm): adding support to cpu.weight Signed-off-by: Robin Lu <robin.lu@bytedance.com> --- .../app/options/qrm/cpu_plugin.go | 16 ++ pkg/agent/qrm-plugins/cpu/consts/consts.go | 1 + .../qrm-plugins/cpu/dynamicpolicy/policy.go | 12 ++ .../cpu/handlers/cpuweight/const.go | 21 ++ .../cpu/handlers/cpuweight/cpuweight_linux.go | 99 +++++++++ .../cpuweight/cpuweight_linux_test.go | 189 ++++++++++++++++++ .../cpuweight/cpuweight_unsupported.go | 31 +++ pkg/config/agent/qrm/cpu_plugin.go | 6 + 8 files changed, 375 insertions(+) create mode 100644 pkg/agent/qrm-plugins/cpu/handlers/cpuweight/const.go create mode 100644 pkg/agent/qrm-plugins/cpu/handlers/cpuweight/cpuweight_linux.go create mode 100644 pkg/agent/qrm-plugins/cpu/handlers/cpuweight/cpuweight_linux_test.go create mode 100644 pkg/agent/qrm-plugins/cpu/handlers/cpuweight/cpuweight_unsupported.go diff --git a/cmd/katalyst-agent/app/options/qrm/cpu_plugin.go b/cmd/katalyst-agent/app/options/qrm/cpu_plugin.go index f07503cff2..b9bc2c46a6 100644 --- a/cmd/katalyst-agent/app/options/qrm/cpu_plugin.go +++ b/cmd/katalyst-agent/app/options/qrm/cpu_plugin.go @@ -31,6 +31,7 @@ type CPUOptions struct { CPUDynamicPolicyOptions CPUNativePolicyOptions + CPUWeightOptions } type CPUDynamicPolicyOptions struct { @@ -48,6 +49,11 @@ type CPUNativePolicyOptions struct { CPUAllocationOption string } +type CPUWeightOptions struct { + EnableSettingCPUWeight bool + CPUWeightConfigFile string +} + func NewCPUOptions() *CPUOptions { return &CPUOptions{ PolicyName: "dynamic", @@ -70,6 +76,10 @@ func NewCPUOptions() *CPUOptions { EnableFullPhysicalCPUsOnly: false, CPUAllocationOption: "packed", }, + CPUWeightOptions: CPUWeightOptions{ + EnableSettingCPUWeight: false, + CPUWeightConfigFile: "", + }, } } @@ -103,6 +113,10 @@ func (o *CPUOptions) AddFlags(fss *cliflag.NamedFlagSets) { fs.BoolVar(&o.EnableFullPhysicalCPUsOnly, "enable-full-physical-cpus-only", o.EnableFullPhysicalCPUsOnly, "if set true, we will enable extra allocation restrictions to "+ "avoid different containers to possibly end up on the same core.") + fs.BoolVar(&o.EnableSettingCPUWeight, "enable-setting-cpu-weight", + o.EnableSettingCPUWeight, "if set true, we will enable cpu weight related features") + fs.StringVar(&o.CPUWeightConfigFile, "qrm-cpu-weight-config-file", + o.CPUWeightConfigFile, "cpu weight config file") } func (o *CPUOptions) ApplyTo(conf *qrmconfig.CPUQRMPluginConfig) error { @@ -118,5 +132,7 @@ func (o *CPUOptions) ApplyTo(conf *qrmconfig.CPUQRMPluginConfig) error { conf.CPUAllocationOption = o.CPUAllocationOption conf.CPUNUMAHintPreferPolicy = o.CPUNUMAHintPreferPolicy conf.CPUNUMAHintPreferLowThreshold = o.CPUNUMAHintPreferLowThreshold + conf.EnableSettingCPUWeight = o.EnableSettingCPUWeight + conf.CPUWeightConfigFile = o.CPUWeightConfigFile return nil } diff --git a/pkg/agent/qrm-plugins/cpu/consts/consts.go b/pkg/agent/qrm-plugins/cpu/consts/consts.go index ba2bfb0f51..7edb737ab4 100644 --- a/pkg/agent/qrm-plugins/cpu/consts/consts.go +++ b/pkg/agent/qrm-plugins/cpu/consts/consts.go @@ -32,6 +32,7 @@ const ( CheckCPUSet = CPUPluginDynamicPolicyName + "_check_cpuset" SyncCPUIdle = CPUPluginDynamicPolicyName + "_sync_cpu_idle" CommunicateWithAdvisor = CPUPluginDynamicPolicyName + "_communicate_with_advisor" + SyncCPUWeight = CPUPluginDynamicPolicyName + "_sync_cpu_weight" ) const ( diff --git a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy.go b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy.go index 4458f4712b..6657131492 100644 --- a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy.go +++ b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy.go @@ -42,6 +42,7 @@ import ( "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpueviction" "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state" "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/dynamicpolicy/validator" + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/handlers/cpuweight" cpuutil "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/util" "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/util" "github.com/kubewharf/katalyst-core/pkg/agent/utilcomponent/periodicalhandler" @@ -133,6 +134,7 @@ type DynamicPolicy struct { transitionPeriod time.Duration cpuNUMAHintPreferPolicy string cpuNUMAHintPreferLowThreshold float64 + enableSettingCPUWeight bool } func NewDynamicPolicy(agentCtx *agent.GenericContext, conf *config.Configuration, @@ -197,6 +199,7 @@ func NewDynamicPolicy(agentCtx *agent.GenericContext, conf *config.Configuration enableCPUAdvisor: conf.CPUQRMPluginConfig.EnableCPUAdvisor, cpuNUMAHintPreferPolicy: conf.CPUQRMPluginConfig.CPUNUMAHintPreferPolicy, cpuNUMAHintPreferLowThreshold: conf.CPUQRMPluginConfig.CPUNUMAHintPreferLowThreshold, + enableSettingCPUWeight: conf.EnableSettingCPUWeight, reservedCPUs: reservedCPUs, extraStateFileAbsPath: conf.ExtraStateFileAbsPath, enableSyncingCPUIdle: conf.CPUQRMPluginConfig.EnableSyncingCPUIdle, @@ -332,6 +335,15 @@ func (p *DynamicPolicy) Start() (err error) { p.cpuAdvisorSocketAbsPath, p.cpuPluginSocketAbsPath) } + if p.enableSettingCPUWeight { + general.Infof("setCPUWeight enabled") + err = periodicalhandler.RegisterPeriodicalHandlerWithHealthz(cpuconsts.SyncCPUWeight, general.HealthzCheckStateNotReady, + qrm.QRMCPUPluginPeriodicalHandlerGroupName, cpuweight.SetCPUWeight, 1800*time.Second, healthCheckTolerationTimes) + if err != nil { + general.Errorf("start %v failed,err:%v", cpuconsts.SyncCPUWeight, err) + } + } + general.Infof("start dynamic policy cpu plugin with sys-advisor") err = p.initAdvisorClientConn() if err != nil { diff --git a/pkg/agent/qrm-plugins/cpu/handlers/cpuweight/const.go b/pkg/agent/qrm-plugins/cpu/handlers/cpuweight/const.go new file mode 100644 index 0000000000..5f8730da28 --- /dev/null +++ b/pkg/agent/qrm-plugins/cpu/handlers/cpuweight/const.go @@ -0,0 +1,21 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package cpuweight + +const ( + metricNameCPUWeight = "async_handler_cpu_weight" +) diff --git a/pkg/agent/qrm-plugins/cpu/handlers/cpuweight/cpuweight_linux.go b/pkg/agent/qrm-plugins/cpu/handlers/cpuweight/cpuweight_linux.go new file mode 100644 index 0000000000..7bd7f79f8d --- /dev/null +++ b/pkg/agent/qrm-plugins/cpu/handlers/cpuweight/cpuweight_linux.go @@ -0,0 +1,99 @@ +//go:build linux +// +build linux + +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package cpuweight + +import ( + "k8s.io/apimachinery/pkg/util/errors" + + cpuconsts "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/consts" + coreconfig "github.com/kubewharf/katalyst-core/pkg/config" + dynamicconfig "github.com/kubewharf/katalyst-core/pkg/config/agent/dynamic" + "github.com/kubewharf/katalyst-core/pkg/metaserver" + "github.com/kubewharf/katalyst-core/pkg/metrics" + cgroupcm "github.com/kubewharf/katalyst-core/pkg/util/cgroup/common" + cgroupmgr "github.com/kubewharf/katalyst-core/pkg/util/cgroup/manager" + "github.com/kubewharf/katalyst-core/pkg/util/general" +) + +func applyCPUWeightCgroupLevelConfig(conf *coreconfig.Configuration, emitter metrics.MetricEmitter) { + if conf.CPUWeightConfigFile == "" { + general.Errorf("CPUWeightConfigFile isn't configured") + return + } + + cpuWightCgroupLevelConfigs := make(map[string]uint64) + err := general.LoadJsonConfig(conf.CPUWeightConfigFile, &cpuWightCgroupLevelConfigs) + if err != nil { + general.Errorf("load CPUWeightCgroupLevelConfig failed with error: %v", err) + return + } + + for relativeCgPath, weight := range cpuWightCgroupLevelConfigs { + err := cgroupmgr.ApplyCPUWithRelativePath(relativeCgPath, &cgroupcm.CPUData{ + Shares: weight, + }) + if err != nil { + general.Errorf("ApplyCPUWeightWithRelativePath in relativeCgPath: %s failed with error: %v", + relativeCgPath, err) + } else { + general.Infof("ApplyCPUWeightWithRelativePath weight: %d in relativeCgPath: %s successfully", + weight, relativeCgPath) + _ = emitter.StoreInt64(metricNameCPUWeight, int64(weight), metrics.MetricTypeNameRaw, + metrics.ConvertMapToTags(map[string]string{ + "cgPath": relativeCgPath, + })...) + + } + } +} + +func SetCPUWeight(conf *coreconfig.Configuration, + _ interface{}, _ *dynamicconfig.DynamicAgentConfiguration, + emitter metrics.MetricEmitter, metaServer *metaserver.MetaServer, +) { + general.Infof("called") + + var errList []error + defer func() { + _ = general.UpdateHealthzStateByError(cpuconsts.SyncCPUWeight, errors.NewAggregate(errList)) + }() + + if conf == nil { + general.Errorf("nil extraConf") + return + } else if emitter == nil { + general.Errorf("nil emitter") + return + } else if metaServer == nil { + general.Errorf("nil metaServer") + return + } + + // SettingCPUWeight featuregate. + if !conf.EnableSettingCPUWeight { + general.Infof("SetCPUWeight disabled") + return + } + + // checking cgroup-level cpu.weight configuration. + if len(conf.CPUWeightConfigFile) > 0 { + applyCPUWeightCgroupLevelConfig(conf, emitter) + } +} diff --git a/pkg/agent/qrm-plugins/cpu/handlers/cpuweight/cpuweight_linux_test.go b/pkg/agent/qrm-plugins/cpu/handlers/cpuweight/cpuweight_linux_test.go new file mode 100644 index 0000000000..8879fbf459 --- /dev/null +++ b/pkg/agent/qrm-plugins/cpu/handlers/cpuweight/cpuweight_linux_test.go @@ -0,0 +1,189 @@ +//go:build linux +// +build linux + +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package cpuweight + +import ( + "testing" + + "github.com/stretchr/testify/assert" + v1 "k8s.io/api/core/v1" + + coreconfig "github.com/kubewharf/katalyst-core/pkg/config" + "github.com/kubewharf/katalyst-core/pkg/config/agent" + configagent "github.com/kubewharf/katalyst-core/pkg/config/agent" + dynamicconfig "github.com/kubewharf/katalyst-core/pkg/config/agent/dynamic" + "github.com/kubewharf/katalyst-core/pkg/config/agent/qrm" + "github.com/kubewharf/katalyst-core/pkg/metaserver" + metaagent "github.com/kubewharf/katalyst-core/pkg/metaserver/agent" + "github.com/kubewharf/katalyst-core/pkg/metaserver/agent/metric" + "github.com/kubewharf/katalyst-core/pkg/metaserver/agent/pod" + "github.com/kubewharf/katalyst-core/pkg/metrics" + "github.com/kubewharf/katalyst-core/pkg/util/machine" +) + +func makeMetaServer() (*metaserver.MetaServer, error) { + server := &metaserver.MetaServer{ + MetaAgent: &metaagent.MetaAgent{}, + } + + cpuTopology, err := machine.GenerateDummyCPUTopology(16, 1, 2) + if err != nil { + return nil, err + } + + server.KatalystMachineInfo = &machine.KatalystMachineInfo{ + CPUTopology: cpuTopology, + } + server.MetricsFetcher = metric.NewFakeMetricsFetcher(metrics.DummyMetrics{}) + return server, nil +} + +func TestSetCPUWeight(t *testing.T) { + t.Parallel() + + SetCPUWeight(&coreconfig.Configuration{ + AgentConfiguration: &agent.AgentConfiguration{ + StaticAgentConfiguration: &configagent.StaticAgentConfiguration{ + QRMPluginsConfiguration: &qrm.QRMPluginsConfiguration{ + CPUQRMPluginConfig: &qrm.CPUQRMPluginConfig{ + CPUWeightOptions: qrm.CPUWeightOptions{ + EnableSettingCPUWeight: false, + }, + }, + }, + }, + }, + }, nil, &dynamicconfig.DynamicAgentConfiguration{}, nil, nil) + + SetCPUWeight(&coreconfig.Configuration{ + AgentConfiguration: &agent.AgentConfiguration{ + StaticAgentConfiguration: &configagent.StaticAgentConfiguration{ + QRMPluginsConfiguration: &qrm.QRMPluginsConfiguration{ + CPUQRMPluginConfig: &qrm.CPUQRMPluginConfig{ + CPUWeightOptions: qrm.CPUWeightOptions{ + EnableSettingCPUWeight: true, + }, + }, + }, + }, + }, + }, nil, &dynamicconfig.DynamicAgentConfiguration{}, nil, nil) + + metaServer, err := makeMetaServer() + assert.NoError(t, err) + metaServer.PodFetcher = &pod.PodFetcherStub{PodList: []*v1.Pod{}} + SetCPUWeight(&coreconfig.Configuration{ + AgentConfiguration: &agent.AgentConfiguration{ + StaticAgentConfiguration: &configagent.StaticAgentConfiguration{ + QRMPluginsConfiguration: &qrm.QRMPluginsConfiguration{ + CPUQRMPluginConfig: &qrm.CPUQRMPluginConfig{ + CPUWeightOptions: qrm.CPUWeightOptions{ + EnableSettingCPUWeight: true, + }, + }, + }, + }, + }, + }, metrics.DummyMetrics{}, &dynamicconfig.DynamicAgentConfiguration{}, metrics.DummyMetrics{}, metaServer) + + SetCPUWeight(&coreconfig.Configuration{ + AgentConfiguration: &agent.AgentConfiguration{ + StaticAgentConfiguration: &configagent.StaticAgentConfiguration{ + QRMPluginsConfiguration: &qrm.QRMPluginsConfiguration{ + CPUQRMPluginConfig: &qrm.CPUQRMPluginConfig{ + CPUWeightOptions: qrm.CPUWeightOptions{ + EnableSettingCPUWeight: true, + }, + }, + }, + }, + }, + }, nil, &dynamicconfig.DynamicAgentConfiguration{}, metrics.DummyMetrics{}, metaServer) + + /* + applyCPUWeightCgroupLevelConfig(&coreconfig.Configuration{ + AgentConfiguration: &agent.AgentConfiguration{ + StaticAgentConfiguration: &configagent.StaticAgentConfiguration{ + QRMPluginsConfiguration: &qrm.QRMPluginsConfiguration{ + CPUQRMPluginConfig: &qrm.CPUQRMPluginConfig{ + CPUWeightOptions: qrm.CPUWeightOptions{ + EnableSettingCPUWeight: true, + }, + }, + }, + }, + }, + }, metrics.DummyMetrics{}) + + applyCPUWeightCgroupLevelConfig(&coreconfig.Configuration{ + AgentConfiguration: &agent.AgentConfiguration{ + StaticAgentConfiguration: &configagent.StaticAgentConfiguration{ + QRMPluginsConfiguration: &qrm.QRMPluginsConfiguration{ + CPUQRMPluginConfig: &qrm.CPUQRMPluginConfig{ + CPUWeightOptions: qrm.CPUWeightOptions{ + EnableSettingCPUWeight: true, + CPUWeightConfigFile: "fake", + }, + }, + }, + }, + }, + }, metrics.DummyMetrics{}) + + // Create a temporary file + tempFile, err := ioutil.TempFile("", "cpuweight.json") + if err != nil { + fmt.Println("Error creating temporary file:", err) + return + } + defer os.Remove(tempFile.Name()) // Defer removing the temporary file + + // Write the JSON content to the temporary file + jsonContent := `{ + "fake": 200, + "fake2": 400 + }` + + if _, err := tempFile.WriteString(jsonContent); err != nil { + fmt.Println("Error writing to temporary file:", err) + return + } + absPathCgroup, err := filepath.Abs(tempFile.Name()) + if err != nil { + fmt.Println("Error obtaining absolute path:", err) + return + } + + applyCPUWeightCgroupLevelConfig(&coreconfig.Configuration{ + AgentConfiguration: &agent.AgentConfiguration{ + StaticAgentConfiguration: &configagent.StaticAgentConfiguration{ + QRMPluginsConfiguration: &qrm.QRMPluginsConfiguration{ + CPUQRMPluginConfig: &qrm.CPUQRMPluginConfig{ + CPUWeightOptions: qrm.CPUWeightOptions{ + EnableSettingCPUWeight: true, + CPUWeightConfigFile: absPathCgroup, + }, + }, + }, + }, + }, + }, metrics.DummyMetrics{}) + */ +} diff --git a/pkg/agent/qrm-plugins/cpu/handlers/cpuweight/cpuweight_unsupported.go b/pkg/agent/qrm-plugins/cpu/handlers/cpuweight/cpuweight_unsupported.go new file mode 100644 index 0000000000..62a5ad8bf3 --- /dev/null +++ b/pkg/agent/qrm-plugins/cpu/handlers/cpuweight/cpuweight_unsupported.go @@ -0,0 +1,31 @@ +//go:build !linux + +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package cpuweight + +import ( + coreconfig "github.com/kubewharf/katalyst-core/pkg/config" + dynamicconfig "github.com/kubewharf/katalyst-core/pkg/config/agent/dynamic" + "github.com/kubewharf/katalyst-core/pkg/metaserver" + "github.com/kubewharf/katalyst-core/pkg/metrics" +) + +func SetCpuWeight(conf *coreconfig.Configuration, + _ interface{}, _ *dynamicconfig.DynamicAgentConfiguration, + emitter metrics.MetricEmitter, metaServer *metaserver.MetaServer) { +} diff --git a/pkg/config/agent/qrm/cpu_plugin.go b/pkg/config/agent/qrm/cpu_plugin.go index 64c9be495a..868c704adc 100644 --- a/pkg/config/agent/qrm/cpu_plugin.go +++ b/pkg/config/agent/qrm/cpu_plugin.go @@ -26,6 +26,7 @@ type CPUQRMPluginConfig struct { CPUDynamicPolicyConfig CPUNativePolicyConfig + CPUWeightOptions } type CPUDynamicPolicyConfig struct { @@ -55,6 +56,11 @@ type CPUNativePolicyConfig struct { CPUAllocationOption string } +type CPUWeightOptions struct { + EnableSettingCPUWeight bool + CPUWeightConfigFile string +} + func NewCPUQRMPluginConfig() *CPUQRMPluginConfig { return &CPUQRMPluginConfig{ CPUDynamicPolicyConfig: CPUDynamicPolicyConfig{},