From 596a871931f1bd8692bc44db335920391a981550 Mon Sep 17 00:00:00 2001
From: Robin Lu <robin.lu@bytedance.com>
Date: Fri, 8 Nov 2024 18:28:55 +0800
Subject: [PATCH] feat(qrm): adding support to cpu.weight

Signed-off-by: Robin Lu <robin.lu@bytedance.com>
---
 .../app/options/qrm/cpu_plugin.go             |  16 ++
 pkg/agent/qrm-plugins/cpu/consts/consts.go    |   1 +
 .../qrm-plugins/cpu/dynamicpolicy/policy.go   |  12 ++
 .../cpu/handlers/cpuweight/const.go           |  21 ++
 .../cpu/handlers/cpuweight/cpuweight_linux.go |  99 +++++++++
 .../cpuweight/cpuweight_linux_test.go         | 189 ++++++++++++++++++
 .../cpuweight/cpuweight_unsupported.go        |  31 +++
 pkg/config/agent/qrm/cpu_plugin.go            |   6 +
 8 files changed, 375 insertions(+)
 create mode 100644 pkg/agent/qrm-plugins/cpu/handlers/cpuweight/const.go
 create mode 100644 pkg/agent/qrm-plugins/cpu/handlers/cpuweight/cpuweight_linux.go
 create mode 100644 pkg/agent/qrm-plugins/cpu/handlers/cpuweight/cpuweight_linux_test.go
 create mode 100644 pkg/agent/qrm-plugins/cpu/handlers/cpuweight/cpuweight_unsupported.go

diff --git a/cmd/katalyst-agent/app/options/qrm/cpu_plugin.go b/cmd/katalyst-agent/app/options/qrm/cpu_plugin.go
index f07503cff2..b9bc2c46a6 100644
--- a/cmd/katalyst-agent/app/options/qrm/cpu_plugin.go
+++ b/cmd/katalyst-agent/app/options/qrm/cpu_plugin.go
@@ -31,6 +31,7 @@ type CPUOptions struct {
 
 	CPUDynamicPolicyOptions
 	CPUNativePolicyOptions
+	CPUWeightOptions
 }
 
 type CPUDynamicPolicyOptions struct {
@@ -48,6 +49,11 @@ type CPUNativePolicyOptions struct {
 	CPUAllocationOption        string
 }
 
+type CPUWeightOptions struct {
+	EnableSettingCPUWeight bool
+	CPUWeightConfigFile    string
+}
+
 func NewCPUOptions() *CPUOptions {
 	return &CPUOptions{
 		PolicyName:             "dynamic",
@@ -70,6 +76,10 @@ func NewCPUOptions() *CPUOptions {
 			EnableFullPhysicalCPUsOnly: false,
 			CPUAllocationOption:        "packed",
 		},
+		CPUWeightOptions: CPUWeightOptions{
+			EnableSettingCPUWeight: false,
+			CPUWeightConfigFile:    "",
+		},
 	}
 }
 
@@ -103,6 +113,10 @@ func (o *CPUOptions) AddFlags(fss *cliflag.NamedFlagSets) {
 	fs.BoolVar(&o.EnableFullPhysicalCPUsOnly, "enable-full-physical-cpus-only",
 		o.EnableFullPhysicalCPUsOnly, "if set true, we will enable extra allocation restrictions to "+
 			"avoid different containers to possibly end up on the same core.")
+	fs.BoolVar(&o.EnableSettingCPUWeight, "enable-setting-cpu-weight",
+		o.EnableSettingCPUWeight, "if set true, we will enable cpu weight related features")
+	fs.StringVar(&o.CPUWeightConfigFile, "qrm-cpu-weight-config-file",
+		o.CPUWeightConfigFile, "cpu weight config file")
 }
 
 func (o *CPUOptions) ApplyTo(conf *qrmconfig.CPUQRMPluginConfig) error {
@@ -118,5 +132,7 @@ func (o *CPUOptions) ApplyTo(conf *qrmconfig.CPUQRMPluginConfig) error {
 	conf.CPUAllocationOption = o.CPUAllocationOption
 	conf.CPUNUMAHintPreferPolicy = o.CPUNUMAHintPreferPolicy
 	conf.CPUNUMAHintPreferLowThreshold = o.CPUNUMAHintPreferLowThreshold
+	conf.EnableSettingCPUWeight = o.EnableSettingCPUWeight
+	conf.CPUWeightConfigFile = o.CPUWeightConfigFile
 	return nil
 }
diff --git a/pkg/agent/qrm-plugins/cpu/consts/consts.go b/pkg/agent/qrm-plugins/cpu/consts/consts.go
index ba2bfb0f51..7edb737ab4 100644
--- a/pkg/agent/qrm-plugins/cpu/consts/consts.go
+++ b/pkg/agent/qrm-plugins/cpu/consts/consts.go
@@ -32,6 +32,7 @@ const (
 	CheckCPUSet                = CPUPluginDynamicPolicyName + "_check_cpuset"
 	SyncCPUIdle                = CPUPluginDynamicPolicyName + "_sync_cpu_idle"
 	CommunicateWithAdvisor     = CPUPluginDynamicPolicyName + "_communicate_with_advisor"
+	SyncCPUWeight              = CPUPluginDynamicPolicyName + "_sync_cpu_weight"
 )
 
 const (
diff --git a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy.go b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy.go
index 4458f4712b..6657131492 100644
--- a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy.go
+++ b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy.go
@@ -42,6 +42,7 @@ import (
 	"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpueviction"
 	"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state"
 	"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/dynamicpolicy/validator"
+	"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/handlers/cpuweight"
 	cpuutil "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/util"
 	"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/util"
 	"github.com/kubewharf/katalyst-core/pkg/agent/utilcomponent/periodicalhandler"
@@ -133,6 +134,7 @@ type DynamicPolicy struct {
 	transitionPeriod              time.Duration
 	cpuNUMAHintPreferPolicy       string
 	cpuNUMAHintPreferLowThreshold float64
+	enableSettingCPUWeight        bool
 }
 
 func NewDynamicPolicy(agentCtx *agent.GenericContext, conf *config.Configuration,
@@ -197,6 +199,7 @@ func NewDynamicPolicy(agentCtx *agent.GenericContext, conf *config.Configuration
 		enableCPUAdvisor:              conf.CPUQRMPluginConfig.EnableCPUAdvisor,
 		cpuNUMAHintPreferPolicy:       conf.CPUQRMPluginConfig.CPUNUMAHintPreferPolicy,
 		cpuNUMAHintPreferLowThreshold: conf.CPUQRMPluginConfig.CPUNUMAHintPreferLowThreshold,
+		enableSettingCPUWeight:        conf.EnableSettingCPUWeight,
 		reservedCPUs:                  reservedCPUs,
 		extraStateFileAbsPath:         conf.ExtraStateFileAbsPath,
 		enableSyncingCPUIdle:          conf.CPUQRMPluginConfig.EnableSyncingCPUIdle,
@@ -332,6 +335,15 @@ func (p *DynamicPolicy) Start() (err error) {
 			p.cpuAdvisorSocketAbsPath, p.cpuPluginSocketAbsPath)
 	}
 
+	if p.enableSettingCPUWeight {
+		general.Infof("setCPUWeight enabled")
+		err = periodicalhandler.RegisterPeriodicalHandlerWithHealthz(cpuconsts.SyncCPUWeight, general.HealthzCheckStateNotReady,
+			qrm.QRMCPUPluginPeriodicalHandlerGroupName, cpuweight.SetCPUWeight, 1800*time.Second, healthCheckTolerationTimes)
+		if err != nil {
+			general.Errorf("start %v failed,err:%v", cpuconsts.SyncCPUWeight, err)
+		}
+	}
+
 	general.Infof("start dynamic policy cpu plugin with sys-advisor")
 	err = p.initAdvisorClientConn()
 	if err != nil {
diff --git a/pkg/agent/qrm-plugins/cpu/handlers/cpuweight/const.go b/pkg/agent/qrm-plugins/cpu/handlers/cpuweight/const.go
new file mode 100644
index 0000000000..5f8730da28
--- /dev/null
+++ b/pkg/agent/qrm-plugins/cpu/handlers/cpuweight/const.go
@@ -0,0 +1,21 @@
+/*
+Copyright 2022 The Katalyst Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package cpuweight
+
+const (
+	metricNameCPUWeight = "async_handler_cpu_weight"
+)
diff --git a/pkg/agent/qrm-plugins/cpu/handlers/cpuweight/cpuweight_linux.go b/pkg/agent/qrm-plugins/cpu/handlers/cpuweight/cpuweight_linux.go
new file mode 100644
index 0000000000..7bd7f79f8d
--- /dev/null
+++ b/pkg/agent/qrm-plugins/cpu/handlers/cpuweight/cpuweight_linux.go
@@ -0,0 +1,99 @@
+//go:build linux
+// +build linux
+
+/*
+Copyright 2022 The Katalyst Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package cpuweight
+
+import (
+	"k8s.io/apimachinery/pkg/util/errors"
+
+	cpuconsts "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/consts"
+	coreconfig "github.com/kubewharf/katalyst-core/pkg/config"
+	dynamicconfig "github.com/kubewharf/katalyst-core/pkg/config/agent/dynamic"
+	"github.com/kubewharf/katalyst-core/pkg/metaserver"
+	"github.com/kubewharf/katalyst-core/pkg/metrics"
+	cgroupcm "github.com/kubewharf/katalyst-core/pkg/util/cgroup/common"
+	cgroupmgr "github.com/kubewharf/katalyst-core/pkg/util/cgroup/manager"
+	"github.com/kubewharf/katalyst-core/pkg/util/general"
+)
+
+func applyCPUWeightCgroupLevelConfig(conf *coreconfig.Configuration, emitter metrics.MetricEmitter) {
+	if conf.CPUWeightConfigFile == "" {
+		general.Errorf("CPUWeightConfigFile isn't configured")
+		return
+	}
+
+	cpuWightCgroupLevelConfigs := make(map[string]uint64)
+	err := general.LoadJsonConfig(conf.CPUWeightConfigFile, &cpuWightCgroupLevelConfigs)
+	if err != nil {
+		general.Errorf("load CPUWeightCgroupLevelConfig failed with error: %v", err)
+		return
+	}
+
+	for relativeCgPath, weight := range cpuWightCgroupLevelConfigs {
+		err := cgroupmgr.ApplyCPUWithRelativePath(relativeCgPath, &cgroupcm.CPUData{
+			Shares: weight,
+		})
+		if err != nil {
+			general.Errorf("ApplyCPUWeightWithRelativePath in relativeCgPath: %s failed with error: %v",
+				relativeCgPath, err)
+		} else {
+			general.Infof("ApplyCPUWeightWithRelativePath weight: %d in relativeCgPath: %s successfully",
+				weight, relativeCgPath)
+			_ = emitter.StoreInt64(metricNameCPUWeight, int64(weight), metrics.MetricTypeNameRaw,
+				metrics.ConvertMapToTags(map[string]string{
+					"cgPath": relativeCgPath,
+				})...)
+
+		}
+	}
+}
+
+func SetCPUWeight(conf *coreconfig.Configuration,
+	_ interface{}, _ *dynamicconfig.DynamicAgentConfiguration,
+	emitter metrics.MetricEmitter, metaServer *metaserver.MetaServer,
+) {
+	general.Infof("called")
+
+	var errList []error
+	defer func() {
+		_ = general.UpdateHealthzStateByError(cpuconsts.SyncCPUWeight, errors.NewAggregate(errList))
+	}()
+
+	if conf == nil {
+		general.Errorf("nil extraConf")
+		return
+	} else if emitter == nil {
+		general.Errorf("nil emitter")
+		return
+	} else if metaServer == nil {
+		general.Errorf("nil metaServer")
+		return
+	}
+
+	// SettingCPUWeight featuregate.
+	if !conf.EnableSettingCPUWeight {
+		general.Infof("SetCPUWeight disabled")
+		return
+	}
+
+	// checking cgroup-level cpu.weight configuration.
+	if len(conf.CPUWeightConfigFile) > 0 {
+		applyCPUWeightCgroupLevelConfig(conf, emitter)
+	}
+}
diff --git a/pkg/agent/qrm-plugins/cpu/handlers/cpuweight/cpuweight_linux_test.go b/pkg/agent/qrm-plugins/cpu/handlers/cpuweight/cpuweight_linux_test.go
new file mode 100644
index 0000000000..8879fbf459
--- /dev/null
+++ b/pkg/agent/qrm-plugins/cpu/handlers/cpuweight/cpuweight_linux_test.go
@@ -0,0 +1,189 @@
+//go:build linux
+// +build linux
+
+/*
+Copyright 2022 The Katalyst Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package cpuweight
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	v1 "k8s.io/api/core/v1"
+
+	coreconfig "github.com/kubewharf/katalyst-core/pkg/config"
+	"github.com/kubewharf/katalyst-core/pkg/config/agent"
+	configagent "github.com/kubewharf/katalyst-core/pkg/config/agent"
+	dynamicconfig "github.com/kubewharf/katalyst-core/pkg/config/agent/dynamic"
+	"github.com/kubewharf/katalyst-core/pkg/config/agent/qrm"
+	"github.com/kubewharf/katalyst-core/pkg/metaserver"
+	metaagent "github.com/kubewharf/katalyst-core/pkg/metaserver/agent"
+	"github.com/kubewharf/katalyst-core/pkg/metaserver/agent/metric"
+	"github.com/kubewharf/katalyst-core/pkg/metaserver/agent/pod"
+	"github.com/kubewharf/katalyst-core/pkg/metrics"
+	"github.com/kubewharf/katalyst-core/pkg/util/machine"
+)
+
+func makeMetaServer() (*metaserver.MetaServer, error) {
+	server := &metaserver.MetaServer{
+		MetaAgent: &metaagent.MetaAgent{},
+	}
+
+	cpuTopology, err := machine.GenerateDummyCPUTopology(16, 1, 2)
+	if err != nil {
+		return nil, err
+	}
+
+	server.KatalystMachineInfo = &machine.KatalystMachineInfo{
+		CPUTopology: cpuTopology,
+	}
+	server.MetricsFetcher = metric.NewFakeMetricsFetcher(metrics.DummyMetrics{})
+	return server, nil
+}
+
+func TestSetCPUWeight(t *testing.T) {
+	t.Parallel()
+
+	SetCPUWeight(&coreconfig.Configuration{
+		AgentConfiguration: &agent.AgentConfiguration{
+			StaticAgentConfiguration: &configagent.StaticAgentConfiguration{
+				QRMPluginsConfiguration: &qrm.QRMPluginsConfiguration{
+					CPUQRMPluginConfig: &qrm.CPUQRMPluginConfig{
+						CPUWeightOptions: qrm.CPUWeightOptions{
+							EnableSettingCPUWeight: false,
+						},
+					},
+				},
+			},
+		},
+	}, nil, &dynamicconfig.DynamicAgentConfiguration{}, nil, nil)
+
+	SetCPUWeight(&coreconfig.Configuration{
+		AgentConfiguration: &agent.AgentConfiguration{
+			StaticAgentConfiguration: &configagent.StaticAgentConfiguration{
+				QRMPluginsConfiguration: &qrm.QRMPluginsConfiguration{
+					CPUQRMPluginConfig: &qrm.CPUQRMPluginConfig{
+						CPUWeightOptions: qrm.CPUWeightOptions{
+							EnableSettingCPUWeight: true,
+						},
+					},
+				},
+			},
+		},
+	}, nil, &dynamicconfig.DynamicAgentConfiguration{}, nil, nil)
+
+	metaServer, err := makeMetaServer()
+	assert.NoError(t, err)
+	metaServer.PodFetcher = &pod.PodFetcherStub{PodList: []*v1.Pod{}}
+	SetCPUWeight(&coreconfig.Configuration{
+		AgentConfiguration: &agent.AgentConfiguration{
+			StaticAgentConfiguration: &configagent.StaticAgentConfiguration{
+				QRMPluginsConfiguration: &qrm.QRMPluginsConfiguration{
+					CPUQRMPluginConfig: &qrm.CPUQRMPluginConfig{
+						CPUWeightOptions: qrm.CPUWeightOptions{
+							EnableSettingCPUWeight: true,
+						},
+					},
+				},
+			},
+		},
+	}, metrics.DummyMetrics{}, &dynamicconfig.DynamicAgentConfiguration{}, metrics.DummyMetrics{}, metaServer)
+
+	SetCPUWeight(&coreconfig.Configuration{
+		AgentConfiguration: &agent.AgentConfiguration{
+			StaticAgentConfiguration: &configagent.StaticAgentConfiguration{
+				QRMPluginsConfiguration: &qrm.QRMPluginsConfiguration{
+					CPUQRMPluginConfig: &qrm.CPUQRMPluginConfig{
+						CPUWeightOptions: qrm.CPUWeightOptions{
+							EnableSettingCPUWeight: true,
+						},
+					},
+				},
+			},
+		},
+	}, nil, &dynamicconfig.DynamicAgentConfiguration{}, metrics.DummyMetrics{}, metaServer)
+
+	/*
+		applyCPUWeightCgroupLevelConfig(&coreconfig.Configuration{
+			AgentConfiguration: &agent.AgentConfiguration{
+				StaticAgentConfiguration: &configagent.StaticAgentConfiguration{
+					QRMPluginsConfiguration: &qrm.QRMPluginsConfiguration{
+						CPUQRMPluginConfig: &qrm.CPUQRMPluginConfig{
+							CPUWeightOptions: qrm.CPUWeightOptions{
+								EnableSettingCPUWeight: true,
+							},
+						},
+					},
+				},
+			},
+		}, metrics.DummyMetrics{})
+
+		applyCPUWeightCgroupLevelConfig(&coreconfig.Configuration{
+			AgentConfiguration: &agent.AgentConfiguration{
+				StaticAgentConfiguration: &configagent.StaticAgentConfiguration{
+					QRMPluginsConfiguration: &qrm.QRMPluginsConfiguration{
+						CPUQRMPluginConfig: &qrm.CPUQRMPluginConfig{
+							CPUWeightOptions: qrm.CPUWeightOptions{
+								EnableSettingCPUWeight: true,
+								CPUWeightConfigFile:    "fake",
+							},
+						},
+					},
+				},
+			},
+		}, metrics.DummyMetrics{})
+
+		// Create a temporary file
+		tempFile, err := ioutil.TempFile("", "cpuweight.json")
+		if err != nil {
+			fmt.Println("Error creating temporary file:", err)
+			return
+		}
+		defer os.Remove(tempFile.Name()) // Defer removing the temporary file
+
+		// Write the JSON content to the temporary file
+		jsonContent := `{
+			    "fake": 200,
+			    "fake2": 400
+			        }`
+
+		if _, err := tempFile.WriteString(jsonContent); err != nil {
+			fmt.Println("Error writing to temporary file:", err)
+			return
+		}
+		absPathCgroup, err := filepath.Abs(tempFile.Name())
+		if err != nil {
+			fmt.Println("Error obtaining absolute path:", err)
+			return
+		}
+
+		applyCPUWeightCgroupLevelConfig(&coreconfig.Configuration{
+			AgentConfiguration: &agent.AgentConfiguration{
+				StaticAgentConfiguration: &configagent.StaticAgentConfiguration{
+					QRMPluginsConfiguration: &qrm.QRMPluginsConfiguration{
+						CPUQRMPluginConfig: &qrm.CPUQRMPluginConfig{
+							CPUWeightOptions: qrm.CPUWeightOptions{
+								EnableSettingCPUWeight: true,
+								CPUWeightConfigFile:    absPathCgroup,
+							},
+						},
+					},
+				},
+			},
+		}, metrics.DummyMetrics{})
+	*/
+}
diff --git a/pkg/agent/qrm-plugins/cpu/handlers/cpuweight/cpuweight_unsupported.go b/pkg/agent/qrm-plugins/cpu/handlers/cpuweight/cpuweight_unsupported.go
new file mode 100644
index 0000000000..62a5ad8bf3
--- /dev/null
+++ b/pkg/agent/qrm-plugins/cpu/handlers/cpuweight/cpuweight_unsupported.go
@@ -0,0 +1,31 @@
+//go:build !linux
+
+/*
+Copyright 2022 The Katalyst Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package cpuweight
+
+import (
+	coreconfig "github.com/kubewharf/katalyst-core/pkg/config"
+	dynamicconfig "github.com/kubewharf/katalyst-core/pkg/config/agent/dynamic"
+	"github.com/kubewharf/katalyst-core/pkg/metaserver"
+	"github.com/kubewharf/katalyst-core/pkg/metrics"
+)
+
+func SetCpuWeight(conf *coreconfig.Configuration,
+	_ interface{}, _ *dynamicconfig.DynamicAgentConfiguration,
+	emitter metrics.MetricEmitter, metaServer *metaserver.MetaServer) {
+}
diff --git a/pkg/config/agent/qrm/cpu_plugin.go b/pkg/config/agent/qrm/cpu_plugin.go
index 64c9be495a..868c704adc 100644
--- a/pkg/config/agent/qrm/cpu_plugin.go
+++ b/pkg/config/agent/qrm/cpu_plugin.go
@@ -26,6 +26,7 @@ type CPUQRMPluginConfig struct {
 
 	CPUDynamicPolicyConfig
 	CPUNativePolicyConfig
+	CPUWeightOptions
 }
 
 type CPUDynamicPolicyConfig struct {
@@ -55,6 +56,11 @@ type CPUNativePolicyConfig struct {
 	CPUAllocationOption string
 }
 
+type CPUWeightOptions struct {
+	EnableSettingCPUWeight bool
+	CPUWeightConfigFile    string
+}
+
 func NewCPUQRMPluginConfig() *CPUQRMPluginConfig {
 	return &CPUQRMPluginConfig{
 		CPUDynamicPolicyConfig: CPUDynamicPolicyConfig{},