From 85a5392914c2b21cd2240097696b40c15c30969d Mon Sep 17 00:00:00 2001 From: linzhecheng Date: Mon, 2 Sep 2024 11:28:56 +0800 Subject: [PATCH] fix: check having upper bound If a region is not reclaimable, there is no need to check whether it is in the upper-bound status. Signed-off-by: linzhecheng --- .../headroomassembler/assembler_common.go | 2 +- .../assembler_common_test.go | 96 ++++++++++++++++--- .../assembler_common_test.go | 1 + .../qosaware/resource/cpu/region/region.go | 2 + .../resource/cpu/region/region_base.go | 11 ++- pkg/util/machine/zone.go | 8 ++ pkg/util/machine/zone_linux.go | 8 -- 7 files changed, 102 insertions(+), 26 deletions(-) diff --git a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/assembler/headroomassembler/assembler_common.go b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/assembler/headroomassembler/assembler_common.go index 921812026..c378542dd 100644 --- a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/assembler/headroomassembler/assembler_common.go +++ b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/assembler/headroomassembler/assembler_common.go @@ -91,7 +91,7 @@ func (ha *HeadroomAssemblerCommon) GetHeadroom() (resource.Quantity, error) { if !ok || regionInfo == nil || regionInfo.Headroom < 0 { return resource.Quantity{}, fmt.Errorf("failed to get headroom for %v", r.Name()) } - if regionInfo.RegionStatus.BoundType == types.BoundUpper { + if regionInfo.RegionStatus.BoundType == types.BoundUpper && r.EnableReclaim() { general.Infof("region %v is in status of upper bound", regionInfo.RegionName) hasUpperBound = true } diff --git a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/assembler/headroomassembler/assembler_common_test.go b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/assembler/headroomassembler/assembler_common_test.go index 1ff8965e3..e03428d02 100644 --- a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/assembler/headroomassembler/assembler_common_test.go +++ 
b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/assembler/headroomassembler/assembler_common_test.go @@ -116,8 +116,10 @@ func TestHeadroomAssemblerCommon_GetHeadroom(t *testing.T) { name: "normal report", fields: fields{ entries: map[string]*types.RegionInfo{ - "share-0": { - RegionType: configapi.QoSRegionTypeShare, + "share": { + RegionType: configapi.QoSRegionTypeShare, + OwnerPoolName: "share", + BindingNumas: machine.NewCPUSet(0, 1), }, }, cnr: &v1alpha1.CustomNodeResource{ @@ -225,8 +227,10 @@ func TestHeadroomAssemblerCommon_GetHeadroom(t *testing.T) { name: "disable util based", fields: fields{ entries: map[string]*types.RegionInfo{ - "share-0": { - RegionType: configapi.QoSRegionTypeShare, + "share": { + RegionType: configapi.QoSRegionTypeShare, + OwnerPoolName: "share", + BindingNumas: machine.NewCPUSet(0, 1), }, }, cnr: &v1alpha1.CustomNodeResource{ @@ -271,8 +275,10 @@ func TestHeadroomAssemblerCommon_GetHeadroom(t *testing.T) { name: "gap by oversold ratio", fields: fields{ entries: map[string]*types.RegionInfo{ - "share-0": { - RegionType: configapi.QoSRegionTypeShare, + "share": { + RegionType: configapi.QoSRegionTypeShare, + OwnerPoolName: "share", + BindingNumas: machine.NewCPUSet(0, 1), }, }, cnr: &v1alpha1.CustomNodeResource{ @@ -317,8 +323,10 @@ func TestHeadroomAssemblerCommon_GetHeadroom(t *testing.T) { name: "over maximum core utilization", fields: fields{ entries: map[string]*types.RegionInfo{ - "share-0": { - RegionType: configapi.QoSRegionTypeShare, + "share": { + RegionType: configapi.QoSRegionTypeShare, + OwnerPoolName: "share", + BindingNumas: machine.NewCPUSet(0, 1), }, }, cnr: &v1alpha1.CustomNodeResource{ @@ -363,8 +371,10 @@ func TestHeadroomAssemblerCommon_GetHeadroom(t *testing.T) { name: "limited by capacity", fields: fields{ entries: map[string]*types.RegionInfo{ - "share-0": { - RegionType: configapi.QoSRegionTypeShare, + "share": { + RegionType: configapi.QoSRegionTypeShare, + OwnerPoolName: "share", + BindingNumas: 
machine.NewCPUSet(0, 1), }, }, cnr: &v1alpha1.CustomNodeResource{ @@ -407,6 +417,60 @@ func TestHeadroomAssemblerCommon_GetHeadroom(t *testing.T) { }, want: *resource.NewQuantity(96, resource.DecimalSI), }, + { + name: "numa-exclusive region headroom", + fields: fields{ + entries: map[string]*types.RegionInfo{ + "dedicated": { + RegionType: configapi.QoSRegionTypeDedicatedNumaExclusive, + OwnerPoolName: "dedicated", + BindingNumas: machine.NewCPUSet(0), + Headroom: 10, + RegionStatus: types.RegionStatus{ + BoundType: types.BoundUpper, + }, + }, + }, + cnr: &v1alpha1.CustomNodeResource{ + Status: v1alpha1.CustomNodeResourceStatus{ + Resources: v1alpha1.Resources{ + Allocatable: &v1.ResourceList{ + consts.ReclaimedResourceMilliCPU: resource.MustParse("86000"), + }, + }, + }, + }, + reclaimedResourceConfiguration: &reclaimedresource.ReclaimedResourceConfiguration{ + EnableReclaim: true, + CPUHeadroomConfiguration: &cpuheadroom.CPUHeadroomConfiguration{ + CPUUtilBasedConfiguration: &cpuheadroom.CPUUtilBasedConfiguration{ + Enable: false, + TargetReclaimedCoreUtilization: 0.6, + MaxReclaimedCoreUtilization: 0.8, + MaxOversoldRate: 1.5, + MaxHeadroomCapacityRate: 1., + }, + }, + }, + setFakeMetric: func(store *metric.FakeMetricsFetcher) { + now := time.Now() + for i := 0; i < 96; i++ { + store.SetCPUMetric(i, pkgconsts.MetricCPUUsageRatio, utilmetric.MetricData{Value: 0.3, Time: &now}) + } + store.SetCgroupMetric("/kubepods/besteffort", pkgconsts.MetricCPUUsageCgroup, utilmetric.MetricData{Value: 28.8, Time: &now}) + }, + setMetaCache: func(cache *metacache.MetaCacheImp) { + err := cache.SetPoolInfo(state.PoolNameReclaim, &types.PoolInfo{ + PoolName: state.PoolNameReclaim, + TopologyAwareAssignments: map[int]machine.CPUSet{ + 0: machine.MustParse("0-85"), + }, + }) + require.NoError(t, err) + }, + }, + want: *resource.NewQuantity(58, resource.DecimalSI), + }, } for _, tt := range tests { tt := tt @@ -434,15 +498,17 @@ func TestHeadroomAssemblerCommon_GetHeadroom(t 
*testing.T) { metaServer := generateTestMetaServer(t, tt.fields.cnr, tt.fields.podList, metricsFetcher) - if tt.fields.regions == nil { - shareRegion := region.NewQoSRegionBase("share", "share", configapi.QoSRegionTypeShare, conf, nil, false, metaCache, metaServer, metrics.DummyMetrics{}) - shareRegion.SetBindingNumas(machine.NewCPUSet(0, 1)) + for name, regionInfo := range tt.fields.entries { + r := region.NewQoSRegionBase(name, regionInfo.OwnerPoolName, regionInfo.RegionType, conf, nil, false, metaCache, metaServer, metrics.DummyMetrics{}) + r.SetBindingNumas(regionInfo.BindingNumas) tt.fields.regions = map[string]region.QoSRegion{ - "share": shareRegion, + name: r, } } + reservedForReclaim := map[int]int{0: 2, 1: 2} + numaAvailable := map[int]int{0: 46, 1: 46} - ha := NewHeadroomAssemblerCommon(conf, nil, &tt.fields.regions, nil, nil, nil, metaCache, metaServer, metrics.DummyMetrics{}) + ha := NewHeadroomAssemblerCommon(conf, nil, &tt.fields.regions, &reservedForReclaim, &numaAvailable, nil, metaCache, metaServer, metrics.DummyMetrics{}) store := metricsFetcher.(*metric.FakeMetricsFetcher) tt.fields.setFakeMetric(store) diff --git a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/assembler/provisionassembler/assembler_common_test.go b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/assembler/provisionassembler/assembler_common_test.go index 6ead78af3..5dd173eda 100644 --- a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/assembler/provisionassembler/assembler_common_test.go +++ b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/assembler/provisionassembler/assembler_common_test.go @@ -110,6 +110,7 @@ func (fake *FakeRegion) IsNumaBinding() bool { return fake.isNumaBinding } func (fake *FakeRegion) SetThrottled(throttled bool) { fake.throttled = throttled } +func (fake *FakeRegion) EnableReclaim() bool { return true } func (fake *FakeRegion) AddContainer(ci *types.ContainerInfo) error { return nil } func (fake *FakeRegion) TryUpdateProvision() {} func 
(fake *FakeRegion) TryUpdateHeadroom() {} diff --git a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/region.go b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/region.go index bcbb1edc4..64c80d461 100644 --- a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/region.go +++ b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/region.go @@ -69,6 +69,8 @@ type QoSRegion interface { IsThrottled() bool + EnableReclaim() bool + // GetProvisionPolicy returns provision policy for this region, // the first is policy with top priority, while the second is the policy that is in-use currently GetProvisionPolicy() (types.CPUProvisionPolicyName, types.CPUProvisionPolicyName) diff --git a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/region_base.go b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/region_base.go index 5a4020927..778b69a7b 100644 --- a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/region_base.go +++ b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/region_base.go @@ -240,7 +240,7 @@ func NewQoSRegionBase(name string, ownerPoolName string, regionType v1alpha1.QoS if r.conf.PolicyRama.EnableBorwein { r.borweinController = borweinctrl.NewBorweinController(name, regionType, ownerPoolName, conf, metaReader, emitter) } - r.enableReclaim = r.EnableReclaim + r.enableReclaim = r.EnableReclaimFunc klog.Infof("[qosaware-cpu] created region [%v/%v/%v]", r.Name(), r.Type(), r.OwnerPoolName()) @@ -439,6 +439,13 @@ func (r *QoSRegionBase) GetProvisionPolicy() (policyTopPriority types.CPUProvisi return } +func (r *QoSRegionBase) EnableReclaim() bool { + r.Lock() + defer r.Unlock() + + return r.enableReclaim() +} + func (r *QoSRegionBase) GetHeadRoomPolicy() (policyTopPriority types.CPUHeadroomPolicyName, policyInUse types.CPUHeadroomPolicyName) { r.Lock() defer r.Unlock() @@ -802,7 +809,7 @@ func (r *QoSRegionBase) updateOvershootStatus() bool { return overshoot } -func (r *QoSRegionBase) EnableReclaim() 
bool { +func (r *QoSRegionBase) EnableReclaimFunc() bool { return r.ResourceEssentials.EnableReclaim } diff --git a/pkg/util/machine/zone.go b/pkg/util/machine/zone.go index a5382056a..6ffe3f3c3 100644 --- a/pkg/util/machine/zone.go +++ b/pkg/util/machine/zone.go @@ -23,6 +23,14 @@ import ( "strings" ) +type NormalZoneInfo struct { + Node int64 + Free uint64 + Min uint64 + Low uint64 + FileInactive uint64 +} + var nodeZoneRE = regexp.MustCompile(`(\d+), zone\s+(\w+)`) func parseNormalZoneInfo(zoneInfoData []byte) ([]NormalZoneInfo, error) { diff --git a/pkg/util/machine/zone_linux.go b/pkg/util/machine/zone_linux.go index e4c638fd3..bf7f9ddec 100644 --- a/pkg/util/machine/zone_linux.go +++ b/pkg/util/machine/zone_linux.go @@ -22,14 +22,6 @@ import ( "os" ) -type NormalZoneInfo struct { - Node int64 - Free uint64 - Min uint64 - Low uint64 - FileInactive uint64 -} - func GetNormalZoneInfo(zoneInfoPath string) []NormalZoneInfo { data, err := os.ReadFile(zoneInfoPath) if err != nil {