From 9e6f2fee2bf811c00eb32f728d096f8626a53fd1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=AD=99=E5=81=A5=E4=BF=9E?= Date: Thu, 20 Jun 2024 12:14:39 +0800 Subject: [PATCH] fix(qrm): make ramp up cpus not overlapping with cpus in NUMAs containing NUMA_binding pods --- .../cpu/dynamicpolicy/policy_advisor_handler.go | 6 +++--- .../cpu/dynamicpolicy/policy_allocation_handlers.go | 9 +++------ 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_advisor_handler.go b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_advisor_handler.go index 3275a2cd9..409045e9d 100644 --- a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_advisor_handler.go +++ b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_advisor_handler.go @@ -539,12 +539,12 @@ func (p *DynamicPolicy) applyBlocks(blockCPUSet advisorapi.BlockCPUSet, resp *ad return fmt.Errorf("GetSharedBindingNUMAs failed with error: %v", err) } sharedBindingNUMACPUs := p.machineInfo.CPUDetails.CPUsInNUMANodes(sharedBindingNUMAs.UnsortedList()...) - // rampUpCPUs include reclaim pool + // rampUpCPUs include reclaim pool in NUMAs without NUMA_binding cpus rampUpCPUs := p.machineInfo.CPUDetails.CPUs(). Difference(p.reservedCPUs). Difference(dedicatedCPUSet). - Difference(sharedBindingNUMACPUs). - Union(newEntries[state.PoolNameReclaim][state.FakedContainerName].AllocationResult) + Difference(sharedBindingNUMACPUs) + rampUpCPUsTopologyAwareAssignments, err := machine.GetNumaAwareAssignments(p.machineInfo.CPUTopology, rampUpCPUs) if err != nil { return fmt.Errorf("unable to calculate topologyAwareAssignments for rampUpCPUs, result cpuset: %s, error: %v", diff --git a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_allocation_handlers.go b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_allocation_handlers.go index b576c9a8a..a89346ee8 100644 --- a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_allocation_handlers.go +++ b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_allocation_handlers.go @@ -68,9 +68,7 @@ func (p *DynamicPolicy) sharedCoresWithoutNUMABindingAllocationHandler(_ context machineState := p.state.GetMachineState() pooledCPUs := machineState.GetFilteredAvailableCPUSet(p.reservedCPUs, - func(ai *state.AllocationInfo) bool { - return state.CheckDedicated(ai) || state.CheckNUMABinding(ai) - }, state.CheckDedicatedNUMABinding) + state.CheckDedicated, state.CheckNUMABinding) if pooledCPUs.IsEmpty() { general.Errorf("pod: %s/%s, container: %s get empty pooledCPUs", req.PodNamespace, req.PodName, req.ContainerName) @@ -880,12 +878,11 @@ func (p *DynamicPolicy) applyPoolsAndIsolatedInfo(poolsCPUSet map[string]machine } sharedBindingNUMACPUs := p.machineInfo.CPUDetails.CPUsInNUMANodes(sharedBindingNUMAs.UnsortedList()...) - // rampUpCPUs include reclaim pool + // rampUpCPUs include reclaim pool in NUMAs without NUMA_binding cpus rampUpCPUs := machineState.GetFilteredAvailableCPUSet(p.reservedCPUs, nil, state.CheckDedicatedNUMABinding). Difference(unionDedicatedIsolatedCPUSet). - Difference(sharedBindingNUMACPUs). - Union(poolsCPUSet[state.PoolNameReclaim]) + Difference(sharedBindingNUMACPUs) rampUpCPUsTopologyAwareAssignments, err := machine.GetNumaAwareAssignments(p.machineInfo.CPUTopology, rampUpCPUs) if err != nil {