Skip to content

Commit

Permalink
fix(qrm): return an error if no available nics on a node (#195)
Browse files Browse the repository at this point in the history
* fix: fix a bug for 0 valid nic

* fix: fix the panic caused by 0 valid nic

* fix: fix a lint error in unit tests

* fix: remove commented code

* fix: return an error if len(candidateNICs) == 0
  • Loading branch information
smart2003 authored Aug 10, 2023
1 parent c17cf0e commit c261f1e
Show file tree
Hide file tree
Showing 4 changed files with 383 additions and 168 deletions.
4 changes: 0 additions & 4 deletions pkg/agent/qrm-plugins/network/state/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,6 @@ import (
// GenerateMachineState returns NICResourcesMap based on
// machine info and reserved resources
func GenerateMachineState(conf *qrm.QRMPluginsConfiguration, nics []machine.InterfaceInfo, reservation map[string]uint32) (NICMap, error) {
if len(nics) == 0 {
return nil, fmt.Errorf("GenerateMachineState got invalid nics")
}

defaultMachineState := make(NICMap)
for _, iface := range nics {
reservedBandwidth := reservation[iface.Iface]
Expand Down
24 changes: 18 additions & 6 deletions pkg/agent/qrm-plugins/network/staticpolicy/policy.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,12 +99,15 @@ func NewStaticPolicy(agentCtx *agent.GenericContext, conf *config.Configuration,
// it is incorrect to reserve bandwidth on those disabled NICs.
// we only count active NICs as available network devices and allocate bandwidth on them
enabledNICs := filterNICsByAvailability(agentCtx.KatalystMachineInfo.ExtraNetworkInfo.Interface, nil, nil)

// the NICs should be in order by interface name so that we can adopt specific policies for bandwidth reservation or allocation
// e.g. reserve bandwidth for high-priority tasks on the first NIC
sort.SliceStable(enabledNICs, func(i, j int) bool {
return enabledNICs[i].Iface < enabledNICs[j].Iface
})
if len(enabledNICs) != 0 {
// the NICs should be in order by interface name so that we can adopt specific policies for bandwidth reservation or allocation
// e.g. reserve bandwidth for high-priority tasks on the first NIC
sort.SliceStable(enabledNICs, func(i, j int) bool {
return enabledNICs[i].Iface < enabledNICs[j].Iface
})
} else {
general.Infof("no valid nics on this node")
}

// we only support one spreading policy for now: reserve the bandwidth on the first NIC.
// TODO: make the reservation policy configurable
Expand Down Expand Up @@ -781,6 +784,11 @@ func (p *StaticPolicy) calculateHints(req *pluginapi.ResourceRequest) (map[strin
},
}

// return empty hints immediately if no valid nics on this node
if len(p.nics) == 0 {
return hints, nil
}

candidateNICs, err := p.selectNICsByReq(req)
if err != nil {
return hints, fmt.Errorf("failed to select available NICs: %v", err)
Expand Down Expand Up @@ -865,6 +873,10 @@ func (p *StaticPolicy) selectNICsByReq(req *pluginapi.ResourceRequest) ([]machin
filterNICsByHint,
}

if len(p.nics) == 0 {
return []machine.InterfaceInfo{}, nil
}

candidateNICs, err := filterAvailableNICsByReq(p.nics, req, p.agentCtx, nicFilters)
if err != nil {
return nil, fmt.Errorf("filterAvailableNICsByReq failed with error: %v", err)
Expand Down
Loading

0 comments on commit c261f1e

Please sign in to comment.