Skip to content

Commit

Permalink
Network bandwidth allocation (#123)
Browse files Browse the repository at this point in the history
* feat: update config & options for network plugin

* feat: implement state checkpoint for network plugin

* feat: implement the bandwidth allocation

* feat: add unit tests and fix some bugs

* fix: fix lint errors

* fix: add missed licenses

* fix: fix lint errors

* fix: update per Wei's comments

* feat: implement GetTopologyawareXXX

* feat: use general.LoggerWithPrefix to replace klog with function prefix

* fix: bump up go.mod

* fix: update per Jianyu's comments

* fix: update per Jianyu's comments

* fix: set IsNodeResource to true

* fix: set IsNodeResource to true in unit tests

* fix: return min(egress, ingress) as node capacity and allocatable

* fix: remove resourceIdentifier prefix if ns is empty

* fix: fix a problem caused by the previous rebase

* fix: fix the resourceIdentifier inconsistency problem
  • Loading branch information
smart2003 authored Aug 8, 2023
1 parent 785d49b commit 2f6c4fb
Show file tree
Hide file tree
Showing 12 changed files with 2,099 additions and 241 deletions.
20 changes: 20 additions & 0 deletions cmd/katalyst-agent/app/options/qrm/network_plugin.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@ import (
type NetworkOptions struct {
PolicyName string
NetClass NetClassOptions
ReservedBandwidth uint32
EgressCapacityRate float32
IngressCapacityRate float32
SkipNetworkStateCorruption bool
PodLevelNetClassAnnoKey string
PodLevelNetAttributesAnnoKeys string
IPv4ResourceAllocationAnnotationKey string
Expand All @@ -51,6 +55,10 @@ func NewNetworkOptions() *NetworkOptions {
return &NetworkOptions{
PolicyName: "static",
PodLevelNetClassAnnoKey: consts.PodAnnotationNetClassKey,
ReservedBandwidth: 0,
EgressCapacityRate: 0.94,
IngressCapacityRate: 0.9,
SkipNetworkStateCorruption: false,
PodLevelNetAttributesAnnoKeys: "",
IPv4ResourceAllocationAnnotationKey: "qrm.katalyst.kubewharf.io/inet_addr",
IPv6ResourceAllocationAnnotationKey: "qrm.katalyst.kubewharf.io/inet_addr_ipv6",
Expand All @@ -74,6 +82,14 @@ func (o *NetworkOptions) AddFlags(fss *cliflag.NamedFlagSets) {
o.NetClass.DedicatedCores, "net class id for dedicated_cores")
fs.Uint32Var(&o.NetClass.SystemCores, "network-resource-plugin-class-id-system-cores",
o.NetClass.SystemCores, "net class id for system_cores")
fs.Uint32Var(&o.ReservedBandwidth, "network-resource-plugin-reserved-bandwidth",
o.ReservedBandwidth, "reserved bandwidth for business-critical jobs")
fs.Float32Var(&o.EgressCapacityRate, "network-resource-plugin-egress-capacity-rate",
o.EgressCapacityRate, "ratio of available egress capacity to egress line speed")
fs.Float32Var(&o.IngressCapacityRate, "network-resource-plugin-ingress-capacity-rate",
o.IngressCapacityRate, "ratio of available ingress capacity to ingress line speed")
fs.BoolVar(&o.SkipNetworkStateCorruption, "skip-network-state-corruption",
o.SkipNetworkStateCorruption, "if set true, we will skip network state corruption")
fs.StringVar(&o.PodLevelNetClassAnnoKey, "network-resource-plugin-net-class-annotation-key",
o.PodLevelNetClassAnnoKey, "The annotation key of pod-level net class")
fs.StringVar(&o.PodLevelNetAttributesAnnoKeys, "network-resource-plugin-net-attributes-keys",
Expand All @@ -98,6 +114,10 @@ func (o *NetworkOptions) ApplyTo(conf *qrmconfig.NetworkQRMPluginConfig) error {
conf.NetClass.SharedCores = o.NetClass.SharedCores
conf.NetClass.DedicatedCores = o.NetClass.DedicatedCores
conf.NetClass.SystemCores = o.NetClass.SystemCores
conf.ReservedBandwidth = o.ReservedBandwidth
conf.EgressCapacityRate = o.EgressCapacityRate
conf.IngressCapacityRate = o.IngressCapacityRate
conf.SkipNetworkStateCorruption = o.SkipNetworkStateCorruption
conf.PodLevelNetClassAnnoKey = o.PodLevelNetClassAnnoKey
conf.PodLevelNetAttributesAnnoKeys = o.PodLevelNetAttributesAnnoKeys
conf.IPv4ResourceAllocationAnnotationKey = o.IPv4ResourceAllocationAnnotationKey
Expand Down
62 changes: 62 additions & 0 deletions pkg/agent/qrm-plugins/network/state/checkpoint.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
/*
Copyright 2022 The Katalyst Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package state

import (
"encoding/json"

"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager/checksum"
)

var _ checkpointmanager.Checkpoint = &NetworkPluginCheckpoint{}

type NetworkPluginCheckpoint struct {
PolicyName string `json:"policyName"`
MachineState NICMap `json:"machineState"`
PodEntries PodEntries `json:"pod_entries"`
Checksum checksum.Checksum `json:"checksum"`
}

func NewNetworkPluginCheckpoint() *NetworkPluginCheckpoint {
return &NetworkPluginCheckpoint{
PodEntries: make(PodEntries),
MachineState: make(NICMap),
}
}

// MarshalCheckpoint returns marshaled checkpoint
func (cp *NetworkPluginCheckpoint) MarshalCheckpoint() ([]byte, error) {
// make sure checksum wasn't set before, so it doesn't affect output checksum
cp.Checksum = 0
cp.Checksum = checksum.New(cp)
return json.Marshal(*cp)
}

// UnmarshalCheckpoint tries to unmarshal passed bytes to checkpoint
func (cp *NetworkPluginCheckpoint) UnmarshalCheckpoint(blob []byte) error {
return json.Unmarshal(blob, cp)
}

// VerifyChecksum verifies that current checksum of checkpoint is valid
func (cp *NetworkPluginCheckpoint) VerifyChecksum() error {
ck := cp.Checksum
cp.Checksum = 0
err := ck.Verify(cp)
cp.Checksum = ck
return err
}
Loading

0 comments on commit 2f6c4fb

Please sign in to comment.