Skip to content

Commit

Permalink
VM live migrate (#3767)
Browse files Browse the repository at this point in the history
optimize vm live migrate

---------

Signed-off-by: bobz965 <[email protected]>
Co-authored-by: Oilbeater <[email protected]>
  • Loading branch information
zbb88888 and oilbeater authored Mar 5, 2024
1 parent 5afcc3e commit 116c24c
Show file tree
Hide file tree
Showing 7 changed files with 349 additions and 17 deletions.
84 changes: 84 additions & 0 deletions mocks/pkg/ovs/interface.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 1 addition & 3 deletions pkg/controller/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -585,9 +585,7 @@ func (c *Controller) updateProviderNetworkForNodeDeletion(pn *kubeovnv1.Provider
}
}
if changed {
if newPn == nil {
newPn = pn.DeepCopy()
}
newPn = pn.DeepCopy()
newPn.Spec.CustomInterfaces = customInterfaces
}
if newPn != nil {
Expand Down
119 changes: 113 additions & 6 deletions pkg/controller/pod.go
Original file line number Diff line number Diff line change
Expand Up @@ -611,7 +611,6 @@ func (c *Controller) handleAddOrUpdatePod(key string) (err error) {
return nil
}
pod = cachedPod.DeepCopy()
// check if allocate subnet is need. also allocate subnet when hotplug nic
needAllocatePodNets := needAllocateSubnets(pod, podNets)
if len(needAllocatePodNets) != 0 {
if cachedPod, err = c.reconcileAllocateSubnets(cachedPod, pod, needAllocatePodNets); err != nil {
Expand Down Expand Up @@ -641,6 +640,16 @@ func (c *Controller) reconcileAllocateSubnets(cachedPod, pod *v1.Pod, needAlloca
podName := c.getNameByPod(pod)
// todo: isVmPod, getPodType, getNameByPod has duplicated logic

var err error
var isMigrate, migrated, migratedFail bool
var vmKey, srcNodeName, targetNodeName string
if isVMPod && c.config.EnableKeepVMIP {
vmKey = fmt.Sprintf("%s/%s", namespace, vmName)
if isMigrate, migrated, migratedFail, srcNodeName, targetNodeName, err = c.migrateVM(pod, vmKey); err != nil {
klog.Error(err)
return nil, err
}
}
// Avoid create lsp for already running pod in ovn-nb when controller restart
for _, podNet := range needAllocatePodNets {
// the subnet may changed when alloc static ip from the latter subnet after ns supports multi subnets
Expand Down Expand Up @@ -669,10 +678,11 @@ func (c *Controller) reconcileAllocateSubnets(cachedPod, pod *v1.Pod, needAlloca
delete(pod.Annotations, fmt.Sprintf(util.PodNicAnnotationTemplate, podNet.ProviderName))
}
pod.Annotations[fmt.Sprintf(util.AllocatedAnnotationTemplate, podNet.ProviderName)] = "true"
if isVMPod && c.config.EnableKeepVMIP {

if vmKey != "" {
pod.Annotations[fmt.Sprintf(util.VMTemplate, podNet.ProviderName)] = vmName
if err := c.changeVMSubnet(vmName, namespace, podNet.ProviderName, subnet.Name); err != nil {
klog.Errorf("change subnet of pod %s/%s to %s failed: %v", namespace, name, subnet.Name, err)
klog.Errorf("vm %s change subnet to %s failed: %v", vmKey, subnet.Name, err)
return nil, err
}
}
Expand Down Expand Up @@ -719,13 +729,31 @@ func (c *Controller) reconcileAllocateSubnets(cachedPod, pod *v1.Pod, needAlloca
DHCPv4OptionsUUID: subnet.Status.DHCPv4OptionsUUID,
DHCPv6OptionsUUID: subnet.Status.DHCPv6OptionsUUID,
}

if err := c.OVNNbClient.CreateLogicalSwitchPort(subnet.Name, portName, ipStr, mac, podName, pod.Namespace, portSecurity, securityGroupAnnotation, vips, podNet.Subnet.Spec.EnableDHCP, dhcpOptions, subnet.Spec.Vpc); err != nil {
if err := c.OVNNbClient.CreateLogicalSwitchPort(subnet.Name, portName, ipStr, mac, podName, pod.Namespace,
portSecurity, securityGroupAnnotation, vips, podNet.Subnet.Spec.EnableDHCP, dhcpOptions, subnet.Spec.Vpc); err != nil {
c.recorder.Eventf(pod, v1.EventTypeWarning, "CreateOVNPortFailed", err.Error())
klog.Errorf("%v", err)
return nil, err
}

if isMigrate {
if migrated {
klog.Infof("migrate end reset options for lsp %s from %s to %s, migrated fail: %t", portName, srcNodeName, targetNodeName, migratedFail)
if err := c.OVNNbClient.ResetLogicalSwitchPortMigrateOptions(portName, srcNodeName, targetNodeName, migratedFail); err != nil {
err = fmt.Errorf("failed to clean migrate options for lsp %s, %v", portName, err)
klog.Error(err)
return nil, err
}
} else {
klog.Infof("migrate start set options for lsp %s from %s to %s", portName, srcNodeName, targetNodeName)
if err := c.OVNNbClient.SetLogicalSwitchPortMigrateOptions(portName, srcNodeName, targetNodeName); err != nil {
err = fmt.Errorf("failed to set migrate options for lsp %s, %v", portName, err)
klog.Error(err)
return nil, err
}
}
}

if pod.Annotations[fmt.Sprintf(util.Layer2ForwardAnnotationTemplate, podNet.ProviderName)] == "true" {
if err := c.OVNNbClient.EnablePortLayer2forward(portName); err != nil {
c.recorder.Eventf(pod, v1.EventTypeWarning, "SetOVNPortL2ForwardFailed", err.Error())
Expand Down Expand Up @@ -1008,6 +1036,19 @@ func (c *Controller) handleDeletePod(key string) error {
}
isVMPod, vmName := isVMPod(pod)
if isVMPod && c.config.EnableKeepVMIP {
ports, err := c.OVNNbClient.ListNormalLogicalSwitchPorts(true, map[string]string{"pod": podKey})
if err != nil {
klog.Errorf("failed to list lsps of pod '%s', %v", pod.Name, err)
return err
}
for _, port := range ports {
klog.Infof("clean migrate options for vm lsp %s", port.Name)
if err := c.OVNNbClient.CleanLogicalSwitchPortMigrateOptions(port.Name); err != nil {
err = fmt.Errorf("failed to clean migrate options for vm lsp %s, %v", port.Name, err)
klog.Error(err)
return err
}
}
vmToBeDel := c.isVMToDel(pod, vmName)
isDelete, err := appendCheckPodToDel(c, pod, vmName, util.VMInstance)
if pod.DeletionTimestamp != nil {
Expand Down Expand Up @@ -1341,6 +1382,9 @@ func getNextHopByTunnelIP(gw []net.IP) string {
}

func needAllocateSubnets(pod *v1.Pod, nets []*kubeovnNet) []*kubeovnNet {
// check if allocate from subnet is need.
// allocate subnet when change subnet to hotplug nic
// allocate subnet when migrate vm
if !isPodAlive(pod) {
return nil
}
Expand All @@ -1349,9 +1393,15 @@ func needAllocateSubnets(pod *v1.Pod, nets []*kubeovnNet) []*kubeovnNet {
return nets
}

migrate := false
if job, ok := pod.Annotations[util.MigrationJobAnnotation]; ok {
klog.Infof("pod %s/%s is in the migration job %s", pod.Namespace, pod.Name, job)
migrate = true
}

result := make([]*kubeovnNet, 0, len(nets))
for _, n := range nets {
if pod.Annotations[fmt.Sprintf(util.AllocatedAnnotationTemplate, n.ProviderName)] != "true" {
if migrate || pod.Annotations[fmt.Sprintf(util.AllocatedAnnotationTemplate, n.ProviderName)] != "true" {
result = append(result, n)
}
}
Expand Down Expand Up @@ -2089,3 +2139,60 @@ func (c *Controller) getVirtualIPs(pod *v1.Pod, podNets []*kubeovnNet) map[strin
}
return vipsMap
}

// migrate vm return migrate, migrated, fail, src node, target node, err
func (c *Controller) migrateVM(pod *v1.Pod, vmKey string) (bool, bool, bool, string, string, error) {
// try optimize vm migration, no need return error
// migrate true means need ovn set migrate options
// migrated ok means need set migrate options to target node
// migrated failed means need set migrate options to source node
if _, ok := pod.Annotations[util.MigrationJobAnnotation]; !ok {
return false, false, false, "", "", nil
}
if _, ok := pod.Annotations[util.MigrationSourceAnnotation]; ok {
klog.Infof("will migrate out vm %s pod %s from source node %s", vmKey, pod.Name, pod.Spec.NodeName)
return false, false, false, "", "", nil
}
// ovn set migrator only in the process of target vm pod
if _, ok := pod.Annotations[util.MigrationTargetAnnotation]; !ok {
return false, false, false, "", "", nil
}
srcNode, ok := pod.Annotations[util.MigrationSourceNodeAnnotation]
if !ok || srcNode == "" {
err := fmt.Errorf("vm %s migration source node is not set", vmKey)
klog.Warning(err)
return false, false, false, "", "", nil
}
targetNode := pod.Spec.NodeName
if targetNode == "" {
err := fmt.Errorf("vm %s migration target node is not set", vmKey)
klog.Warning(err)
return false, false, false, "", "", nil
}
migratePhase, ok := pod.Annotations[util.MigrationPhaseAnnotation]
if !ok {
err := fmt.Errorf("vm %s migration phase is not set", vmKey)
klog.Warning(err)
return false, false, false, "", "", nil
}
// check migrate phase
if migratePhase == "" {
err := fmt.Errorf("vm %s migration phase is empty", vmKey)
klog.Warning(err)
return false, false, false, "", "", nil
}
if migratePhase == util.MigrationPhaseStarted {
klog.Infof("start to migrate src vm %s from %s to %s", vmKey, srcNode, targetNode)
return true, false, false, srcNode, targetNode, nil
}
if migratePhase == util.MigrationPhaseSucceeded {
klog.Infof("succeed to migrate src vm %s from %s to %s", vmKey, srcNode, targetNode)
return true, true, false, srcNode, targetNode, nil
}
if migratePhase == util.MigrationPhaseFailed {
klog.Infof("failed to migrate src vm %s from %s to %s", vmKey, srcNode, targetNode)
return true, true, true, srcNode, targetNode, nil
}

return false, false, false, "", "", nil
}
4 changes: 4 additions & 0 deletions pkg/ovs/interface.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,10 @@ type LogicalSwitchPort interface {
ListLogicalSwitchPortsWithLegacyExternalIDs() ([]ovnnb.LogicalSwitchPort, error)
GetLogicalSwitchPort(lspName string, ignoreNotFound bool) (*ovnnb.LogicalSwitchPort, error)
LogicalSwitchPortExists(name string) (bool, error)
// vm live migrate
SetLogicalSwitchPortMigrateOptions(lspName, srcNodeName, targetNodeName string) error
ResetLogicalSwitchPortMigrateOptions(lspName, srcNodeName, targetNodeName string, migratedFail bool) error
CleanLogicalSwitchPortMigrateOptions(lspName string) error
}

type LoadBalancer interface {
Expand Down
9 changes: 3 additions & 6 deletions pkg/ovs/ovn-nb-load_balancer.go
Original file line number Diff line number Diff line change
Expand Up @@ -152,12 +152,9 @@ func (c *OVNNbClient) LoadBalancerDeleteVip(lbName, vipEndpoint string, ignoreHe
klog.Errorf("failed to delete lb ip port mapping: %v", err)
return err
}

if lbhc != nil {
if err = c.LoadBalancerDeleteHealthCheck(lbName, lbhc.UUID); err != nil {
klog.Errorf("failed to delete lb health check: %v", err)
return err
}
if err = c.LoadBalancerDeleteHealthCheck(lbName, lbhc.UUID); err != nil {
klog.Errorf("failed to delete lb health check: %v", err)
return err
}
}
if lb == nil || len(lb.Vips) == 0 {
Expand Down
Loading

0 comments on commit 116c24c

Please sign in to comment.