Skip to content

Commit

Permalink
fix: Do not move nodes that have already been attached to load balanc…
Browse files Browse the repository at this point in the history
…ers after restarting

When using multislb, the node distribution could be changed after restarting the ccm.
This is because the node distribution cache would be lost after restarting.
This PR restores the node distribution each time the ccm is restarted.
  • Loading branch information
nilo19 authored and k8s-infra-cherrypick-robot committed Sep 11, 2024
1 parent 6775047 commit 0675f9b
Show file tree
Hide file tree
Showing 3 changed files with 159 additions and 3 deletions.
52 changes: 50 additions & 2 deletions pkg/provider/azure_loadbalancer.go
Original file line number Diff line number Diff line change
Expand Up @@ -1650,7 +1650,7 @@ func (az *Cloud) reconcileMultipleStandardLoadBalancerConfigurations(
}
}

return az.reconcileMultipleStandardLoadBalancerBackendNodes("", lbs, service, nodes)
return az.reconcileMultipleStandardLoadBalancerBackendNodes(clusterName, "", lbs, service, nodes, true)
}

// reconcileLoadBalancer ensures load balancer exists and the frontend ip config is setup.
Expand Down Expand Up @@ -1852,7 +1852,7 @@ func (az *Cloud) reconcileLoadBalancer(ctx context.Context, clusterName string,
}()

if az.useMultipleStandardLoadBalancers() {
err := az.reconcileMultipleStandardLoadBalancerBackendNodes(lbName, existingLBs, service, nodes)
err := az.reconcileMultipleStandardLoadBalancerBackendNodes(clusterName, lbName, existingLBs, service, nodes, false)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -2151,11 +2151,27 @@ func isLBInList(lbs *[]network.LoadBalancer, lbConfigName string) bool {
// 2. We only check nodes that are not matched by primary vmSet before we ensure
// hosts in pool. So the number API calls is under control.
func (az *Cloud) reconcileMultipleStandardLoadBalancerBackendNodes(
clusterName string,
lbName string,
lbs *[]network.LoadBalancer,
service *v1.Service,
nodes []*v1.Node,
init bool,
) error {
logger := klog.Background().WithName("reconcileMultipleStandardLoadBalancerBackendNodes").
WithValues(
"clusterName", clusterName,
"lbName", lbName,
"service", service.Name,
"init", init,
)
if init {
if err := az.recordExistingNodesOnLoadBalancers(clusterName, lbs); err != nil {
logger.Error(err, "failed to record existing nodes on load balancers")
return err
}
}

// Remove the nodes from the load balancer configurations if they are not in the node list.
nodeNameToLBConfigIDXMap := az.removeDeletedNodesFromLoadBalancerConfigurations(nodes)

Expand All @@ -2172,6 +2188,38 @@ func (az *Cloud) reconcileMultipleStandardLoadBalancerBackendNodes(
return nil
}

// recordExistingNodesOnLoadBalancers restores the node distribution
// across multiple load balancers each time the cloud provider restarts.
func (az *Cloud) recordExistingNodesOnLoadBalancers(clusterName string, lbs *[]network.LoadBalancer) error {
bi, ok := az.LoadBalancerBackendPool.(*backendPoolTypeNodeIP)
if !ok {
return errors.New("must use backend pool type nodeIP")
}
bpNames := getBackendPoolNames(clusterName)
for _, lb := range *lbs {
if lb.LoadBalancerPropertiesFormat == nil ||
lb.LoadBalancerPropertiesFormat.BackendAddressPools == nil {
continue
}
lbName := ptr.Deref(lb.Name, "")
for _, backendPool := range *lb.LoadBalancerPropertiesFormat.BackendAddressPools {
backendPool := backendPool
if found, _ := isLBBackendPoolsExisting(bpNames, backendPool.Name); found {
nodeNames := bi.getBackendPoolNodeNames(&backendPool)
for i, multiSLBConfig := range az.MultipleStandardLoadBalancerConfigurations {
if strings.EqualFold(trimSuffixIgnoreCase(
lbName, consts.InternalLoadBalancerNameSuffix,
), multiSLBConfig.Name) {
az.MultipleStandardLoadBalancerConfigurations[i].ActiveNodes =
utilsets.SafeInsert(multiSLBConfig.ActiveNodes, nodeNames...)
}
}
}
}
}
return nil
}

func (az *Cloud) reconcileMultipleStandardLoadBalancerConfigurationStatus(wantLb bool, svcName, lbName string) {
lbName = trimSuffixIgnoreCase(lbName, consts.InternalLoadBalancerNameSuffix)
for i := range az.MultipleStandardLoadBalancerConfigurations {
Expand Down
14 changes: 14 additions & 0 deletions pkg/provider/azure_loadbalancer_backendpool.go
Original file line number Diff line number Diff line change
Expand Up @@ -774,6 +774,20 @@ func (bi *backendPoolTypeNodeIP) GetBackendPrivateIPs(clusterName string, servic
return backendPrivateIPv4s.UnsortedList(), backendPrivateIPv6s.UnsortedList()
}

// getBackendPoolNameForService returns all node names in the backend pool.
func (bi *backendPoolTypeNodeIP) getBackendPoolNodeNames(bp *network.BackendAddressPool) []string {
nodeNames := utilsets.NewString()
if bp.BackendAddressPoolPropertiesFormat != nil && bp.LoadBalancerBackendAddresses != nil {
for _, backendAddress := range *bp.LoadBalancerBackendAddresses {
if backendAddress.LoadBalancerBackendAddressPropertiesFormat != nil {
ip := ptr.Deref(backendAddress.IPAddress, "")

Check failure on line 783 in pkg/provider/azure_loadbalancer_backendpool.go

View workflow job for this annotation

GitHub Actions / Lint

undefined: ptr) (typecheck)

Check failure on line 783 in pkg/provider/azure_loadbalancer_backendpool.go

View workflow job for this annotation

GitHub Actions / Lint

undefined: ptr) (typecheck)

Check failure on line 783 in pkg/provider/azure_loadbalancer_backendpool.go

View workflow job for this annotation

GitHub Actions / Lint

undefined: ptr) (typecheck)

Check failure on line 783 in pkg/provider/azure_loadbalancer_backendpool.go

View workflow job for this annotation

GitHub Actions / Lint

undefined: ptr) (typecheck)

Check failure on line 783 in pkg/provider/azure_loadbalancer_backendpool.go

View workflow job for this annotation

GitHub Actions / Lint

undefined: ptr) (typecheck)
nodeNames.Insert(bi.nodePrivateIPToNodeNameMap[ip])
}
}
}
return nodeNames.UnsortedList()
}

func newBackendPool(lb *network.LoadBalancer, isBackendPoolPreConfigured bool, preConfiguredBackendPoolLoadBalancerTypes, serviceName, lbBackendPoolName string) bool {
if isBackendPoolPreConfigured {
klog.V(2).Infof("newBackendPool for service (%s)(true): lb backendpool - PreConfiguredBackendPoolLoadBalancerTypes %s has been set but can not find corresponding backend pool %q, ignoring it",
Expand Down
96 changes: 95 additions & 1 deletion pkg/provider/azure_loadbalancer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7599,6 +7599,7 @@ func TestReconcileMultipleStandardLoadBalancerConfigurations(t *testing.T) {
}

if tc.useMultipleLB {
az.LoadBalancerBackendPool = newBackendPoolTypeNodeIP(az)
az.MultipleStandardLoadBalancerConfigurations = []MultipleStandardLoadBalancerConfiguration{
{Name: "lb1"},
{Name: "lb2"},
Expand Down Expand Up @@ -8076,6 +8077,7 @@ func TestReconcileMultipleStandardLoadBalancerNodes(t *testing.T) {
for _, tc := range []struct {
description string
lbName string
init bool
existingLBConfigs []MultipleStandardLoadBalancerConfiguration
existingNodes []*v1.Node
existingLBs []network.LoadBalancer
Expand Down Expand Up @@ -8424,16 +8426,108 @@ func TestReconcileMultipleStandardLoadBalancerNodes(t *testing.T) {
"lb4": nil,
},
},
{
description: "should record current node distributions after restarting the controller",
init: true,
existingLBConfigs: []MultipleStandardLoadBalancerConfiguration{
{
Name: "lb1",
MultipleStandardLoadBalancerConfigurationSpec: MultipleStandardLoadBalancerConfigurationSpec{
PrimaryVMSet: "vmss-1",
},
},
{
Name: "lb2",
MultipleStandardLoadBalancerConfigurationSpec: MultipleStandardLoadBalancerConfigurationSpec{
PrimaryVMSet: "vmss-2",
},
},
},
existingNodes: []*v1.Node{
getTestNodeWithMetadata("node1", "vmss-1", map[string]string{"k1": "v1"}, "10.1.0.1"),
getTestNodeWithMetadata("node2", "vmss-2", map[string]string{"k3": "v3"}, "10.1.0.2"),
getTestNodeWithMetadata("node3", "vmss-3", map[string]string{"k2": "v2"}, "10.1.0.3"),
getTestNodeWithMetadata("node5", "vmss-5", map[string]string{"k2": "v2"}, "10.1.0.5"),
getTestNodeWithMetadata("node6", "vmss-6", map[string]string{"k3": "v3"}, "10.1.0.6"),
},
existingLBs: []network.LoadBalancer{
{
Name: ptr.To("lb1-internal"),
LoadBalancerPropertiesFormat: &network.LoadBalancerPropertiesFormat{
BackendAddressPools: &[]network.BackendAddressPool{
{
Name: ptr.To("kubernetes"),
BackendAddressPoolPropertiesFormat: &network.BackendAddressPoolPropertiesFormat{
LoadBalancerBackendAddresses: &[]network.LoadBalancerBackendAddress{
{
Name: ptr.To("node2"),
LoadBalancerBackendAddressPropertiesFormat: &network.LoadBalancerBackendAddressPropertiesFormat{
IPAddress: ptr.To("10.1.0.2"),
},
},
},
},
},
},
},
},
{
Name: ptr.To("lb2"),
LoadBalancerPropertiesFormat: &network.LoadBalancerPropertiesFormat{
BackendAddressPools: &[]network.BackendAddressPool{
{
Name: ptr.To("kubernetes"),
BackendAddressPoolPropertiesFormat: &network.BackendAddressPoolPropertiesFormat{
LoadBalancerBackendAddresses: &[]network.LoadBalancerBackendAddress{
{
Name: ptr.To("node3"),
LoadBalancerBackendAddressPropertiesFormat: &network.LoadBalancerBackendAddressPropertiesFormat{
IPAddress: ptr.To("10.1.0.3"),
},
},
{
Name: ptr.To("node4"),
LoadBalancerBackendAddressPropertiesFormat: &network.LoadBalancerBackendAddressPropertiesFormat{
IPAddress: ptr.To("10.1.0.4"),
},
},
{
Name: ptr.To("node5"),
LoadBalancerBackendAddressPropertiesFormat: &network.LoadBalancerBackendAddressPropertiesFormat{
IPAddress: ptr.To("10.1.0.5"),
},
},
},
},
},
},
},
},
},
expectedLBToNodesMap: map[string]*utilsets.IgnoreCaseSet{
"lb1": utilsets.NewString("node1", "node6"),
"lb2": utilsets.NewString("node2", "node3", "node5"),
},
},
} {
tc := tc
t.Run(tc.description, func(t *testing.T) {
az := GetTestCloud(ctrl)
ss, _ := NewTestScaleSet(ctrl)
ss.DisableAvailabilitySetNodes = true
az.VMSet = ss
az.nodePrivateIPToNodeNameMap = map[string]string{
"10.1.0.1": "node1",
"10.1.0.2": "node2",
"10.1.0.3": "node3",
"10.1.0.4": "node4",
"10.1.0.5": "node5",
"10.1.0.6": "node6",
}
az.LoadBalancerBackendPool = newBackendPoolTypeNodeIP(az)
az.MultipleStandardLoadBalancerConfigurations = tc.existingLBConfigs
svc := getTestService("test", v1.ProtocolTCP, nil, false)
_ = az.reconcileMultipleStandardLoadBalancerBackendNodes(tc.lbName, &tc.existingLBs, &svc, tc.existingNodes)
_ = az.reconcileMultipleStandardLoadBalancerBackendNodes("kubernetes", tc.lbName, &tc.existingLBs, &svc, tc.existingNodes, tc.init)

expectedLBToNodesMap := make(map[string]*utilsets.IgnoreCaseSet)
for _, multiSLBConfig := range az.MultipleStandardLoadBalancerConfigurations {
Expand Down

0 comments on commit 0675f9b

Please sign in to comment.