fix(replica-auto-balance): loop when node has no storage available
ref: 6671

Signed-off-by: Chin-Ya Huang <[email protected]>
(cherry picked from commit dccf8de)
c3y1huang authored and David Ko committed Dec 11, 2023
1 parent 74a0b3e commit 65150b4
Showing 2 changed files with 43 additions and 63 deletions.
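In brief, as the two diffs below show: the controller previously computed auto-balance candidate nodes first and only afterwards asked getIsSchedulableToDiskNodes whether any candidate had a disk with room; when none did, it still returned a non-zero replenish count, which would keep reconciliation retrying on a cluster with no storage available (the loop in the commit title). The fix moves the space check into the scheduler as FilterNodesSchedulableForVolume and applies it before candidates are computed. Below is a minimal runnable sketch of that behavioral difference; the helper names and toy types are invented for illustration and are not Longhorn's API.

package main

import "fmt"

type node struct {
    name      string
    freeBytes int64
}

// oldReplenish mimics the removed shape: decide that a rebalance is needed,
// then look for disk space afterwards. With no schedulable node it still
// reports count=1, so the caller retries on every reconcile.
func oldReplenish(nodes []node, size int64) (count int, affinity string) {
    count = 1 // pretend best-effort balancing found an imbalance
    for _, n := range nodes {
        if n.freeBytes >= size {
            return count, n.name
        }
    }
    return count, "" // non-zero count with nowhere to place the replica
}

// newReplenish mimics the new shape: unschedulable nodes are filtered out
// up front, so an exhausted cluster yields no candidates and no adjustment.
func newReplenish(nodes []node, size int64) (count int, affinity string) {
    var schedulable []node
    for _, n := range nodes {
        if n.freeBytes >= size {
            schedulable = append(schedulable, n)
        }
    }
    if len(schedulable) == 0 {
        return 0, "" // zero count: reconciliation settles
    }
    return 1, schedulable[0].name
}

func main() {
    full := []node{{"node-1", 0}, {"node-2", 0}} // no storage available anywhere
    fmt.Println(oldReplenish(full, 1<<30)) // count=1 -> controller keeps looping
    fmt.Println(newReplenish(full, 1<<30)) // count=0 -> nothing left to do
}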
controller/volume_controller.go (70 changes: 7 additions & 63 deletions)
@@ -2134,6 +2134,8 @@ func (vc *VolumeController) listReadySchedulableAndScheduledNodes(volume *longho
         return nil, err
     }
 
+    readyNodes = vc.scheduler.FilterNodesSchedulableForVolume(readyNodes, volume)
+
     filteredReadyNodes := readyNodes
     if len(volume.Spec.NodeSelector) != 0 {
         for nodeName, node := range readyNodes {
@@ -2295,7 +2297,7 @@ func (vc *VolumeController) getReplenishReplicasCount(v *longhorn.Volume, rs map
         if adjustCount := vc.getReplicaCountForAutoBalanceLeastEffort(v, e, rs, vc.getReplicaCountForAutoBalanceNode); adjustCount != 0 {
             return adjustCount, ""
         }
-        adjustNodeAffinity := ""
+
         var nCandidates []string
         adjustCount, _, nCandidates := vc.getReplicaCountForAutoBalanceBestEffort(v, e, rs, vc.getReplicaCountForAutoBalanceNode)
         if adjustCount == 0 {
@@ -2304,73 +2306,15 @@
                 nCandidates = vc.getNodeCandidatesForAutoBalanceZone(v, e, rs, zCandidates)
             }
         }
-        if adjustCount != 0 {
-            // TODO: remove checking and let schedular handle this part after
-            // https://github.com/longhorn/longhorn/issues/2667
-            schedulableCandidates := vc.getIsSchedulableToDiskNodes(v, nCandidates)
-            if len(schedulableCandidates) != 0 {
-                // TODO: select replica auto-balance best-effort node from candidate list.
-                // https://github.com/longhorn/longhorn/issues/2667
-                adjustNodeAffinity = schedulableCandidates[0]
-            }
-            return adjustCount, adjustNodeAffinity
+        if adjustCount != 0 && len(nCandidates) != 0 {
+            // TODO: https://github.com/longhorn/longhorn/issues/2667
+            return adjustCount, nCandidates[0]
         }
-        return adjustCount, adjustNodeAffinity
+        return adjustCount, ""
     }
     return 0, ""
 }
 
-func (vc *VolumeController) getIsSchedulableToDiskNodes(v *longhorn.Volume, nodeNames []string) (schedulableNodeNames []string) {
-    log := getLoggerForVolume(vc.logger, v)
-    defer func() {
-        if len(schedulableNodeNames) == 0 {
-            log.Debugf("Found 0 node has at least one schedulable disk")
-        } else {
-            log.Infof("Found node %v has at least one schedulable disk", schedulableNodeNames)
-        }
-    }()
-
-    if len(nodeNames) == 0 {
-        return schedulableNodeNames
-    }
-
-    for _, nodeName := range nodeNames {
-        scheduleNode := false
-        node, err := vc.ds.GetNode(nodeName)
-        if err != nil {
-            continue
-        }
-        for fsid, diskStatus := range node.Status.DiskStatus {
-            diskSpec, exists := node.Spec.Disks[fsid]
-            if !exists {
-                continue
-            }
-
-            if !diskSpec.AllowScheduling || diskSpec.EvictionRequested {
-                continue
-            }
-
-            if types.GetCondition(diskStatus.Conditions, longhorn.DiskConditionTypeSchedulable).Status != longhorn.ConditionStatusTrue {
-                continue
-            }
-
-            diskInfo, err := vc.scheduler.GetDiskSchedulingInfo(diskSpec, diskStatus)
-            if err != nil {
-                continue
-            }
-
-            if vc.scheduler.IsSchedulableToDisk(v.Spec.Size, v.Status.ActualSize, diskInfo) {
-                scheduleNode = true
-                break
-            }
-        }
-        if scheduleNode {
-            schedulableNodeNames = append(schedulableNodeNames, nodeName)
-        }
-    }
-    return schedulableNodeNames
-}
-
 func (vc *VolumeController) getNodeCandidatesForAutoBalanceZone(v *longhorn.Volume, e *longhorn.Engine, rs map[string]*longhorn.Replica, zones []string) (candidateNames []string) {
     log := getLoggerForVolume(vc.logger, v).WithFields(
         logrus.Fields{
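The first hunk above also shows where the new filter sits in listReadySchedulableAndScheduledNodes: schedulability is applied to the ready-node map before the pre-existing NodeSelector filtering. A self-contained sketch of that two-stage filtering, with toy types and tag semantics that only approximate Longhorn's (illustrative assumptions, not the real data structures):

package main

import "fmt"

type node struct {
    freeBytes int64
    tags      map[string]bool
}

// filterSchedulable mirrors the role of FilterNodesSchedulableForVolume:
// keep only nodes with room for a replica of the given size.
func filterSchedulable(nodes map[string]node, size int64) map[string]node {
    out := map[string]node{}
    for name, n := range nodes {
        if n.freeBytes >= size {
            out[name] = n
        }
    }
    return out
}

// filterBySelector mirrors the pre-existing NodeSelector step: keep nodes
// that carry every selector tag.
func filterBySelector(nodes map[string]node, selector []string) map[string]node {
    out := map[string]node{}
    for name, n := range nodes {
        matches := true
        for _, tag := range selector {
            if !n.tags[tag] {
                matches = false
                break
            }
        }
        if matches {
            out[name] = n
        }
    }
    return out
}

func main() {
    nodes := map[string]node{
        "node-1": {freeBytes: 0, tags: map[string]bool{"ssd": true}},        // disk full
        "node-2": {freeBytes: 50 << 30, tags: map[string]bool{"ssd": true}}, // fits and tagged
        "node-3": {freeBytes: 50 << 30, tags: nil},                          // fits, wrong tags
    }
    ready := filterSchedulable(nodes, 10<<30)        // space check first, as in the hunk
    ready = filterBySelector(ready, []string{"ssd"}) // then NodeSelector
    fmt.Println(len(ready)) // 1: only node-2 passes both filters
}

Applying the space filter first mirrors the hunk: the NodeSelector loop then only ever iterates nodes that could actually hold a replica.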
scheduler/replica_scheduler.go (36 changes: 36 additions & 0 deletions)
@@ -653,6 +653,42 @@ func (rcs *ReplicaScheduler) IsSchedulableToDisk(size int64, requiredStorage int
         (size+info.StorageScheduled) <= int64(float64(info.StorageMaximum-info.StorageReserved)*float64(info.OverProvisioningPercentage)/100)
 }
 
+// FilterNodesSchedulableForVolume filters nodes that are schedulable for a given volume based on the disk space.
+func (rcs *ReplicaScheduler) FilterNodesSchedulableForVolume(nodes map[string]*longhorn.Node, volume *longhorn.Volume) map[string]*longhorn.Node {
+    filteredNodes := map[string]*longhorn.Node{}
+    for _, node := range nodes {
+        isSchedulable := false
+
+        for diskName, diskStatus := range node.Status.DiskStatus {
+            diskSpec, exists := node.Spec.Disks[diskName]
+            if !exists {
+                continue
+            }
+
+            diskInfo, err := rcs.GetDiskSchedulingInfo(diskSpec, diskStatus)
+            if err != nil {
+                logrus.WithError(err).Debugf("Failed to get disk scheduling info for disk %v on node %v", diskName, node.Name)
+                continue
+            }
+
+            if rcs.IsSchedulableToDisk(volume.Spec.Size, volume.Status.ActualSize, diskInfo) {
+                isSchedulable = true
+                break
+            }
+        }
+
+        if isSchedulable {
+            logrus.Tracef("Found node %v schedulable for volume %v", node.Name, volume.Name)
+            filteredNodes[node.Name] = node
+        }
+    }
+
+    if len(filteredNodes) == 0 {
+        logrus.Debugf("Found no nodes schedulable for volume %v", volume.Name)
+    }
+    return filteredNodes
+}
+
 func (rcs *ReplicaScheduler) isDiskNotFull(info *DiskSchedulingInfo) bool {
     // StorageAvailable = the space can be used by 3rd party or Longhorn system.
     return info.StorageMaximum > 0 && info.StorageAvailable > 0 &&
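The new filter delegates the actual space decision to IsSchedulableToDisk, whose over-provisioning conjunct is visible in the context line at the top of this hunk: a replica of a given size fits only if size plus already-scheduled storage stays within (StorageMaximum - StorageReserved) scaled by OverProvisioningPercentage. (The full predicate has additional conditions above the fold.) A worked standalone example with made-up numbers; the struct mirrors a few DiskSchedulingInfo field names but is not Longhorn code:

package main

import "fmt"

// diskInfo is a toy stand-in for the fields the visible conjunct uses.
type diskInfo struct {
    StorageMaximum             int64
    StorageReserved            int64
    StorageScheduled           int64
    OverProvisioningPercentage int64
}

// fitsOverProvisioning reproduces the conjunct from the diff context above.
func fitsOverProvisioning(size int64, info diskInfo) bool {
    return (size + info.StorageScheduled) <=
        int64(float64(info.StorageMaximum-info.StorageReserved)*float64(info.OverProvisioningPercentage)/100)
}

func main() {
    const gib = int64(1) << 30
    disk := diskInfo{
        StorageMaximum:             100 * gib,
        StorageReserved:            30 * gib, // e.g. space set aside for the OS
        StorageScheduled:           60 * gib, // replicas already placed here
        OverProvisioningPercentage: 100,
    }
    // Budget = (100 - 30) GiB * 100% = 70 GiB, with 60 GiB already scheduled.
    fmt.Println(fitsOverProvisioning(10*gib, disk)) // true:  60+10 <= 70
    fmt.Println(fitsOverProvisioning(20*gib, disk)) // false: 60+20 >  70
}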
