diff --git a/controller/volume_controller.go b/controller/volume_controller.go
index 40f2d63ce9..9661f49fa3 100644
--- a/controller/volume_controller.go
+++ b/controller/volume_controller.go
@@ -2134,6 +2134,8 @@ func (vc *VolumeController) listReadySchedulableAndScheduledNodes(volume *longho
 		return nil, err
 	}
 
+	readyNodes = vc.scheduler.FilterNodesSchedulableForVolume(readyNodes, volume)
+
 	filteredReadyNodes := readyNodes
 	if len(volume.Spec.NodeSelector) != 0 {
 		for nodeName, node := range readyNodes {
@@ -2295,7 +2297,7 @@ func (vc *VolumeController) getReplenishReplicasCount(v *longhorn.Volume, rs map
 		if adjustCount := vc.getReplicaCountForAutoBalanceLeastEffort(v, e, rs, vc.getReplicaCountForAutoBalanceNode); adjustCount != 0 {
 			return adjustCount, ""
 		}
 
-		adjustNodeAffinity := ""
+		var nCandidates []string
 		adjustCount, _, nCandidates := vc.getReplicaCountForAutoBalanceBestEffort(v, e, rs, vc.getReplicaCountForAutoBalanceNode)
 		if adjustCount == 0 {
@@ -2304,73 +2306,15 @@ func (vc *VolumeController) getReplenishReplicasCount(v *longhorn.Volume, rs map
 				nCandidates = vc.getNodeCandidatesForAutoBalanceZone(v, e, rs, zCandidates)
 			}
 		}
-		if adjustCount != 0 {
-			// TODO: remove checking and let schedular handle this part after
-			// https://github.com/longhorn/longhorn/issues/2667
-			schedulableCandidates := vc.getIsSchedulableToDiskNodes(v, nCandidates)
-			if len(schedulableCandidates) != 0 {
-				// TODO: select replica auto-balance best-effort node from candidate list.
-				// https://github.com/longhorn/longhorn/issues/2667
-				adjustNodeAffinity = schedulableCandidates[0]
-			}
-			return adjustCount, adjustNodeAffinity
+		if adjustCount != 0 && len(nCandidates) != 0 {
+			// TODO: https://github.com/longhorn/longhorn/issues/2667
+			return adjustCount, nCandidates[0]
 		}
-		return adjustCount, adjustNodeAffinity
+		return adjustCount, ""
 	}
 	return 0, ""
 }
 
-func (vc *VolumeController) getIsSchedulableToDiskNodes(v *longhorn.Volume, nodeNames []string) (schedulableNodeNames []string) {
-	log := getLoggerForVolume(vc.logger, v)
-	defer func() {
-		if len(schedulableNodeNames) == 0 {
-			log.Debugf("Found 0 node has at least one schedulable disk")
-		} else {
-			log.Infof("Found node %v has at least one schedulable disk", schedulableNodeNames)
-		}
-	}()
-
-	if len(nodeNames) == 0 {
-		return schedulableNodeNames
-	}
-
-	for _, nodeName := range nodeNames {
-		scheduleNode := false
-		node, err := vc.ds.GetNode(nodeName)
-		if err != nil {
-			continue
-		}
-		for fsid, diskStatus := range node.Status.DiskStatus {
-			diskSpec, exists := node.Spec.Disks[fsid]
-			if !exists {
-				continue
-			}
-
-			if !diskSpec.AllowScheduling || diskSpec.EvictionRequested {
-				continue
-			}
-
-			if types.GetCondition(diskStatus.Conditions, longhorn.DiskConditionTypeSchedulable).Status != longhorn.ConditionStatusTrue {
-				continue
-			}
-
-			diskInfo, err := vc.scheduler.GetDiskSchedulingInfo(diskSpec, diskStatus)
-			if err != nil {
-				continue
-			}
-
-			if vc.scheduler.IsSchedulableToDisk(v.Spec.Size, v.Status.ActualSize, diskInfo) {
-				scheduleNode = true
-				break
-			}
-		}
-		if scheduleNode {
-			schedulableNodeNames = append(schedulableNodeNames, nodeName)
-		}
-	}
-	return schedulableNodeNames
-}
-
 func (vc *VolumeController) getNodeCandidatesForAutoBalanceZone(v *longhorn.Volume, e *longhorn.Engine, rs map[string]*longhorn.Replica, zones []string) (candidateNames []string) {
 	log := getLoggerForVolume(vc.logger, v).WithFields(
 		logrus.Fields{
diff --git a/scheduler/replica_scheduler.go b/scheduler/replica_scheduler.go
index 2b41255a35..1afbe9d8e7 100644
--- a/scheduler/replica_scheduler.go
+++ b/scheduler/replica_scheduler.go
@@ -653,6 +653,42 @@ func (rcs *ReplicaScheduler) IsSchedulableToDisk(size int64, requiredStorage int
 		(size+info.StorageScheduled) <= int64(float64(info.StorageMaximum-info.StorageReserved)*float64(info.OverProvisioningPercentage)/100)
 }
 
+// FilterNodesSchedulableForVolume filters nodes that are schedulable for a given volume based on the disk space.
+func (rcs *ReplicaScheduler) FilterNodesSchedulableForVolume(nodes map[string]*longhorn.Node, volume *longhorn.Volume) map[string]*longhorn.Node {
+	filteredNodes := map[string]*longhorn.Node{}
+	for _, node := range nodes {
+		isSchedulable := false
+
+		for diskName, diskStatus := range node.Status.DiskStatus {
+			diskSpec, exists := node.Spec.Disks[diskName]
+			if !exists {
+				continue
+			}
+
+			diskInfo, err := rcs.GetDiskSchedulingInfo(diskSpec, diskStatus)
+			if err != nil {
+				logrus.WithError(err).Debugf("Failed to get disk scheduling info for disk %v on node %v", diskName, node.Name)
+				continue
+			}
+
+			if rcs.IsSchedulableToDisk(volume.Spec.Size, volume.Status.ActualSize, diskInfo) {
+				isSchedulable = true
+				break
+			}
+		}
+
+		if isSchedulable {
+			logrus.Tracef("Found node %v schedulable for volume %v", node.Name, volume.Name)
+			filteredNodes[node.Name] = node
+		}
+	}
+
+	if len(filteredNodes) == 0 {
+		logrus.Debugf("Found no nodes schedulable for volume %v", volume.Name)
+	}
+	return filteredNodes
+}
+
 func (rcs *ReplicaScheduler) isDiskNotFull(info *DiskSchedulingInfo) bool {
 	// StorageAvailable = the space can be used by 3rd party or Longhorn system.
 	return info.StorageMaximum > 0 && info.StorageAvailable > 0 &&