From e250afa56ff5d22019a8d7fa07ae7aa026ee3d9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E7=A5=96=E5=BB=BA?= Date: Thu, 9 Jan 2025 18:04:26 +0800 Subject: [PATCH] cni-server: set node NetworkUnavailable condition after join subnet gateway check (#4915) Signed-off-by: zhangzujian --- charts/kube-ovn/templates/ovn-CR.yaml | 1 + dist/images/install.sh | 1 + pkg/daemon/init.go | 2 +- pkg/daemon/ovs_linux.go | 45 ++++++++++++++++++++++++--- pkg/daemon/ovs_windows.go | 18 ++++++++--- pkg/util/k8s.go | 31 ++++++++++++++++++ 6 files changed, 88 insertions(+), 10 deletions(-) diff --git a/charts/kube-ovn/templates/ovn-CR.yaml b/charts/kube-ovn/templates/ovn-CR.yaml index 7162c29600b..a93d3305aab 100644 --- a/charts/kube-ovn/templates/ovn-CR.yaml +++ b/charts/kube-ovn/templates/ovn-CR.yaml @@ -288,6 +288,7 @@ rules: - ovn-eips - ovn-eips/status - nodes + - nodes/status - pods verbs: - get diff --git a/dist/images/install.sh b/dist/images/install.sh index 28411951a51..6c1695d49dc 100755 --- a/dist/images/install.sh +++ b/dist/images/install.sh @@ -3665,6 +3665,7 @@ rules: - ovn-eips - ovn-eips/status - nodes + - nodes/status - pods verbs: - get diff --git a/pkg/daemon/init.go b/pkg/daemon/init.go index 00ecb75b71a..264db323773 100644 --- a/pkg/daemon/init.go +++ b/pkg/daemon/init.go @@ -87,7 +87,7 @@ func InitNodeGateway(config *Configuration) error { klog.Errorf("failed to get ip %s with mask %s, %v", ip, joinCIDR, err) return err } - return configureNodeNic(portName, ipAddr, gw, joinCIDR, mac, config.MTU) + return configureNodeNic(config.KubeClient, config.NodeName, portName, ipAddr, gw, joinCIDR, mac, config.MTU) } func InitMirror(config *Configuration) error { diff --git a/pkg/daemon/ovs_linux.go b/pkg/daemon/ovs_linux.go index cac85de9201..e94e48bdd2e 100644 --- a/pkg/daemon/ovs_linux.go +++ b/pkg/daemon/ovs_linux.go @@ -22,10 +22,12 @@ import ( sriovutilfs "github.com/k8snetworkplumbingwg/sriovnet/pkg/utils/filesystem" "github.com/vishvananda/netlink" "golang.org/x/sys/unix" + corev1 "k8s.io/api/core/v1" k8serrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/kubernetes" "k8s.io/klog/v2" kubeovnv1 "github.com/kubeovn/kube-ovn/pkg/apis/kubeovn/v1" @@ -591,7 +593,7 @@ func waitNetworkReady(nic, ipAddr, gateway string, underlayGateway, verbose bool return nil } -func configureNodeNic(portName, ip, gw, joinCIDR string, macAddr net.HardwareAddr, mtu int) error { +func configureNodeNic(cs kubernetes.Interface, nodeName, portName, ip, gw, joinCIDR string, macAddr net.HardwareAddr, mtu int) error { ipStr := util.GetIPWithoutMask(ip) raw, err := ovs.Exec(ovs.MayExist, "add-port", "br-int", util.NodeNic, "--", "set", "interface", util.NodeNic, "type=internal", "--", @@ -670,11 +672,20 @@ func configureNodeNic(portName, ip, gw, joinCIDR string, macAddr net.HardwareAdd // ping ovn0 gw to activate the flow klog.Infof("wait ovn0 gw ready") - if err := waitNetworkReady(util.NodeNic, ip, gw, false, true, gatewayCheckMaxRetry, nil); err != nil { + status := corev1.ConditionFalse + reason := "JoinSubnetGatewayReachable" + message := fmt.Sprintf("ping check to gateway ip %s succeeded", gw) + if err = waitNetworkReady(util.NodeNic, ip, gw, false, true, gatewayCheckMaxRetry, nil); err != nil { klog.Errorf("failed to init ovn0 check: %v", err) - return err + status = corev1.ConditionTrue + reason = "JoinSubnetGatewayUnreachable" + message = fmt.Sprintf("ping check to gateway ip %s failed", gw) } - return nil + if err := util.SetNodeNetworkUnavailableCondition(cs, nodeName, status, reason, message); err != nil { + klog.Errorf("failed to set node network unavailable condition: %v", err) + } + + return err } // If OVS restart, the ovn0 port will down and prevent host to pod network, @@ -696,7 +707,31 @@ func (c *Controller) loopOvn0Check() { } ip := node.Annotations[util.IPAddressAnnotation] gw := node.Annotations[util.GatewayAnnotation] - if err := waitNetworkReady(util.NodeNic, ip, gw, false, false, 5, nil); err != nil { + status := corev1.ConditionFalse + reason := "JoinSubnetGatewayReachable" + message := fmt.Sprintf("ping check to gateway ip %s succeeded", gw) + if err = waitNetworkReady(util.NodeNic, ip, gw, false, false, 5, nil); err != nil { + klog.Errorf("failed to init ovn0 check: %v", err) + status = corev1.ConditionTrue + reason = "JoinSubnetGatewayUnreachable" + message = fmt.Sprintf("ping check to gateway ip %s failed", gw) + } + + var alreadySet bool + for _, condition := range node.Status.Conditions { + if condition.Type == corev1.NodeNetworkUnavailable && condition.Status == corev1.ConditionTrue && + condition.Reason == reason && condition.Message == message { + alreadySet = true + break + } + } + if !alreadySet { + if err := util.SetNodeNetworkUnavailableCondition(c.config.KubeClient, c.config.NodeName, status, reason, message); err != nil { + klog.Errorf("failed to set node network unavailable condition: %v", err) + } + } + + if err != nil { util.LogFatalAndExit(err, "failed to ping ovn0 gateway %s", gw) } } diff --git a/pkg/daemon/ovs_windows.go b/pkg/daemon/ovs_windows.go index a0aa28c935b..3ec0d6c50e2 100644 --- a/pkg/daemon/ovs_windows.go +++ b/pkg/daemon/ovs_windows.go @@ -9,7 +9,9 @@ import ( "github.com/Microsoft/hcsshim" "github.com/containernetworking/plugins/pkg/hns" + corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/kubernetes" "k8s.io/klog/v2" kubeovnv1 "github.com/kubeovn/kube-ovn/pkg/apis/kubeovn/v1" @@ -262,7 +264,7 @@ func waitNetworkReady(nic, ipAddr, gateway string, underlayGateway, verbose bool return nil } -func configureNodeNic(portName, ip, gw, joinCIDR string, macAddr net.HardwareAddr, mtu int) error { +func configureNodeNic(cs kubernetes.Interface, nodeName, portName, ip, gw, joinCIDR string, macAddr net.HardwareAddr, mtu int) error { ipStr := util.GetIPWithoutMask(ip) raw, err := ovs.Exec(ovs.MayExist, "add-port", "br-int", util.NodeNic, "--", "set", "interface", util.NodeNic, "type=internal", "--", @@ -326,11 +328,19 @@ func configureNodeNic(portName, ip, gw, joinCIDR string, macAddr net.HardwareAdd // ping ovn0 gw to activate the flow klog.Infof("wait ovn0 gw ready") - if err := waitNetworkReady(util.NodeNic, ip, gw, false, true, gatewayCheckMaxRetry); err != nil { + status := corev1.ConditionFalse + reason := "JoinSubnetGatewayReachable" + message := fmt.Sprintf("ping check to gateway ip %s succeeded", gw) + if err = waitNetworkReady(util.NodeNic, ip, gw, false, true, gatewayCheckMaxRetry); err != nil { klog.Errorf("failed to init ovn0 check: %v", err) - return err + status = corev1.ConditionTrue + reason = "JoinSubnetGatewayUnreachable" + message = fmt.Sprintf("ping check to gateway ip %s failed", gw) } - return nil + if err := util.SetNodeNetworkUnavailableCondition(cs, nodeName, status, reason, message); err != nil { + klog.Errorf("failed to set node network unavailable condition: %v", err) + } + return err } // If OVS restart, the ovn0 port will down and prevent host to pod network, diff --git a/pkg/util/k8s.go b/pkg/util/k8s.go index 72b9538c834..e2077784ccb 100644 --- a/pkg/util/k8s.go +++ b/pkg/util/k8s.go @@ -1,7 +1,9 @@ package util import ( + "context" "crypto/tls" + "encoding/json" "errors" "fmt" "net" @@ -15,6 +17,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/selection" + "k8s.io/client-go/kubernetes" "k8s.io/klog/v2" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" @@ -169,3 +172,31 @@ func DeploymentIsReady(deployment *appsv1.Deployment) bool { } return true } + +func SetNodeNetworkUnavailableCondition(cs kubernetes.Interface, nodeName string, status v1.ConditionStatus, reason, message string) error { + now := metav1.NewTime(time.Now()) + patch := map[string]map[string][]v1.NodeCondition{ + "status": { + "conditions": []v1.NodeCondition{{ + Type: v1.NodeNetworkUnavailable, + Status: status, + Reason: reason, + Message: message, + LastTransitionTime: now, + LastHeartbeatTime: now, + }}, + }, + } + data, err := json.Marshal(patch) + if err != nil { + klog.Errorf("failed to marshal patch data: %v", err) + return err + } + + if _, err = cs.CoreV1().Nodes().PatchStatus(context.Background(), nodeName, data); err != nil { + klog.Errorf("failed to patch node %s: %v", nodeName, err) + return err + } + + return nil +}