From 80e1a96861bc21e21cff1ce371820ee71652c361 Mon Sep 17 00:00:00 2001 From: Sridhar Gaddam Date: Wed, 21 Jul 2021 20:36:13 +0530 Subject: [PATCH] Fix subctl diagnose hostname mismatch issue Submariner Endpoint stores the hostname info as part of the endpoint object. In most of the K8s clusters, the hostname matches with the nodeName, but on some clusters, it was seen that nodeName does not match. This PR fixes this issue. Also, when more than a single node is labelled as Gateway node, the current code was not handling it properly, this PR fixes it. Fixes issue: https://github.com/submariner-io/submariner-operator/issues/1471 Signed-Off-by: Sridhar Gaddam (cherry picked from commit 9d125f0ef6ddfeda1f3a66f9039cff01a6782439) --- pkg/subctl/cmd/resource.go | 48 +++++++++++++++++++++++------ pkg/subctl/cmd/validate_fw_vxlan.go | 18 ++++++----- pkg/subctl/cmd/validate_tunnel.go | 2 +- 3 files changed, 51 insertions(+), 17 deletions(-) diff --git a/pkg/subctl/cmd/resource.go b/pkg/subctl/cmd/resource.go index 1558baba1..8bf91306b 100644 --- a/pkg/subctl/cmd/resource.go +++ b/pkg/subctl/cmd/resource.go @@ -19,9 +19,7 @@ import ( "bytes" "fmt" "io/ioutil" - "strings" - k8sV1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" v1opts "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime/schema" @@ -102,7 +100,7 @@ func getSubmarinerResource(config *rest.Config) *v1alpha1.Submariner { return submariner } -func getEndpointResource(config *rest.Config, clusterID string) *submarinerv1.Endpoint { +func getLocalEndpointResource(config *rest.Config, clusterID string) *submarinerv1.Endpoint { submarinerClient, err := subClientsetv1.NewForConfig(config) exitOnError("Unable to get the Submariner client", err) @@ -127,18 +125,50 @@ func getActiveGatewayNodeName(clientSet *kubernetes.Clientset, hostname string) } for _, node := range nodes.Items { - for _, addr := range node.Status.Addresses { - if addr.Type == k8sV1.NodeHostName { - if strings.HasPrefix(addr.Address, hostname) { - return node.Name - } - } + if node.Name == hostname { + return hostname + } + + // On some platforms, the nodeName does not match with the hostname. + // Submariner Endpoint stores the hostname info in the endpoint and not the nodeName. So, we spawn a + // tiny pod to read the hostname and return the corresponding node. + sPod, err := spawnSnifferPodOnNode(clientSet, node.Name, "default", "hostname") + if err != nil { + return "" + } + + defer sPod.DeletePod() + + if err = sPod.AwaitPodCompletion(); err != nil { + return "" + } + + if sPod.PodOutput[:len(sPod.PodOutput)-1] == hostname { + return node.Name } } return "" } +func getAnyRemoteEndpointResource(config *rest.Config, clusterID string) *submarinerv1.Endpoint { + submarinerClient, err := subClientsetv1.NewForConfig(config) + exitOnError("Unable to get the Submariner client", err) + + endpoints, err := submarinerClient.SubmarinerV1().Endpoints(OperatorNamespace).List(v1opts.ListOptions{}) + if err != nil { + return nil + } + + for _, endpoint := range endpoints.Items { + if endpoint.Spec.ClusterID != clusterID { + return &endpoint + } + } + + return nil +} + func getGatewaysResource(config *rest.Config) *submarinerv1.GatewayList { submarinerClient, err := subClientsetv1.NewForConfig(config) exitOnError("Unable to get the Submariner client", err) diff --git a/pkg/subctl/cmd/validate_fw_vxlan.go b/pkg/subctl/cmd/validate_fw_vxlan.go index 0726fe551..1cb49de01 100644 --- a/pkg/subctl/cmd/validate_fw_vxlan.go +++ b/pkg/subctl/cmd/validate_fw_vxlan.go @@ -91,19 +91,23 @@ func validateFWConfigWithinCluster(config *rest.Config, submariner *v1alpha1.Sub return false } - gateways := getGatewaysResource(config) - if gateways == nil || len(gateways.Items) == 0 { - status.QueueWarningMessage("There are no gateways detected on the cluster.") + localEndpoint := getLocalEndpointResource(config, submariner.Spec.ClusterID) + if localEndpoint == nil { return false } - if len(gateways.Items[0].Status.Connections) == 0 { - status.QueueWarningMessage("There are no active connections to remote clusters.") + remoteEndpoint := getAnyRemoteEndpointResource(config, submariner.Spec.ClusterID) + if remoteEndpoint == nil { + return false + } + + gwNodeName := getActiveGatewayNodeName(clientSet, localEndpoint.Spec.Hostname) + if gwNodeName == "" { return false } podCommand := fmt.Sprintf("timeout %d %s", validationTimeout, TCPSniffVxLANCommand) - sPod, err := spawnSnifferPodOnGatewayNode(clientSet, namespace, podCommand) + sPod, err := spawnSnifferPodOnNode(clientSet, gwNodeName, namespace, podCommand) if err != nil { message := fmt.Sprintf("Error while spawning the sniffer pod on the GatewayNode: %v", err) status.QueueFailureMessage(message) @@ -111,7 +115,7 @@ func validateFWConfigWithinCluster(config *rest.Config, submariner *v1alpha1.Sub } defer sPod.DeletePod() - remoteClusterIP := strings.Split(gateways.Items[0].Status.Connections[0].Endpoint.Subnets[0], "/")[0] + remoteClusterIP := strings.Split(remoteEndpoint.Spec.Subnets[0], "/")[0] podCommand = fmt.Sprintf("nc -w %d %s 8080", validationTimeout/2, remoteClusterIP) cPod, err := spawnClientPodOnNonGatewayNode(clientSet, namespace, podCommand) if err != nil { diff --git a/pkg/subctl/cmd/validate_tunnel.go b/pkg/subctl/cmd/validate_tunnel.go index 0db2f5958..2e164de88 100644 --- a/pkg/subctl/cmd/validate_tunnel.go +++ b/pkg/subctl/cmd/validate_tunnel.go @@ -89,7 +89,7 @@ func validateTunnelConfigAcrossClusters(localCfg, remoteCfg *rest.Config) bool { status.Start(fmt.Sprintf("Checking if tunnels can be setup on Gateway node of cluster %q.", submariner.Spec.ClusterID)) - localEndpoint := getEndpointResource(localCfg, submariner.Spec.ClusterID) + localEndpoint := getLocalEndpointResource(localCfg, submariner.Spec.ClusterID) if localEndpoint == nil { status.QueueWarningMessage("Could not find the local cluster Endpoint") return false