From 7d54cef802a8e926b0cdf817cff372646a921441 Mon Sep 17 00:00:00 2001 From: Houston Putman Date: Wed, 27 Sep 2023 15:40:34 -0400 Subject: [PATCH 1/2] Get correct SolrPods in SolrCloud controller --- controllers/solrcloud_controller.go | 32 +++++++++---- controllers/util/solr_security_util.go | 2 +- controllers/util/solr_util.go | 9 ++++ tests/e2e/suite_test.go | 63 +++++++++++++++++++++----- 4 files changed, 85 insertions(+), 21 deletions(-) diff --git a/controllers/solrcloud_controller.go b/controllers/solrcloud_controller.go index 6bd81e8d..af82887b 100644 --- a/controllers/solrcloud_controller.go +++ b/controllers/solrcloud_controller.go @@ -437,16 +437,21 @@ func (r *SolrCloudReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( // Get the SolrCloud's Pods and initialize them if necessary var podList []corev1.Pod var podSelector labels.Selector - if podSelector, podList, err = r.initializePods(ctx, instance, logger); err != nil { + if podSelector, podList, err = r.initializePods(ctx, instance, statefulSet, logger); err != nil { return requeueOrNot, err } // Make sure the SolrCloud status is up-to-date with the state of the cluster var outOfDatePods util.OutOfDatePodSegmentation var availableUpdatedPodCount int - outOfDatePods, availableUpdatedPodCount, err = createCloudStatus(instance, &newStatus, statefulSet.Status, podSelector, podList) + var shouldRequeue bool + outOfDatePods, availableUpdatedPodCount, shouldRequeue, err = createCloudStatus(instance, &newStatus, statefulSet.Status, podSelector, podList) if err != nil { return requeueOrNot, err + } else if shouldRequeue { + // There is an issue with the status, so requeue to get a more up-to-date view of the cluster + updateRequeueAfter(&requeueOrNot, time.Second*1) + return requeueOrNot, nil } // We only want to do one cluster operation at a time, so we use a lock to ensure that. @@ -620,7 +625,7 @@ func (r *SolrCloudReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( } // InitializePods Ensure that all SolrCloud Pods are initialized -func (r *SolrCloudReconciler) initializePods(ctx context.Context, solrCloud *solrv1beta1.SolrCloud, logger logr.Logger) (podSelector labels.Selector, podList []corev1.Pod, err error) { +func (r *SolrCloudReconciler) initializePods(ctx context.Context, solrCloud *solrv1beta1.SolrCloud, statefulSet *appsv1.StatefulSet, logger logr.Logger) (podSelector labels.Selector, podList []corev1.Pod, err error) { foundPods := &corev1.PodList{} selectorLabels := solrCloud.SharedLabels() selectorLabels["technology"] = solrv1beta1.SolrTechnologyLabel @@ -635,14 +640,24 @@ func (r *SolrCloudReconciler) initializePods(ctx context.Context, solrCloud *sol logger.Error(err, "Error listing pods for SolrCloud") return } - podList = foundPods.Items // Initialize the pod's notStopped readinessCondition so that they can receive traffic until they are stopped - for i, pod := range podList { + for _, pod := range foundPods.Items { + isOwnedByCurrentStatefulSet := false + for _, ownerRef := range pod.ObjectMeta.OwnerReferences { + if ownerRef.UID == statefulSet.UID { + isOwnedByCurrentStatefulSet = true + break + } + } + // Do not include pods that match, but are not owned by the current statefulSet + if !isOwnedByCurrentStatefulSet { + continue + } if updatedPod, podError := r.initializePod(ctx, &pod, logger); podError != nil { err = podError } else if updatedPod != nil { - podList[i] = *updatedPod + podList = append(podList, *updatedPod) } } return @@ -676,7 +691,7 @@ func (r *SolrCloudReconciler) initializePod(ctx context.Context, pod *corev1.Pod // Initialize the SolrCloud.Status object func createCloudStatus(solrCloud *solrv1beta1.SolrCloud, newStatus *solrv1beta1.SolrCloudStatus, statefulSetStatus appsv1.StatefulSetStatus, podSelector labels.Selector, - podList []corev1.Pod) (outOfDatePods util.OutOfDatePodSegmentation, availableUpdatedPodCount int, err error) { + podList []corev1.Pod) (outOfDatePods util.OutOfDatePodSegmentation, availableUpdatedPodCount int, shouldRequeue bool, err error) { var otherVersions []string nodeNames := make([]string, len(podList)) nodeStatusMap := map[string]solrv1beta1.SolrNodeStatus{} @@ -798,8 +813,9 @@ func createCloudStatus(solrCloud *solrv1beta1.SolrCloud, extAddress := solrCloud.UrlScheme(true) + "://" + solrCloud.ExternalCommonUrl(solrCloud.Spec.SolrAddressability.External.DomainName, true) newStatus.ExternalCommonAddress = &extAddress } + shouldRequeue = newStatus.ReadyReplicas != statefulSetStatus.ReadyReplicas || newStatus.Replicas != statefulSetStatus.Replicas || newStatus.UpToDateNodes != statefulSetStatus.UpdatedReplicas - return outOfDatePods, availableUpdatedPodCount, nil + return outOfDatePods, availableUpdatedPodCount, shouldRequeue, nil } func (r *SolrCloudReconciler) reconcileNodeService(ctx context.Context, logger logr.Logger, instance *solrv1beta1.SolrCloud, nodeName string) (err error, ip string) { diff --git a/controllers/util/solr_security_util.go b/controllers/util/solr_security_util.go index 3f94de6f..8e76dc01 100644 --- a/controllers/util/solr_security_util.go +++ b/controllers/util/solr_security_util.go @@ -499,7 +499,7 @@ func useSecureProbe(solrCloud *solr.SolrCloud, probe *corev1.Probe, mountPath st javaToolOptionsStr = "" } - probeCommand := fmt.Sprintf("%ssolr api -get \"%s://%s:%d%s\"", javaToolOptionsStr, solrCloud.UrlScheme(false), "${POD_NAME}", probe.HTTPGet.Port.IntVal, probe.HTTPGet.Path) + probeCommand := fmt.Sprintf("%ssolr api -get \"%s://${SOLR_HOST}:%d%s\"", javaToolOptionsStr, solrCloud.UrlScheme(false), probe.HTTPGet.Port.IntVal, probe.HTTPGet.Path) probeCommand = regexp.MustCompile(`\s+`).ReplaceAllString(strings.TrimSpace(probeCommand), " ") // use an Exec instead of an HTTP GET diff --git a/controllers/util/solr_util.go b/controllers/util/solr_util.go index 5c5b6142..b6867ddc 100644 --- a/controllers/util/solr_util.go +++ b/controllers/util/solr_util.go @@ -327,6 +327,15 @@ func GenerateStatefulSet(solrCloud *solr.SolrCloud, solrCloudStatus *solr.SolrCl }, }, }, + { + Name: "POD_IP", + ValueFrom: &corev1.EnvVarSource{ + FieldRef: &corev1.ObjectFieldSelector{ + FieldPath: "status.podIP", + APIVersion: "v1", + }, + }, + }, { Name: "POD_NAMESPACE", ValueFrom: &corev1.EnvVarSource{ diff --git a/tests/e2e/suite_test.go b/tests/e2e/suite_test.go index 7ba3e949..f27a90f0 100644 --- a/tests/e2e/suite_test.go +++ b/tests/e2e/suite_test.go @@ -32,6 +32,7 @@ import ( "golang.org/x/text/cases" "golang.org/x/text/language" "io" + appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" @@ -291,7 +292,7 @@ func writeAllSolrInfoToFiles(ctx context.Context, directory string, namespace st } foundPods := &corev1.PodList{} - Expect(k8sClient.List(ctx, foundPods, listOps)).To(Succeed(), "Could not fetch Solr Operator pod") + Expect(k8sClient.List(ctx, foundPods, listOps)).To(Succeed(), "Could not fetch Solr pods") Expect(foundPods).ToNot(BeNil(), "No Solr pods could be found") for _, pod := range foundPods.Items { writeAllPodInfoToFiles( @@ -300,6 +301,42 @@ func writeAllSolrInfoToFiles(ctx context.Context, directory string, namespace st &pod, ) } + + foundStatefulSets := &appsv1.StatefulSetList{} + Expect(k8sClient.List(ctx, foundStatefulSets, listOps)).To(Succeed(), "Could not fetch Solr statefulSets") + Expect(foundStatefulSets).ToNot(BeNil(), "No Solr statefulSet could be found") + for _, statefulSet := range foundStatefulSets.Items { + writeAllStatefulSetInfoToFiles( + directory+statefulSet.Name+".statefulSet", + &statefulSet, + ) + } +} + +// writeAllStatefulSetInfoToFiles writes the following each to a separate file with the given base name & directory. +// - StatefulSet Spec/Status +// - StatefulSet Events +func writeAllStatefulSetInfoToFiles(baseFilename string, statefulSet *appsv1.StatefulSet) { + // Write statefulSet to a file + statusFile, err := os.Create(baseFilename + ".status.json") + defer statusFile.Close() + Expect(err).ToNot(HaveOccurred(), "Could not open file to save statefulSet status: %s", baseFilename+".status.json") + jsonBytes, marshErr := json.MarshalIndent(statefulSet, "", "\t") + Expect(marshErr).ToNot(HaveOccurred(), "Could not serialize statefulSet json") + _, writeErr := statusFile.Write(jsonBytes) + Expect(writeErr).ToNot(HaveOccurred(), "Could not write statefulSet json to file") + + // Write events for statefulSet to a file + eventsFile, err := os.Create(baseFilename + ".events.json") + defer eventsFile.Close() + Expect(err).ToNot(HaveOccurred(), "Could not open file to save statefulSet events: %s", baseFilename+".events.yaml") + + eventList, err := rawK8sClient.CoreV1().Events(statefulSet.Namespace).Search(scheme.Scheme, statefulSet) + Expect(err).ToNot(HaveOccurred(), "Could not find events for statefulSet: %s", statefulSet.Name) + jsonBytes, marshErr = json.MarshalIndent(eventList, "", "\t") + Expect(marshErr).ToNot(HaveOccurred(), "Could not serialize statefulSet events json") + _, writeErr = eventsFile.Write(jsonBytes) + Expect(writeErr).ToNot(HaveOccurred(), "Could not write statefulSet events json to file") } // writeAllPodInfoToFile writes the following each to a separate file with the given base name & directory. @@ -319,24 +356,26 @@ func writeAllPodInfoToFiles(ctx context.Context, baseFilename string, pod *corev // Write events for pod to a file eventsFile, err := os.Create(baseFilename + ".events.json") defer eventsFile.Close() - Expect(err).ToNot(HaveOccurred(), "Could not open file to save status: %s", baseFilename+".events.yaml") + Expect(err).ToNot(HaveOccurred(), "Could not open file to save pod events: %s", baseFilename+".events.yaml") eventList, err := rawK8sClient.CoreV1().Events(pod.Namespace).Search(scheme.Scheme, pod) Expect(err).ToNot(HaveOccurred(), "Could not find events for pod: %s", pod.Name) jsonBytes, marshErr = json.MarshalIndent(eventList, "", "\t") - Expect(marshErr).ToNot(HaveOccurred(), "Could not serialize events json") + Expect(marshErr).ToNot(HaveOccurred(), "Could not serialize pod events json") _, writeErr = eventsFile.Write(jsonBytes) - Expect(writeErr).ToNot(HaveOccurred(), "Could not write events json to file") + Expect(writeErr).ToNot(HaveOccurred(), "Could not write pod events json to file") // Write pod logs to a file - writePodLogsToFile( - ctx, - baseFilename+".log", - pod.Name, - pod.Namespace, - nil, - "", - ) + if len(pod.Status.ContainerStatuses) > 0 && pod.Status.ContainerStatuses[0].Started != nil && *pod.Status.ContainerStatuses[0].Started { + writePodLogsToFile( + ctx, + baseFilename+".log", + pod.Name, + pod.Namespace, + nil, + "", + ) + } } func writePodLogsToFile(ctx context.Context, filename string, podName string, podNamespace string, startTimeRaw *time.Time, filterLinesWithString string) { From 569c302354d4a9d0d3e0086a4d43b4597a8d27c6 Mon Sep 17 00:00:00 2001 From: Houston Putman Date: Wed, 27 Sep 2023 16:20:03 -0400 Subject: [PATCH 2/2] Fix tests --- controllers/solrcloud_controller_basic_auth_test.go | 2 +- controllers/solrcloud_controller_tls_test.go | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/controllers/solrcloud_controller_basic_auth_test.go b/controllers/solrcloud_controller_basic_auth_test.go index 79972123..7a4cee8c 100644 --- a/controllers/solrcloud_controller_basic_auth_test.go +++ b/controllers/solrcloud_controller_basic_auth_test.go @@ -283,7 +283,7 @@ func expectBasicAuthConfigOnPodTemplateWithGomega(g Gomega, solrCloud *solrv1bet g.Expect(basicAuthSecretVolMount.MountPath).To(Equal("/etc/secrets/"+secretName), "Wrong path used to mount Basic Auth volume") expProbeCmd := fmt.Sprintf("JAVA_TOOL_OPTIONS=\"-Dbasicauth=$(cat /etc/secrets/%s-solrcloud-basic-auth/username):$(cat /etc/secrets/%s-solrcloud-basic-auth/password) -Dsolr.httpclient.builder.factory=org.apache.solr.client.solrj.impl.PreemptiveBasicAuthClientBuilderFactory\" "+ - "solr api -get \"http://${POD_NAME}:8983%s\"", + "solr api -get \"http://${SOLR_HOST}:8983%s\"", solrCloud.Name, solrCloud.Name, expProbePath) g.Expect(mainContainer.LivenessProbe).To(Not(BeNil()), "main container should have a liveness probe defined") g.Expect(mainContainer.LivenessProbe.Exec).To(Not(BeNil()), "liveness probe should have an exec when auth is enabled") diff --git a/controllers/solrcloud_controller_tls_test.go b/controllers/solrcloud_controller_tls_test.go index a585f1d1..22daa964 100644 --- a/controllers/solrcloud_controller_tls_test.go +++ b/controllers/solrcloud_controller_tls_test.go @@ -540,7 +540,7 @@ func expectTLSConfigOnPodTemplateWithGomega(g Gomega, solrCloud *solrv1beta1.Sol if solrCloud.Spec.CustomSolrKubeOptions.PodOptions != nil && solrCloud.Spec.CustomSolrKubeOptions.PodOptions.LivenessProbe != nil { path = solrCloud.Spec.CustomSolrKubeOptions.PodOptions.LivenessProbe.HTTPGet.Path } - g.Expect(mainContainer.LivenessProbe.Exec.Command[2]).To(ContainSubstring(fmt.Sprintf("solr api -get \"%s://%s:%d%s\"", solrCloud.UrlScheme(false), "${POD_NAME}", solrCloud.Spec.SolrAddressability.PodPort, path)), "liveness probe should invoke solr api -get to contact Solr securely") + g.Expect(mainContainer.LivenessProbe.Exec.Command[2]).To(ContainSubstring(fmt.Sprintf("solr api -get \"%s://${SOLR_HOST}:%d%s\"", solrCloud.UrlScheme(false), solrCloud.Spec.SolrAddressability.PodPort, path)), "liveness probe should invoke solr api -get to contact Solr securely") g.Expect(mainContainer.ReadinessProbe).To(Not(BeNil()), "main container should have a readiness probe defined") g.Expect(mainContainer.ReadinessProbe.Exec).To(Not(BeNil()), "readiness probe should have an exec when mTLS is enabled") g.Expect(mainContainer.ReadinessProbe.Exec.Command).To(HaveLen(3), "readiness probe command has wrong number of args") @@ -548,7 +548,7 @@ func expectTLSConfigOnPodTemplateWithGomega(g Gomega, solrCloud *solrv1beta1.Sol if solrCloud.Spec.CustomSolrKubeOptions.PodOptions != nil && solrCloud.Spec.CustomSolrKubeOptions.PodOptions.ReadinessProbe != nil { path = solrCloud.Spec.CustomSolrKubeOptions.PodOptions.ReadinessProbe.HTTPGet.Path } - g.Expect(mainContainer.ReadinessProbe.Exec.Command[2]).To(ContainSubstring(fmt.Sprintf("solr api -get \"%s://%s:%d%s\"", solrCloud.UrlScheme(false), "${POD_NAME}", solrCloud.Spec.SolrAddressability.PodPort, path)), "readiness probe should invoke solr api -get to contact Solr securely") + g.Expect(mainContainer.ReadinessProbe.Exec.Command[2]).To(ContainSubstring(fmt.Sprintf("solr api -get \"%s://${SOLR_HOST}:%d%s\"", solrCloud.UrlScheme(false), solrCloud.Spec.SolrAddressability.PodPort, path)), "readiness probe should invoke solr api -get to contact Solr securely") if solrCloud.Spec.CustomSolrKubeOptions.PodOptions != nil && solrCloud.Spec.CustomSolrKubeOptions.PodOptions.StartupProbe != nil { g.Expect(mainContainer.StartupProbe).To(Not(BeNil()), "main container should have a startup probe defined") g.Expect(mainContainer.StartupProbe.Exec).To(Not(BeNil()), "startup probe should have an exec when auth is enabled") @@ -557,7 +557,7 @@ func expectTLSConfigOnPodTemplateWithGomega(g Gomega, solrCloud *solrv1beta1.Sol if solrCloud.Spec.CustomSolrKubeOptions.PodOptions != nil && solrCloud.Spec.CustomSolrKubeOptions.PodOptions.StartupProbe != nil { path = solrCloud.Spec.CustomSolrKubeOptions.PodOptions.StartupProbe.HTTPGet.Path } - g.Expect(mainContainer.StartupProbe.Exec.Command[2]).To(ContainSubstring(fmt.Sprintf("solr api -get \"%s://%s:%d%s\"", solrCloud.UrlScheme(false), "${POD_NAME}", solrCloud.Spec.SolrAddressability.PodPort, path)), "startup probe should invoke solr api -get to contact Solr securely") + g.Expect(mainContainer.StartupProbe.Exec.Command[2]).To(ContainSubstring(fmt.Sprintf("solr api -get \"%s://${SOLR_HOST}:%d%s\"", solrCloud.UrlScheme(false), solrCloud.Spec.SolrAddressability.PodPort, path)), "startup probe should invoke solr api -get to contact Solr securely") } } else if solrCloud != nil { g.Expect(mainContainer.LivenessProbe).To(Not(BeNil()), "main container should have a liveness probe defined")