From 31f75e1fe61d7d0987231a5df408079239a73a6b Mon Sep 17 00:00:00 2001 From: Fred Rolland Date: Mon, 18 Dec 2023 12:14:36 +0200 Subject: [PATCH] feat: add podAntiAffinity to MOFED - Add podAntiAffinity to MOFED - Add unit test to check PodAntiAffinity rendering - Add unit test to check NodeSelector rendering - Extract methods in state_ofed for better readability Signed-off-by: Fred Rolland --- .../0050_ofed-driver-ds.yaml | 8 ++ pkg/state/state_ofed.go | 106 +++++++++------- pkg/state/state_ofed_test.go | 113 ++++++++++++++++++ 3 files changed, 187 insertions(+), 40 deletions(-) diff --git a/manifests/state-ofed-driver/0050_ofed-driver-ds.yaml b/manifests/state-ofed-driver/0050_ofed-driver-ds.yaml index 4f1da2fd..bfa46405 100644 --- a/manifests/state-ofed-driver/0050_ofed-driver-ds.yaml +++ b/manifests/state-ofed-driver/0050_ofed-driver-ds.yaml @@ -40,6 +40,14 @@ spec: - key: nvidia.com/gpu operator: Exists effect: NoSchedule + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: nvidia.com/ofed-driver + operator: Exists + topologyKey: kubernetes.io/hostname serviceAccountName: ofed-driver hostNetwork: true {{- if .CrSpec.ImagePullSecrets }} diff --git a/pkg/state/state_ofed.go b/pkg/state/state_ofed.go index 472e0264..4a2dd440 100644 --- a/pkg/state/state_ofed.go +++ b/pkg/state/state_ofed.go @@ -397,26 +397,7 @@ func (s *stateOFED) getManifestObjects( } nodeAttr := attrs[0].Attributes - if cr.Spec.OFEDDriver.StartupProbe == nil { - cr.Spec.OFEDDriver.StartupProbe = &mellanoxv1alpha1.PodProbeSpec{ - InitialDelaySeconds: 10, - PeriodSeconds: 10, - } - } - - if cr.Spec.OFEDDriver.LivenessProbe == nil { - cr.Spec.OFEDDriver.LivenessProbe = &mellanoxv1alpha1.PodProbeSpec{ - InitialDelaySeconds: 30, - PeriodSeconds: 30, - } - } - - if cr.Spec.OFEDDriver.ReadinessProbe == nil { - cr.Spec.OFEDDriver.ReadinessProbe = &mellanoxv1alpha1.PodProbeSpec{ - InitialDelaySeconds: 10, - PeriodSeconds: 30, - } - } + setProbesDefaults(cr) // Update MOFED Env variables with defaults for the cluster cr.Spec.OFEDDriver.Env = s.mergeWithDefaultEnvs(cr.Spec.OFEDDriver.Env, nodeAttr) @@ -424,29 +405,15 @@ func (s *stateOFED) getManifestObjects( additionalVolMounts := additionalVolumeMounts{} osname := nodeAttr[nodeinfo.AttrTypeOSName] // set any custom ssl key/certificate configuration provided - if cr.Spec.OFEDDriver.CertConfig != nil && cr.Spec.OFEDDriver.CertConfig.Name != "" { - destinationDir, err := getCertConfigPath(osname) - if err != nil { - return nil, fmt.Errorf("failed to get destination directory for custom TLS certificates config: %v", err) - } - - err = s.handleAdditionalMounts(ctx, &additionalVolMounts, cr.Spec.OFEDDriver.CertConfig.Name, destinationDir) - if err != nil { - return nil, fmt.Errorf("failed to mount volumes for custom TLS certificates: %v", err) - } + err := s.handleCertConfig(ctx, cr, osname, additionalVolMounts) + if err != nil { + return nil, err } // set any custom repo configuration provided - if cr.Spec.OFEDDriver.RepoConfig != nil && cr.Spec.OFEDDriver.RepoConfig.Name != "" { - destinationDir, err := getRepoConfigPath(osname) - if err != nil { - return nil, fmt.Errorf("failed to get destination directory for custom repo config: %v", err) - } - - err = s.handleAdditionalMounts(ctx, &additionalVolMounts, cr.Spec.OFEDDriver.RepoConfig.Name, destinationDir) - if err != nil { - return nil, fmt.Errorf("failed to mount volumes for custom repositories configuration: %v", err) - } + err = s.handleRepoConfig(ctx, cr, osname, additionalVolMounts) + if err != nil { + return nil, err } renderData := &ofedManifestRenderData{ @@ -695,3 +662,62 @@ func (e envVarsWithGet) Get(name string) *v1.EnvVar { return nil } + +// setProbesDefaults populates NicClusterPolicy CR with default Probe values +// if not provided by user +func setProbesDefaults(cr *mellanoxv1alpha1.NicClusterPolicy) { + if cr.Spec.OFEDDriver.StartupProbe == nil { + cr.Spec.OFEDDriver.StartupProbe = &mellanoxv1alpha1.PodProbeSpec{ + InitialDelaySeconds: 10, + PeriodSeconds: 10, + } + } + + if cr.Spec.OFEDDriver.LivenessProbe == nil { + cr.Spec.OFEDDriver.LivenessProbe = &mellanoxv1alpha1.PodProbeSpec{ + InitialDelaySeconds: 30, + PeriodSeconds: 30, + } + } + + if cr.Spec.OFEDDriver.ReadinessProbe == nil { + cr.Spec.OFEDDriver.ReadinessProbe = &mellanoxv1alpha1.PodProbeSpec{ + InitialDelaySeconds: 10, + PeriodSeconds: 30, + } + } +} + +// handleCertConfig handles additional mounts required for Certificates if specified +func (s *stateOFED) handleCertConfig( + ctx context.Context, cr *mellanoxv1alpha1.NicClusterPolicy, osname string, mounts additionalVolumeMounts) error { + if cr.Spec.OFEDDriver.CertConfig != nil && cr.Spec.OFEDDriver.CertConfig.Name != "" { + destinationDir, err := getCertConfigPath(osname) + if err != nil { + return fmt.Errorf("failed to get destination directory for custom TLS certificates config: %v", err) + } + + err = s.handleAdditionalMounts(ctx, &mounts, cr.Spec.OFEDDriver.CertConfig.Name, destinationDir) + if err != nil { + return fmt.Errorf("failed to mount volumes for custom TLS certificates: %v", err) + } + } + return nil +} + +// handleRepoConfig handles additional mounts required for custom repo if specified +func (s *stateOFED) handleRepoConfig( + ctx context.Context, cr *mellanoxv1alpha1.NicClusterPolicy, osname string, mounts additionalVolumeMounts) error { + if cr.Spec.OFEDDriver.RepoConfig != nil && cr.Spec.OFEDDriver.RepoConfig.Name != "" { + destinationDir, err := getRepoConfigPath(osname) + if err != nil { + return fmt.Errorf("failed to get destination directory for custom repo config: %v", err) + } + + err = s.handleAdditionalMounts(ctx, &mounts, cr.Spec.OFEDDriver.RepoConfig.Name, destinationDir) + if err != nil { + return fmt.Errorf("failed to mount volumes for custom repositories configuration: %v", err) + } + } + return nil +} diff --git a/pkg/state/state_ofed_test.go b/pkg/state/state_ofed_test.go index f2f31bf0..513ddb3b 100644 --- a/pkg/state/state_ofed_test.go +++ b/pkg/state/state_ofed_test.go @@ -17,16 +17,24 @@ package state import ( + "context" "strings" + "k8s.io/apimachinery/pkg/runtime" + . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" osconfigv1 "github.com/openshift/api/config/v1" + appsv1 "k8s.io/api/apps/v1" v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "github.com/Mellanox/network-operator/api/v1alpha1" "github.com/Mellanox/network-operator/pkg/nodeinfo" + "github.com/Mellanox/network-operator/pkg/render" + "github.com/Mellanox/network-operator/pkg/testing/mocks" + "github.com/Mellanox/network-operator/pkg/utils" ) const ( @@ -35,13 +43,17 @@ const ( testClusterWideNoProxy = "no-proxy-cluster-wide" testNicPolicyHTTPProxy = "http-policy" testNicPolicyNoProxy = "no-proxy-policy" + osName = "ubuntu" + osVer = "22.04" ) var _ = Describe("MOFED state test", func() { var stateOfed stateOFED + var ctx context.Context BeforeEach(func() { stateOfed = stateOFED{} + ctx = context.Background() }) Context("getMofedDriverImageName", func() { @@ -206,4 +218,105 @@ var _ = Describe("MOFED state test", func() { []v1.EnvVar{{Name: "Foo", Value: "Bar"}}), Entry("Ubuntu 23.04", "ubuntu", "23.04", []v1.EnvVar{}, []v1.EnvVar{}), ) + + Context("Render Manifests", func() { + It("Should Render Mofed DaemonSet", func() { + client := mocks.ControllerRuntimeClient{} + manifestBaseDir := "../../manifests/state-ofed-driver" + scheme := runtime.NewScheme() + + files, err := utils.GetFilesWithSuffix(manifestBaseDir, render.ManifestFileSuffix...) + Expect(err).NotTo(HaveOccurred()) + renderer := render.NewRenderer(files) + + ofedState := stateOFED{ + stateSkel: stateSkel{ + name: stateOFEDName, + description: stateOFEDDescription, + client: &client, + scheme: scheme, + renderer: renderer, + }, + } + cr := &v1alpha1.NicClusterPolicy{} + cr.Name = "nic-cluster-policy" + cr.Spec.OFEDDriver = &v1alpha1.OFEDDriverSpec{ + ImageSpec: v1alpha1.ImageSpec{ + Image: "mofed", + Repository: "nvcr.io/mellanox", + Version: "23.10-0.5.5.0", + }, + } + By("Creating NodeProvider with 1 Node") + infoProvider := nodeinfo.NewProvider([]*v1.Node{ + getNode("node1"), + }) + objs, err := ofedState.getManifestObjects(ctx, cr, infoProvider, &dummyProvider{}) + Expect(err).NotTo(HaveOccurred()) + // Expect 4 objects: DaemonSet, Service Account, Role, RoleBinding + Expect(len(objs)).To(Equal(4)) + By("Verify DaemonSet") + for _, obj := range objs { + if obj.GetKind() != "DaemonSet" { + continue + } + ds := appsv1.DaemonSet{} + err = runtime.DefaultUnstructuredConverter.FromUnstructured(obj.Object, &ds) + Expect(err).NotTo(HaveOccurred()) + Expect(ds.Name).To(Equal("mofed-ubuntu22.04-ds")) + verifyDSNodeSelector(ds.Spec.Template.Spec.NodeSelector) + verifyPodAntiInfinity(ds.Spec.Template.Spec.Affinity) + } + }) + }) }) + +func verifyPodAntiInfinity(affinity *v1.Affinity) { + By("Verify PodAntiInfinity") + Expect(affinity).NotTo(BeNil()) + expected := v1.Affinity{ + PodAntiAffinity: &v1.PodAntiAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "nvidia.com/ofed-driver", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + TopologyKey: "kubernetes.io/hostname", + }, + }, + }, + } + Expect(*affinity).To(BeEquivalentTo(expected)) +} + +func verifyDSNodeSelector(selector map[string]string) { + By("Verify NodeSelector") + nsMellanox, ok := selector["feature.node.kubernetes.io/pci-15b3.present"] + Expect(ok).To(BeTrue()) + Expect(nsMellanox).To(Equal("true")) + nsOsName, ok := selector["feature.node.kubernetes.io/system-os_release.ID"] + Expect(ok).To(BeTrue()) + Expect(nsOsName).To(Equal(osName)) + nsOsVer, ok := selector["feature.node.kubernetes.io/system-os_release.VERSION_ID"] + Expect(ok).To(BeTrue()) + Expect(nsOsVer).To(Equal(osVer)) +} + +func getNode(name string) *v1.Node { + return &v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Labels: map[string]string{ + nodeinfo.NodeLabelMlnxNIC: "true", + nodeinfo.NodeLabelOSName: osName, + nodeinfo.NodeLabelOSVer: osVer, + nodeinfo.NodeLabelCPUArch: "amd64", + }, + }, + } +}